Skip to content

Instantly share code, notes, and snippets.

@mrocklin
Last active December 23, 2025 14:58
Show Gist options
  • Select an option

  • Save mrocklin/ea77759183647557d009c6c18ee15ba1 to your computer and use it in GitHub Desktop.

Select an option

Save mrocklin/ea77759183647557d009c6c18ee15ba1 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "93ca94eb-cd93-463f-9fd4-a38906ac68f4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<dask.config.set at 0x105ef2fd0>"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import dask\n",
"dask.config.set({\"array.query-planning\": True})"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "74e0269e-0c9c-405b-90f0-cc234a878974",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <td>\n",
" <table style=\"border-collapse: collapse;\">\n",
" <thead>\n",
" <tr>\n",
" <td> </td>\n",
" <th> Array </th>\n",
" <th> Chunk </th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" \n",
" <tr>\n",
" <th> Bytes </th>\n",
" <td> 2.98 GiB </td>\n",
" <td> 128.00 MiB </td>\n",
" </tr>\n",
" \n",
" <tr>\n",
" <th> Shape </th>\n",
" <td> (20000, 20000) </td>\n",
" <td> (4096, 4096) </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Dask graph </th>\n",
" <td colspan=\"2\"> 25 chunks in 1 expression </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Data type </th>\n",
" <td colspan=\"2\"> float64 numpy.ndarray </td>\n",
" </tr>\n",
" </tbody>\n",
" </table>\n",
" </td>\n",
" <td>\n",
" <svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"24\" x2=\"120\" y2=\"24\" />\n",
" <line x1=\"0\" y1=\"49\" x2=\"120\" y2=\"49\" />\n",
" <line x1=\"0\" y1=\"73\" x2=\"120\" y2=\"73\" />\n",
" <line x1=\"0\" y1=\"98\" x2=\"120\" y2=\"98\" />\n",
" <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
" <line x1=\"24\" y1=\"0\" x2=\"24\" y2=\"120\" />\n",
" <line x1=\"49\" y1=\"0\" x2=\"49\" y2=\"120\" />\n",
" <line x1=\"73\" y1=\"0\" x2=\"73\" y2=\"120\" />\n",
" <line x1=\"98\" y1=\"0\" x2=\"98\" y2=\"120\" />\n",
" <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.0,0.0 120.0,0.0 120.0,120.0 0.0,120.0\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"60.0\" y=\"140.0\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >20000</text>\n",
" <text x=\"140.0\" y=\"60.0\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,140.0,60.0)\">20000</text>\n",
"</svg>\n",
" </td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"dask.array<ones, shape=(20000, 20000), dtype=float64, chunksize=(4096, 4096), chunktype=numpy.ndarray>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import dask.array as da\n",
"x = da.ones((20000, 20000))\n",
"x"
]
},
{
"cell_type": "markdown",
"id": "de4d53a6-2695-4b88-99de-e6a125f3611d",
"metadata": {},
"source": [
"## Rendering expressions as rich trees in tables\n",
"\n",
"- Orange operations generally produce data (e.g. creation and IO)\n",
"- Blue operations generally reduce data (e.g. slices and reductions)\n",
"- Rows shown at full brightness mark the layers that hold the most memory; the other rows are dimmed"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "04918dbe-40a0-459a-8c7e-daa63a990a37",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> Operation Shape Bytes Chunks </span>\n",
" <span style=\"color: #ce5c00; text-decoration-color: #ce5c00; font-weight: bold\">Ones</span> (20000, 20000) 3.0 GiB 4096×4096 \n",
"</pre>"
],
"text/plain": [
"\u001b[2m \u001b[0m\u001b[2mOperation\u001b[0m\u001b[2m \u001b[0m\u001b[2m Shape\u001b[0m\u001b[2m \u001b[0m\u001b[2m Bytes\u001b[0m\u001b[2m \u001b[0m\u001b[2m Chunks\u001b[0m\u001b[2m \u001b[0m\n",
" \u001b[1;38;2;206;92;0mOnes\u001b[0m (20000, 20000) 3.0 GiB 4096×4096"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.expr"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c4b781ae-8c59-44e9-a889-682f5ec84611",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> Operation Shape Bytes Chunks </span>\n",
" <span style=\"color: #3465a4; text-decoration-color: #3465a4; font-weight: bold\">Getitem</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (5, 20)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 800 B</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 5×20</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">└ </span><span style=\"color: #3465a4; text-decoration-color: #3465a4; font-weight: bold\">Sum</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (20000, 20000)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3.0 GiB</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 4096×4096</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"font-weight: bold\">Sum</span> (20000, 5, 20000) 15 GiB 4096×1×4096 \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"font-weight: bold\">Tensordot</span> (20000, 5, 20000) 15 GiB 4096×1×4096 \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> ├ </span><span style=\"color: #ce5c00; text-decoration-color: #ce5c00; font-weight: bold\">Ones</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (20000, 20000)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3.0 GiB</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 4096×4096</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"font-weight: bold\">Sub</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (20000, 20000)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3.0 GiB</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 4096×4096</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> ├ </span><span style=\"font-weight: bold\">Transpose</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (20000, 20000)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3.0 GiB</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 4096×4096</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> │ └ </span><span style=\"color: #ce5c00; text-decoration-color: #ce5c00; font-weight: bold\">Ones</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (20000, 20000)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3.0 GiB</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 4096×4096</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"color: #3465a4; text-decoration-color: #3465a4; font-weight: bold\">Mean Agg</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (20000)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">156 kiB</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 4096</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"font-weight: bold\">Mean Chunk</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (5, 20000)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">781 kiB</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 1×4096</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"color: #ce5c00; text-decoration-color: #ce5c00; font-weight: bold\">Ones</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (20000, 20000)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3.0 GiB</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 4096×4096</span> \n",
"</pre>"
],
"text/plain": [
"\u001b[2m \u001b[0m\u001b[2mOperation \u001b[0m\u001b[2m \u001b[0m\u001b[2m Shape\u001b[0m\u001b[2m \u001b[0m\u001b[2m Bytes\u001b[0m\u001b[2m \u001b[0m\u001b[2m Chunks\u001b[0m\u001b[2m \u001b[0m\n",
" \u001b[1;38;2;52;101;164mGetitem\u001b[0m \u001b[2m (5, 20)\u001b[0m \u001b[2m 800 B\u001b[0m \u001b[2m 5×20\u001b[0m \n",
" \u001b[2m└ \u001b[0m\u001b[1;38;2;52;101;164mSum\u001b[0m \u001b[2m (20000, 20000)\u001b[0m \u001b[2m3.0 GiB\u001b[0m \u001b[2m 4096×4096\u001b[0m \n",
" \u001b[2m └ \u001b[0m\u001b[1mSum\u001b[0m (20000, 5, 20000) 15 GiB 4096×1×4096 \n",
" \u001b[2m └ \u001b[0m\u001b[1mTensordot\u001b[0m (20000, 5, 20000) 15 GiB 4096×1×4096 \n",
" \u001b[2m ├ \u001b[0m\u001b[1;38;2;206;92;0mOnes\u001b[0m \u001b[2m (20000, 20000)\u001b[0m \u001b[2m3.0 GiB\u001b[0m \u001b[2m 4096×4096\u001b[0m \n",
" \u001b[2m └ \u001b[0m\u001b[1mSub\u001b[0m \u001b[2m (20000, 20000)\u001b[0m \u001b[2m3.0 GiB\u001b[0m \u001b[2m 4096×4096\u001b[0m \n",
" \u001b[2m ├ \u001b[0m\u001b[1mTranspose\u001b[0m \u001b[2m (20000, 20000)\u001b[0m \u001b[2m3.0 GiB\u001b[0m \u001b[2m 4096×4096\u001b[0m \n",
" \u001b[2m │ └ \u001b[0m\u001b[1;38;2;206;92;0mOnes\u001b[0m \u001b[2m (20000, 20000)\u001b[0m \u001b[2m3.0 GiB\u001b[0m \u001b[2m 4096×4096\u001b[0m \n",
" \u001b[2m └ \u001b[0m\u001b[1;38;2;52;101;164mMean Agg\u001b[0m \u001b[2m (20000)\u001b[0m \u001b[2m156 kiB\u001b[0m \u001b[2m 4096\u001b[0m \n",
" \u001b[2m └ \u001b[0m\u001b[1mMean Chunk\u001b[0m \u001b[2m (5, 20000)\u001b[0m \u001b[2m781 kiB\u001b[0m \u001b[2m 1×4096\u001b[0m \n",
" \u001b[2m └ \u001b[0m\u001b[1;38;2;206;92;0mOnes\u001b[0m \u001b[2m (20000, 20000)\u001b[0m \u001b[2m3.0 GiB\u001b[0m \u001b[2m 4096×4096\u001b[0m"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.dot(x.T - x.mean(axis=0))[:5, :20].expr"
]
},
{
"cell_type": "markdown",
"id": "78101c05-f388-4553-ae43-cb5e1a5f79ad",
"metadata": {},
"source": [
"## Simplification\n",
"\n",
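"Operations such as slicing and rechunking are good at pushing down through other operations and into the IO/creation expressions. Below, the `[:5, :20]` slice reaches every `Ones` expression, so only the required rows and columns are ever created."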
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c3857de1-d874-44c0-82ac-b0778551202c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> Operation Shape Bytes Chunks </span>\n",
" <span style=\"color: #3465a4; text-decoration-color: #3465a4; font-weight: bold\">Sum</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (5, 20)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 800 B</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 5×20</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">└ </span><span style=\"font-weight: bold\">Sum</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (5, 5, 20)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3.9 kiB</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 5×1×20</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"font-weight: bold\">Tensordot</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (5, 5, 20)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">3.9 kiB</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 5×1×20</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> ├ </span><span style=\"color: #ce5c00; text-decoration-color: #ce5c00; font-weight: bold\">Ones</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (5, 20000)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">781 kiB</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 5×4096</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"font-weight: bold\">Sub</span> (20000, 20) 3.1 MiB 4096×20 \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> ├ </span><span style=\"font-weight: bold\">Transpose</span> (20000, 20) 3.1 MiB 4096×20 \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> │ └ </span><span style=\"color: #ce5c00; text-decoration-color: #ce5c00; font-weight: bold\">Ones</span> (20, 20000) 3.1 MiB 20×4096 \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"color: #3465a4; text-decoration-color: #3465a4; font-weight: bold\">Mean Agg</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (20)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 160 B</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 20</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"font-weight: bold\">Mean Chunk</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (5, 20)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 800 B</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 1×20</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"color: #ce5c00; text-decoration-color: #ce5c00; font-weight: bold\">Ones</span> (20000, 20) 3.1 MiB 4096×20 \n",
"</pre>"
],
"text/plain": [
"\u001b[2m \u001b[0m\u001b[2mOperation \u001b[0m\u001b[2m \u001b[0m\u001b[2m Shape\u001b[0m\u001b[2m \u001b[0m\u001b[2m Bytes\u001b[0m\u001b[2m \u001b[0m\u001b[2m Chunks\u001b[0m\u001b[2m \u001b[0m\n",
" \u001b[1;38;2;52;101;164mSum\u001b[0m \u001b[2m (5, 20)\u001b[0m \u001b[2m 800 B\u001b[0m \u001b[2m 5×20\u001b[0m \n",
" \u001b[2m└ \u001b[0m\u001b[1mSum\u001b[0m \u001b[2m (5, 5, 20)\u001b[0m \u001b[2m3.9 kiB\u001b[0m \u001b[2m 5×1×20\u001b[0m \n",
" \u001b[2m └ \u001b[0m\u001b[1mTensordot\u001b[0m \u001b[2m (5, 5, 20)\u001b[0m \u001b[2m3.9 kiB\u001b[0m \u001b[2m 5×1×20\u001b[0m \n",
" \u001b[2m ├ \u001b[0m\u001b[1;38;2;206;92;0mOnes\u001b[0m \u001b[2m (5, 20000)\u001b[0m \u001b[2m781 kiB\u001b[0m \u001b[2m 5×4096\u001b[0m \n",
" \u001b[2m └ \u001b[0m\u001b[1mSub\u001b[0m (20000, 20) 3.1 MiB 4096×20 \n",
" \u001b[2m ├ \u001b[0m\u001b[1mTranspose\u001b[0m (20000, 20) 3.1 MiB 4096×20 \n",
" \u001b[2m │ └ \u001b[0m\u001b[1;38;2;206;92;0mOnes\u001b[0m (20, 20000) 3.1 MiB 20×4096 \n",
" \u001b[2m └ \u001b[0m\u001b[1;38;2;52;101;164mMean Agg\u001b[0m \u001b[2m (20)\u001b[0m \u001b[2m 160 B\u001b[0m \u001b[2m 20\u001b[0m \n",
" \u001b[2m └ \u001b[0m\u001b[1mMean Chunk\u001b[0m \u001b[2m (5, 20)\u001b[0m \u001b[2m 800 B\u001b[0m \u001b[2m 1×20\u001b[0m \n",
" \u001b[2m └ \u001b[0m\u001b[1;38;2;206;92;0mOnes\u001b[0m (20000, 20) 3.1 MiB 4096×20"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.dot(x.T - x.mean(axis=0))[:5, :20].expr.simplify()"
]
},
{
"cell_type": "markdown",
"id": "3eef7b80-1b77-483f-93f6-aa8a900dce7a",
"metadata": {},
"source": [
"## Fusion\n",
"\n",
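"Finally, during optimization, blockwise operations fuse together aggressively. Here everything below the final `Sum` collapses into a single `FusedBlockwise` expression."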
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "aea21537-d5a8-499d-9774-f0a461147ae3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> Operation Shape Bytes Chunks </span>\n",
" <span style=\"color: #3465a4; text-decoration-color: #3465a4; font-weight: bold\">Sum</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (5, 20)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 800 B</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 5×20</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">└ </span><span style=\"color: #ce5c00; text-decoration-color: #ce5c00; font-weight: bold\">FusedBlockwise</span> (5, 5, 20) 3.9 kiB 5×1×20 \n",
"</pre>"
],
"text/plain": [
"\u001b[2m \u001b[0m\u001b[2mOperation \u001b[0m\u001b[2m \u001b[0m\u001b[2m Shape\u001b[0m\u001b[2m \u001b[0m\u001b[2m Bytes\u001b[0m\u001b[2m \u001b[0m\u001b[2mChunks\u001b[0m\u001b[2m \u001b[0m\n",
" \u001b[1;38;2;52;101;164mSum\u001b[0m \u001b[2m (5, 20)\u001b[0m \u001b[2m 800 B\u001b[0m \u001b[2m 5×20\u001b[0m \n",
" \u001b[2m└ \u001b[0m\u001b[1;38;2;206;92;0mFusedBlockwise\u001b[0m (5, 5, 20) 3.9 kiB 5×1×20"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.dot(x.T - x.mean(axis=0))[:5, :20].expr.optimize()"
]
},
{
"cell_type": "markdown",
"id": "2214f007-4135-41d7-a746-f61aee002464",
"metadata": {},
"source": [
"## Zarr\n",
"\n",
"This also works with real(ish) data, here an array stored on disk in Zarr format."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "bbe4869c-c086-4ac6-a0e6-1add36d62e11",
"metadata": {},
"outputs": [],
"source": [
"import dask\n",
"dask.config.set({\"array.query-planning\": True})\n",
"\n",
"import dask.array as da\n",
"import os, shutil\n",
"\n",
"if not os.path.exists(\"myfile.zarr\"):\n",
" x = da.random.random((20000, 20000), chunks=(1000, 1000))\n",
" x.to_zarr(\"myfile.zarr\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "95ed3501-9d14-4d5b-8768-27bd99796a59",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <td>\n",
" <table style=\"border-collapse: collapse;\">\n",
" <thead>\n",
" <tr>\n",
" <td> </td>\n",
" <th> Array </th>\n",
" <th> Chunk </th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" \n",
" <tr>\n",
" <th> Bytes </th>\n",
" <td> 2.98 GiB </td>\n",
" <td> 7.63 MiB </td>\n",
" </tr>\n",
" \n",
" <tr>\n",
" <th> Shape </th>\n",
" <td> (20000, 20000) </td>\n",
" <td> (1000, 1000) </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Dask graph </th>\n",
" <td colspan=\"2\"> 400 chunks in 1 expression </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Data type </th>\n",
" <td colspan=\"2\"> float64 numpy.ndarray </td>\n",
" </tr>\n",
" </tbody>\n",
" </table>\n",
" </td>\n",
" <td>\n",
" <svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"6\" x2=\"120\" y2=\"6\" />\n",
" <line x1=\"0\" y1=\"12\" x2=\"120\" y2=\"12\" />\n",
" <line x1=\"0\" y1=\"18\" x2=\"120\" y2=\"18\" />\n",
" <line x1=\"0\" y1=\"24\" x2=\"120\" y2=\"24\" />\n",
" <line x1=\"0\" y1=\"30\" x2=\"120\" y2=\"30\" />\n",
" <line x1=\"0\" y1=\"36\" x2=\"120\" y2=\"36\" />\n",
" <line x1=\"0\" y1=\"42\" x2=\"120\" y2=\"42\" />\n",
" <line x1=\"0\" y1=\"48\" x2=\"120\" y2=\"48\" />\n",
" <line x1=\"0\" y1=\"54\" x2=\"120\" y2=\"54\" />\n",
" <line x1=\"0\" y1=\"60\" x2=\"120\" y2=\"60\" />\n",
" <line x1=\"0\" y1=\"66\" x2=\"120\" y2=\"66\" />\n",
" <line x1=\"0\" y1=\"72\" x2=\"120\" y2=\"72\" />\n",
" <line x1=\"0\" y1=\"78\" x2=\"120\" y2=\"78\" />\n",
" <line x1=\"0\" y1=\"84\" x2=\"120\" y2=\"84\" />\n",
" <line x1=\"0\" y1=\"90\" x2=\"120\" y2=\"90\" />\n",
" <line x1=\"0\" y1=\"96\" x2=\"120\" y2=\"96\" />\n",
" <line x1=\"0\" y1=\"102\" x2=\"120\" y2=\"102\" />\n",
" <line x1=\"0\" y1=\"108\" x2=\"120\" y2=\"108\" />\n",
" <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
" <line x1=\"6\" y1=\"0\" x2=\"6\" y2=\"120\" />\n",
" <line x1=\"12\" y1=\"0\" x2=\"12\" y2=\"120\" />\n",
" <line x1=\"18\" y1=\"0\" x2=\"18\" y2=\"120\" />\n",
" <line x1=\"24\" y1=\"0\" x2=\"24\" y2=\"120\" />\n",
" <line x1=\"30\" y1=\"0\" x2=\"30\" y2=\"120\" />\n",
" <line x1=\"36\" y1=\"0\" x2=\"36\" y2=\"120\" />\n",
" <line x1=\"42\" y1=\"0\" x2=\"42\" y2=\"120\" />\n",
" <line x1=\"48\" y1=\"0\" x2=\"48\" y2=\"120\" />\n",
" <line x1=\"54\" y1=\"0\" x2=\"54\" y2=\"120\" />\n",
" <line x1=\"60\" y1=\"0\" x2=\"60\" y2=\"120\" />\n",
" <line x1=\"66\" y1=\"0\" x2=\"66\" y2=\"120\" />\n",
" <line x1=\"72\" y1=\"0\" x2=\"72\" y2=\"120\" />\n",
" <line x1=\"78\" y1=\"0\" x2=\"78\" y2=\"120\" />\n",
" <line x1=\"84\" y1=\"0\" x2=\"84\" y2=\"120\" />\n",
" <line x1=\"90\" y1=\"0\" x2=\"90\" y2=\"120\" />\n",
" <line x1=\"96\" y1=\"0\" x2=\"96\" y2=\"120\" />\n",
" <line x1=\"102\" y1=\"0\" x2=\"102\" y2=\"120\" />\n",
" <line x1=\"108\" y1=\"0\" x2=\"108\" y2=\"120\" />\n",
" <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.0,0.0 120.0,0.0 120.0,120.0 0.0,120.0\" style=\"fill:#8B4903A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"60.0\" y=\"140.0\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >20000</text>\n",
" <text x=\"140.0\" y=\"60.0\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,140.0,60.0)\">20000</text>\n",
"</svg>\n",
" </td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"dask.array<from-zarr-c5755af454bc8f5b1d4eb4a43f38007e, shape=(20000, 20000), dtype=float64, chunksize=(1000, 1000), chunktype=numpy.ndarray>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = da.from_zarr(\"myfile.zarr\")\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "03ff5d34-3097-4fd2-b224-492704c5fc53",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> Operation Shape Bytes Chunks </span>\n",
" <span style=\"color: #3465a4; text-decoration-color: #3465a4; font-weight: bold\">Getitem</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> (5, 20000)</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">781 kiB</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 5×838</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">└ </span><span style=\"font-weight: bold\">Rechunk</span> (20000, 20000) 3.0 GiB 20000×838 \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"font-weight: bold\">Add</span> (20000, 20000) 3.0 GiB 1000×1000 \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"color: #ce5c00; text-decoration-color: #ce5c00; font-weight: bold\">FromArray</span> (20000, 20000) 3.0 GiB 1000×1000 \n",
"</pre>"
],
"text/plain": [
"\u001b[2m \u001b[0m\u001b[2mOperation \u001b[0m\u001b[2m \u001b[0m\u001b[2m Shape\u001b[0m\u001b[2m \u001b[0m\u001b[2m Bytes\u001b[0m\u001b[2m \u001b[0m\u001b[2m Chunks\u001b[0m\u001b[2m \u001b[0m\n",
" \u001b[1;38;2;52;101;164mGetitem\u001b[0m \u001b[2m (5, 20000)\u001b[0m \u001b[2m781 kiB\u001b[0m \u001b[2m 5×838\u001b[0m \n",
" \u001b[2m└ \u001b[0m\u001b[1mRechunk\u001b[0m (20000, 20000) 3.0 GiB 20000×838 \n",
" \u001b[2m └ \u001b[0m\u001b[1mAdd\u001b[0m (20000, 20000) 3.0 GiB 1000×1000 \n",
" \u001b[2m └ \u001b[0m\u001b[1;38;2;206;92;0mFromArray\u001b[0m (20000, 20000) 3.0 GiB 1000×1000"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(x + 1).rechunk((-1, \"auto\"))[:5].expr"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c3b2e0c1-bb5f-4b32-add4-3c09a5dfac95",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> Operation Shape Bytes Chunks </span>\n",
" <span style=\"font-weight: bold\">Add</span> (20000) 156 kiB 838 \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">└ </span><span style=\"color: #3465a4; text-decoration-color: #3465a4; font-weight: bold\">Getitem</span> (20000) 156 kiB 838 \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"color: #ce5c00; text-decoration-color: #ce5c00; font-weight: bold\">FromArray</span> (1, 20000) 156 kiB 1×838 \n",
"</pre>"
],
"text/plain": [
"\u001b[2m \u001b[0m\u001b[2mOperation \u001b[0m\u001b[2m \u001b[0m\u001b[2m Shape\u001b[0m\u001b[2m \u001b[0m\u001b[2m Bytes\u001b[0m\u001b[2m \u001b[0m\u001b[2mChunks\u001b[0m\u001b[2m \u001b[0m\n",
" \u001b[1mAdd\u001b[0m (20000) 156 kiB 838 \n",
" \u001b[2m└ \u001b[0m\u001b[1;38;2;52;101;164mGetitem\u001b[0m (20000) 156 kiB 838 \n",
" \u001b[2m └ \u001b[0m\u001b[1;38;2;206;92;0mFromArray\u001b[0m (1, 20000) 156 kiB 1×838"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(x + 1).rechunk((-1, \"auto\"))[5].expr.simplify()"
]
},
{
"cell_type": "markdown",
"id": "629146da-b390-4c19-b598-17cb1178b012",
"metadata": {},
"source": [
"## DataFrames get the expression repr too"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "f7738f6d-c13e-4865-b2ef-e7c3e3df16d5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Dask Series Structure:\n",
"npartitions=1\n",
" float64\n",
" ...\n",
"Dask Name: getitem, 5 expressions"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import dask\n",
"df = dask.datasets.timeseries()\n",
"out = df.groupby(df.name).x.mean()\n",
"out"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "22f67292-b4ea-44d4-88ee-c60e7feea784",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><strong>Dask DataFrame Structure:</strong></div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>id</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" </tr>\n",
" <tr>\n",
" <th>npartitions=30</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2000-01-01</th>\n",
" <td>string</td>\n",
" <td>int64</td>\n",
" <td>float64</td>\n",
" <td>float64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2000-01-02</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2000-01-30</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2000-01-31</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<div>Dask Name: to_string_dtype, 2 expressions</div>"
],
"text/plain": [
"Dask DataFrame Structure:\n",
" name id x y\n",
"npartitions=30 \n",
"2000-01-01 string int64 float64 float64\n",
"2000-01-02 ... ... ... ...\n",
"... ... ... ... ...\n",
"2000-01-30 ... ... ... ...\n",
"2000-01-31 ... ... ... ...\n",
"Dask Name: to_string_dtype, 2 expressions"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "2c2e3166-8a26-4cbd-ab57-1f21cb88ff56",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> Operation Par… Columns Dtypes </span>\n",
" <span style=\"font-weight: bold\">Projection</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 1</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">x </span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> float64</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">└ </span><span style=\"color: #3465a4; text-decoration-color: #3465a4; font-weight: bold\">Mean</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 1</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">x </span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> float64</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"font-weight: bold\">Projection</span> 30 name, x float64, string \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"font-weight: bold\">ArrowStringConversion</span> 30 name, ... (4 cols) float64, int64, stri… \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"color: #ce5c00; text-decoration-color: #ce5c00; font-weight: bold\">Timeseries</span> 30 name, ... (4 cols) float64, int64, obje… \n",
"</pre>"
],
"text/plain": [
"\u001b[2m \u001b[0m\u001b[2mOperation \u001b[0m\u001b[2m \u001b[0m\u001b[2mPar…\u001b[0m\u001b[2m \u001b[0m\u001b[2mColumns \u001b[0m\u001b[2m \u001b[0m\u001b[2m Dtypes\u001b[0m\u001b[2m \u001b[0m\n",
" \u001b[1mProjection\u001b[0m \u001b[2m 1\u001b[0m \u001b[2mx \u001b[0m \u001b[2m float64\u001b[0m \n",
" \u001b[2m└ \u001b[0m\u001b[1;38;2;52;101;164mMean\u001b[0m \u001b[2m 1\u001b[0m \u001b[2mx \u001b[0m \u001b[2m float64\u001b[0m \n",
" \u001b[2m └ \u001b[0m\u001b[1mProjection\u001b[0m 30 name, x float64, string \n",
" \u001b[2m └ \u001b[0m\u001b[1mArrowStringConversion\u001b[0m 30 name, ... (4 cols) float64, int64, stri… \n",
" \u001b[2m └ \u001b[0m\u001b[1;38;2;206;92;0mTimeseries\u001b[0m 30 name, ... (4 cols) float64, int64, obje…"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"out.expr"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "75a2e8de-62b3-4062-b467-57f35a300eb5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> Operation Parts Columns Dtypes </span>\n",
" <span style=\"font-weight: bold\">Projection</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 1</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">x </span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> float64</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">└ </span><span style=\"color: #3465a4; text-decoration-color: #3465a4; font-weight: bold\">Mean</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> 1</span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">x </span> <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> float64</span> \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"font-weight: bold\">ArrowStringConversion</span> 30 name, x float64, string \n",
" <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\"> └ </span><span style=\"color: #ce5c00; text-decoration-color: #ce5c00; font-weight: bold\">Timeseries</span> 30 name, x float64, object \n",
"</pre>"
],
"text/plain": [
"\u001b[2m \u001b[0m\u001b[2mOperation \u001b[0m\u001b[2m \u001b[0m\u001b[2mParts\u001b[0m\u001b[2m \u001b[0m\u001b[2mColumns\u001b[0m\u001b[2m \u001b[0m\u001b[2m Dtypes\u001b[0m\u001b[2m \u001b[0m\n",
" \u001b[1mProjection\u001b[0m \u001b[2m 1\u001b[0m \u001b[2mx \u001b[0m \u001b[2m float64\u001b[0m \n",
" \u001b[2m└ \u001b[0m\u001b[1;38;2;52;101;164mMean\u001b[0m \u001b[2m 1\u001b[0m \u001b[2mx \u001b[0m \u001b[2m float64\u001b[0m \n",
" \u001b[2m └ \u001b[0m\u001b[1mArrowStringConversion\u001b[0m 30 name, x float64, string \n",
" \u001b[2m └ \u001b[0m\u001b[1;38;2;206;92;0mTimeseries\u001b[0m 30 name, x float64, object"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"out.simplify().expr"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "71fef705-08bd-454a-bc29-66e36b983975",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Dask Series Structure:\n",
"npartitions=1\n",
" float64\n",
" ...\n",
"Dask Name: getitem, 5 expressions"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"out"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d248a25a-a19f-4b6e-b566-cf4face547d5",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.14.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment