This notebook demonstrates how to use Datashader to display large datasets inside a plotly FigureWidget. Change callbacks are used to recompute the Datashader image whenever the axis range or figure size changes.
$ conda install datashader -y
# plotly
from plotly.graph_objs import FigureWidget
# core
import io
import base64
import time
# pandas
import pandas as pd
# numpy
import numpy as np
# scikit learn
from sklearn import datasets
# datashader
import datashader as ds
import datashader.transfer_functions as tf
from datashader.colors import inferno
--------------------------------------------------------------------------- ImportError Traceback (most recent call last) <ipython-input-1-7429c7dd0e8a> in <module>() 17 18 # datashader ---> 19 import datashader as ds 20 import datashader.transfer_functions as tf 21 from datashader.colors import inferno ImportError: No module named datashader
We will create a large dataset by duplicating the Iris dataset many times, adding random noise to each copy.
# Number of noisy replicas of the Iris dataset to stack together.
num_copies = 7000  # 1,050,000 rows

iris_data = datasets.load_iris()

# Turn names such as 'sepal length (cm)' into identifiers such as 'sepal_length'.
feature_names = [
    name.replace(' (cm)', '').replace(' ', '_')
    for name in iris_data.feature_names
]
iris_df_orig = pd.DataFrame(iris_data.data, columns=feature_names)

# Shift the class labels by one.
target_orig = iris_data.target + 1

# Frame of features: every replica is the original frame plus independent
# Gaussian noise; the stacked result gets a fresh 0..N-1 index.
iris_df = pd.concat(
    (
        np.random.normal(scale=0.2, size=iris_df_orig.shape) + iris_df_orig
        for _ in range(num_copies)
    ),
    ignore_index=True,
)

# List of targets: the original labels repeated once per replica, in order.
target = list(target_orig) * num_copies

# Dataframe that includes the target as a categorical column.
iris_target_df = pd.concat(
    [iris_df, pd.Series(target, name='target', dtype='category')],
    axis=1,
)

iris_df.describe()
| | sepal_length | sepal_width | petal_length | petal_width |
|---|---|---|---|---|
| count | 1.050000e+06 | 1.050000e+06 | 1.050000e+06 | 1.050000e+06 |
| mean | 5.843334e+00 | 3.054174e+00 | 3.758513e+00 | 1.198656e+00 |
| std | 8.491408e-01 | 4.760660e-01 | 1.769797e+00 | 7.867412e-01 |
| min | 3.570904e+00 | 1.288017e+00 | 1.595338e-01 | -7.130906e-01 |
| 25% | 5.156203e+00 | 2.740191e+00 | 1.637389e+00 | 3.925065e-01 |
| 50% | 5.801178e+00 | 3.033385e+00 | 4.307065e+00 | 1.316142e+00 |
| 75% | 6.443619e+00 | 3.346452e+00 | 5.142117e+00 | 1.827593e+00 |
| max | 8.665589e+00 | 5.200911e+00 | 7.662023e+00 | 3.353820e+00 |
Define a function that takes the x/y ranges and the plot width/height and generates a Datashader image of the dataset. The image is returned as a PIL image object.
def gen_ds_image(x_range, y_range, plot_width, plot_height):
    """Render the noisy iris scatter as a Datashader image.

    Points from the module-level ``iris_target_df`` are aggregated on a
    canvas (sepal_length on x, sepal_width on y), counted per ``target``
    category, shaded, and dynamically spread.

    Args:
        x_range: Two-element x-axis extent of the canvas.
        y_range: Two-element y-axis extent of the canvas.
        plot_width: Canvas width.
        plot_height: Canvas height.

    Returns:
        The rendered image converted with ``to_pil()``, or ``None`` when any
        argument is ``None``.
    """
    if any(arg is None for arg in (x_range, y_range, plot_width, plot_height)):
        return None

    canvas = ds.Canvas(
        plot_width=plot_width,
        plot_height=plot_height,
        x_range=x_range,
        y_range=y_range,
    )
    # Per-pixel point counts, split by target category.
    category_counts = canvas.points(
        iris_target_df,
        'sepal_length',
        'sepal_width',
        agg=ds.count_cat('target'),
    )
    shaded = tf.shade(category_counts)
    spread = tf.dynspread(shaded, threshold=0.95, max_px=5, shape='circle')
    return spread.to_pil()
# Initial axis ranges and figure size used for the first render.
x_range, y_range = [3, 10], [0, 6]
plot_width, plot_height = 700, 500

# Test the image generation function and display the resulting PIL image.
initial_img = gen_ds_image(x_range, y_range, plot_width, plot_height)
initial_img
# The single trace is invisible (opacity 0); it exists only to initialise the
# axes from x_range / y_range and to support autoresize.
f = FigureWidget(
    data=[
        dict(
            x=x_range,
            y=y_range,
            mode='markers',
            marker=dict(opacity=0),
        )
    ],
    layout=dict(width=plot_width, height=plot_height),
)
f
Failed to display Jupyter Widget of type FigureWidget.
If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean that the widgets JavaScript is still loading. If this message persists, it likely means that the widgets JavaScript library is either not installed or not enabled. See the Jupyter Widgets Documentation for setup instructions.
If you're reading this message in another frontend (for example, a static rendering on GitHub or NBViewer), it may mean that your frontend doesn't currently support widgets.
# Set the background image: anchor its top-left corner at (x_range[0],
# y_range[1]) in data coordinates and stretch it over the full x/y extent.
f.layout.images = [
    {
        # plotly now performs auto conversion of PIL image to png data URI
        'source': initial_img,
        'xref': "x",
        'yref': "y",
        'x': x_range[0],
        'y': y_range[1],
        'sizex': x_range[1] - x_range[0],
        'sizey': y_range[1] - y_range[0],
        'sizing': "stretch",
        'layer': "below",
    }
]
def update_ds_image(layout, x_range, y_range, plot_width, plot_height):
    """Re-render the Datashader background image for a new viewport.

    Intended as a change callback for ``f.layout``: it repositions the first
    layout image of the module-level figure ``f`` so that it covers the given
    axis ranges, and replaces its source with a freshly generated image.

    Args:
        layout: The layout object supplied by the change callback (unused).
        x_range: Two-element x-axis range, or ``None`` if not yet known.
        y_range: Two-element y-axis range, or ``None`` if not yet known.
        plot_width: Figure width, or ``None`` if not yet known.
        plot_height: Figure height, or ``None`` if not yet known.

    Returns:
        None. When any argument is ``None`` the current image is left
        untouched instead of raising ``TypeError`` while indexing the range.
    """
    # Without a complete viewport there is nothing to render; indexing a
    # None range below would raise inside the callback.
    if x_range is None or y_range is None or plot_width is None or plot_height is None:
        return

    # Render before opening the batch so that a rendering failure cannot
    # leave the image placement half-updated.
    new_source = gen_ds_image(x_range, y_range, plot_width, plot_height)

    img = f.layout.images[0]

    # Update with batch_update so all updates happen simultaneously
    with f.batch_update():
        img.x = x_range[0]
        img.y = y_range[1]
        img.sizex = x_range[1] - x_range[0]
        img.sizey = y_range[1] - y_range[0]
        img.source = new_source
# Install update_ds_image as a callback that runs exactly once if one or more
# of the following layout properties changes:
#   - xaxis range
#   - yaxis range
#   - figure width
#   - figure height
_watched_props = (('xaxis', 'range'), ('yaxis', 'range'), 'width', 'height')
f.layout.on_change(update_ds_image, *_watched_props)

# Make dragging on the plot zoom.
f.layout.dragmode = 'zoom'
f
Failed to display Jupyter Widget of type FigureWidget.
If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean that the widgets JavaScript is still loading. If this message persists, it likely means that the widgets JavaScript library is either not installed or not enabled. See the Jupyter Widgets Documentation for setup instructions.
If you're reading this message in another frontend (for example, a static rendering on GitHub or NBViewer), it may mean that your frontend doesn't currently support widgets.
# Change the x-axis range programmatically. ('xaxis', 'range') is one of the
# properties watched by the on_change callback registered on f.layout.
f.layout.xaxis.range = [3.5, 9]
f
Failed to display Jupyter Widget of type FigureWidget.
If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean that the widgets JavaScript is still loading. If this message persists, it likely means that the widgets JavaScript library is either not installed or not enabled. See the Jupyter Widgets Documentation for setup instructions.
If you're reading this message in another frontend (for example, a static rendering on GitHub or NBViewer), it may mean that your frontend doesn't currently support widgets.
# Resize the figure; both dimensions are set inside a single batch so they
# are applied together.
with f.batch_update():
    f.layout.width, f.layout.height = 1000, 500