Below is a minimal example of my problem, which is how to make my code more efficient. The example is an embedded Bokeh app in a Jupyter notebook that selects data series and plots them with multi_line. I am new to Bokeh and don't really have a feel for how to tune my code for speed.
Project: I plan to feed in vintages of monthly forecasts so that earlier forecast values can be compared with later ones. There are up to 12 forecasts, with several thousand data series in each forecast. So the intended data (i.e., the data dictionary) will be much larger, likely around 100 MB in CSV file format.
I am using Bokeh version 3.2.1 from an Anaconda installation of Python 3.11.
Questions:
- Would a javascript callback work faster?
- Am I just out of luck on speed when it comes to passing a larger dataframe or data dictionary to multi_line?
- Is it possible to use output_file, rather than output_notebook?
Thanks,
Ant
# pandas and numpy for data manipulation
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_notebook, output_file
from bokeh.models import (ColumnDataSource, MultiSelect, MultiChoice,
TabPanel, Tabs, HoverTool, TapTool,
BoxZoomTool, ResetTool)
from bokeh.layouts import column, row
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
# Send Bokeh output to the notebook rather than to a standalone file.
output_notebook()

# Sample data: one 'x' column of ISO date strings followed by the y-series.
# 'x' must stay the first key; bkapp treats every later key as a series.
data = {
    'x': ["2010-01-01", "2010-02-01", "2010-03-01", "2010-04-01", "2010-05-01"],
    'y1': [2, 4, 6, 8, 10],
    'y2': [1, 3, 5, 7, 9],
    'y3': [10, 8, 6, 4, 2],
}

# Parse the date strings once and store the parsed values back in the dict.
dates = pd.to_datetime(data['x'], format='%Y-%m-%d')
data['x'] = dates

# Palette used to colour the selected series.
eia_colors = [
    '#416a23', '#006997', '#84511d', '#cc9900',
    '#72242d', '#49366e', '#b34e16', '#333333',
    '#5d9732', '#0096d7', '#bd732a', '#ffc702',
]

# The data source starts with four empty columns, one entry per plotted line.
xs, ys, colors, labels = [], [], [], []
source = ColumnDataSource(
    data={'x': xs, 'y': ys, 'color': colors, 'label': labels}
)

# Keyword arguments for the figure, the hover tool and the multi_line glyph.
figure_opts = {'width': 450, 'height': 300, 'x_axis_type': "datetime"}
hover_opts = {
    'tooltips': [('x_value', '@x'), ('y_value', '@y')],
    'show_arrow': False,
    'line_policy': 'next',
}
line_opts = {
    'line_width': 3,
    'line_color': 'color',
    'line_alpha': 0.6,
    'hover_line_color': 'color',
    'hover_line_alpha': 1.0,
    'source': source,
    'legend_field': 'label',
}
def bkapp(doc):
    """Build the 'Vintages' tab for one session and add it to ``doc``.

    The tab holds a MultiChoice widget listing every series in the
    module-level ``data`` dict (all keys except 'x') next to a datetime
    figure. Changing the selection replaces the contents of a single data
    source that feeds a single ``multi_line`` renderer.

    Args:
        doc: The Bokeh document the layout is added to as a root.
    """
    # Bokeh models can belong to only one document, so each session gets
    # its own data source instead of reusing the module-level one.
    session_source = ColumnDataSource(
        data={'x': [], 'y': [], 'color': [], 'label': []}
    )

    p = figure(
        tools=[HoverTool(**hover_opts), BoxZoomTool(), ResetTool(), TapTool()],
        **figure_opts,
    )

    # Create the renderer exactly once. Adding a new multi_line inside the
    # callback would stack another glyph (and legend entry) on every change.
    p.multi_line(xs='x', ys='y', **{**line_opts, 'source': session_source})
    p.legend.location = "bottom"
    p.xaxis.axis_line_width = 2
    p.xaxis.axis_line_color = "black"

    # Every key except the shared 'x' column is a selectable series.
    columns = [col for col in data if col != 'x']
    multi_choice = MultiChoice(title='Columns:', options=columns)

    def update_lines(attrname, old, new):
        """Swap the plotted lines for the series named in ``new``."""
        selected = list(new)
        palette_size = len(eia_colors)
        # Assign all columns in one step so they always have equal length.
        session_source.data = {
            'x': [dates] * len(selected),
            'y': [data[col] for col in selected],
            # Wrap around the palette so the colour column never runs short
            # when more series are selected than there are colours.
            'color': [eia_colors[i % palette_size]
                      for i in range(len(selected))],
            # Plain strings, one legend label per line.
            'label': selected,
        }

    multi_choice.on_change('value', update_lines)

    layout = row(multi_choice, p)
    tab = TabPanel(child=layout, title='Vintages')
    doc.add_root(Tabs(tabs=[tab]))
# Wrap the session-building function in a Bokeh Application.
handler = FunctionHandler(bkapp)
app = Application(handler)

# Display the application (plot and widget together).
show(app)