Generating a secondary plot from a selected data point in another plot

Miles_Winter · November 5, 2017, 5:49pm

I’m trying to figure out the best way to link data points in one plot to a set of secondary plots, where each data point has it’s own secondary plot. For example, let say we have a two panel figure where one plot is a scatter plot and the other panel is a histogram summarizing the underlying distribution for a single data point in the scatter plot. When you select/hover over a data point from the scatter plot, the corresponding histogram for that data point is shown in the other panel. Any ideas on how to make this happen? Thanks!

Sebastien_Roche · November 18, 2017, 4:55pm

There are several ways you can do that. Here is an example. You can put select_fig and hist_fig in separate panels if you want.

Basically in your ‘selection plot’ you should have a source and a taptool in order to have a callback that does something when you click on a point.

Then it’s just a matter of updating the data source of your secondary plots. It will become more involved if each point is supposed to bring up a different kind of plot.

Here I reworked the histogram example (https://bokeh.pydata.org/en/latest/docs/gallery/histogram.html) to select which plot to display using a scatter plot.

-- coding: utf-8 --

import numpy as np

import scipy.special

from bokeh.io import curdoc

from bokeh.layouts import gridplot

from bokeh.models import ColumnDataSource, LabelSet

from bokeh.plotting import figure

params = {

'normal':{'title':'Normal Distribution (μ=0, σ=0.5)',},

'log normal':{'title':'Log Normal Distribution (μ=0, σ=0.5)',},

'gamma':{'title':'Gamma Distribution (k=1, θ=2)',},

'weibull':{'title':'Weibull Distribution (λ=1, k=1.25)',},

}

def update_hist_fig(attr,old,new):

try:

	new_id = new['1d']['indices'][0]

except IndexError:

	return

select_points = curdoc().select_one({"name":"select_points"})

selected = select_points.data_source.data['txt'][new_id]

hist_fig = curdoc().select_one({"name":"hist_fig"})

hist_fig.title.text = params[selected]['title']

hist_data = params[selected]['hist_data']

hist = curdoc().select_one({"name":"hist"})

hist.data_source.data.update(hist_data)

lines_data = params[selected]['lines_data']

pdf_line = curdoc().select_one({"name":"pdf"})

pdf_line.data_source.data.update(lines_data)

cdf_line = curdoc().select_one({"name":"cdf"})

cdf_line.data_source.data.update(lines_data)

hist_fig = figure(title=“Normal Distribution (μ=0, σ=0.5)”,tools=“save”,background_fill_color=“#E8DDCB”,name=“hist_fig”)

mu, sigma = 0, 0.5

measured = np.random.normal(mu, sigma, 1000)

hist, edges = np.histogram(measured, density=True, bins=50)

x = np.linspace(-2, 2, 1000)

pdf = 1/(sigma * np.sqrt(2np.pi)) * np.exp(-(x-mu)**2 / (2sigma**2))

cdf = (1+scipy.special.erf((x-mu)/np.sqrt(2*sigma**2)))/2

params[‘normal’][‘hist_data’] = {‘top’:hist,‘left’:edges[:-1],‘right’:edges[1:]}

params[‘normal’][‘lines_data’] = {‘x’:x,‘pdf’:pdf,‘cdf’:cdf}

source = ColumnDataSource(data=params[‘normal’][‘hist_data’])

hist_fig.quad(top=‘top’, bottom=0, left=‘left’, right=‘right’,fill_color=“#036564”, line_color=“#033649”,name=‘hist’,source=source)

source = ColumnDataSource(data=params[‘normal’][‘lines_data’])

hist_fig.line(x=‘x’, y=‘pdf’, line_color=“#D95B43”, line_width=8, alpha=0.7, legend=“PDF”,source=source,name=‘pdf’)

hist_fig.line(x=‘x’, y=‘cdf’, line_color=“white”, line_width=2, alpha=0.7, legend=“CDF”,source=source,name=‘cdf’)

hist_fig.legend.location = “center_right”

hist_fig.legend.background_fill_color = “darkgrey”

hist_fig.xaxis.axis_label = ‘x’

hist_fig.yaxis.axis_label = ‘Pr(x)’

mu, sigma = 0, 0.5

measured = np.random.lognormal(mu, sigma, 1000)

hist, edges = np.histogram(measured, density=True, bins=50)

x = np.linspace(0.0001, 8.0, 1000)

pdf = 1/(x* sigma * np.sqrt(2np.pi)) * np.exp(-(np.log(x)-mu)**2 / (2sigma**2))

cdf = (1+scipy.special.erf((np.log(x)-mu)/(np.sqrt(2)*sigma)))/2

params[‘log normal’][‘hist_data’] = {‘top’:hist,‘left’:edges[:-1],‘right’:edges[1:]}

params[‘log normal’][‘lines_data’] = {‘x’:x,‘pdf’:pdf,‘cdf’:cdf}

k, theta = 1.0, 2.0

measured = np.random.gamma(k, theta, 1000)

hist, edges = np.histogram(measured, density=True, bins=50)

x = np.linspace(0.0001, 20.0, 1000)

pdf = x**(k-1) * np.exp(-x/theta) / (theta**k * scipy.special.gamma(k))

cdf = scipy.special.gammainc(k, x/theta) / scipy.special.gamma(k)

params[‘gamma’][‘hist_data’] = {‘top’:hist,‘left’:edges[:-1],‘right’:edges[1:]}

params[‘gamma’][‘lines_data’] = {‘x’:x,‘pdf’:pdf,‘cdf’:cdf}

lam, k = 1, 1.25

measured = lam*(-np.log(np.random.uniform(0, 1, 1000)))**(1/k)

hist, edges = np.histogram(measured, density=True, bins=50)

x = np.linspace(0.0001, 8, 1000)

pdf = (k/lam)*(x/lam)**(k-1) * np.exp(-(x/lam)**k)

cdf = 1 - np.exp(-(x/lam)**k)

params[‘weibull’][‘hist_data’] = {‘top’:hist,‘left’:edges[:-1],‘right’:edges[1:]}

params[‘weibull’][‘lines_data’] = {‘x’:x,‘pdf’:pdf,‘cdf’:cdf}

select_fig = figure(tools=‘tap’,outline_line_alpha=0,min_border_top=20)

select_fig.renderers =

select_source = ColumnDataSource(data={‘x’:[1,1,1,1],‘y’:[0,1,2,3],‘txt’:[‘normal’,‘log normal’,‘gamma’,‘weibull’]})

select_fig.scatter(x=‘x’,y=‘y’,size=10,source=select_source,name=‘select_points’)

labset = LabelSet(x=‘x’,y=‘y’,text=‘txt’,y_offset=-5,x_offset=10,source=select_source)

select_fig.add_layout(labset)

select_source.on_change(‘selected’,update_hist_fig)

curdoc().add_root(gridplot(select_fig,hist_fig, ncols=2, plot_width=400, plot_height=400, toolbar_location=None))

``

Miles_Winter · November 19, 2017, 2:45pm

Thanks Sébastien! This will definitely get the job done.