import pandas as pd
import numpy as np
from bokeh.io import output_notebook, show,save
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource,HoverTool, Range1d,Legend
from bokeh.palettes import Reds,Greys
from bokeh.layouts import column,row,grid
from bokeh.transform import factor_cmap, dodge
from bokeh.models import BasicTickFormatter
output_notebook()
df = pd.DataFrame({'Group' : np.random.choice(['A','B','C','D'], 400),
'Points1' : np.random.randint(100,1000,400),
'Points2': np.random.randint(100,1000,400)
})
df['Total']=df.Points1+df.Points2
df
source=ColumnDataSource(data=dict(df,p1=df.Points1))
def box_plot(df, vals, label, ylabel=None,xlabel=None,title=None):
# Group Data frame
df_gb = df.groupby(label)
# Get the categories
cats = list(df_gb.groups.keys())
# Compute quartiles for each group
q1 = df_gb[vals].quantile(q=0.25)
q2 = df_gb[vals].quantile(q=0.5)
q3 = df_gb[vals].quantile(q=0.75)
# Compute interquartile region and upper and lower bounds for outliers
iqr = q3 - q1
upper_cutoff = q3 + 1.5*iqr
lower_cutoff = q1 - 1.5*iqr
# Find the outliers for each category
def outliers(group):
cat = group.name
outlier_inds = (group[vals] > upper_cutoff[cat]) \
| (group[vals] < lower_cutoff[cat])
return group[vals][outlier_inds]
# Apply outlier finder
out = df_gb.apply(outliers).dropna()
# Points of outliers for plotting
outx = []
outy = []
for cat in cats:
# only add outliers if they exist
if cat in out and not out[cat].empty:
for value in out[cat]:
outx.append(cat)
outy.append(value)
# If outliers, shrink whiskers to smallest and largest non-outlier
qmin = df_gb[vals].min()
qmax = df_gb[vals].max()
upper = [min([x,y]) for (x,y) in zip(qmax, upper_cutoff)]
lower = [max([x,y]) for (x,y) in zip(qmin, lower_cutoff)]
cats = [str(i) for i in cats]
# Build figure
p = figure(sizing_mode='stretch_width', x_range=cats,height=300,toolbar_location=None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_width = 2
p.yaxis.axis_label = ylabel
p.xaxis.axis_label = xlabel
p.title=title
p.y_range.start=0
p.title.align = 'center'
# stems
p.segment(cats, upper, cats, q3, line_width=2, line_color="black")
p.segment(cats, lower, cats, q1, line_width=2, line_color="black")
# boxes
p.rect(cats, (q3 + q1)/2, 0.5, q3 - q1, fill_color=['#a50f15', '#de2d26', '#fb6a4a', '#fcae91'],
alpha=0.7, line_width=2, line_color="black")
# median (almost-0 height rects simpler than segments)
p.rect(cats, q2, 0.5, 0.01, line_color="black", line_width=2)
# whiskers (almost-0 height rects simpler than segments)
p.rect(cats, lower, 0.2, 0.01, line_color="black")
p.rect(cats, upper, 0.2, 0.01, line_color="black")
# outliers
p.circle(outx, outy, size=6, color="black")
p.add_tools(HoverTool(tooltips=[('Points','@p1')]))
return p
p = box_plot(df, 'Points1', 'Group', ylabel='Total spread',title='BoxPlot')
show(p)
I am trying to include information for each outlier when hovered over(not in this particular dataset), and each statistical value such as quartiles, median etc., but when i hover it only shows “???”
I am a noob so any guidance/edits would be really helpful