I’m attempting to create a line plot of historical data, plus a single point (presumably a scatter plot) showing the value predicted by my machine-learning (sklearn) model. I plotted the historical data without too much trouble and have since been adding code to plot the predicted value. The plot has a Bokeh Select menu that lets me choose the ID number of each item, thereby selecting the appropriate plot and model. At the moment the date used in the prediction is static (set to 03/31/2020); once I’m able to plot the static date, I plan to add a widget for a user-selected date. Consequently, I’ve attempted to write the code so that the prediction is re-run whenever a new ID number is selected.
Since I’ve added the prediction code, things have broken down. Can anyone tell me where I’m going wrong?
Below is my current code for the plot itself.
The full code (including the regression code and the corresponding data) can be found on my GitHub — code: app_test.py; data: pred_data.csv, historical_data.csv, features_created.pkd.
from bokeh.io import curdoc
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, Select, DataRange1d, HoverTool
from bokeh.plotting import figure
# Set up (initial) data
# NOTE(review): historical_data, prediction_data, lin_model and pd are not
# defined in this excerpt -- presumably created earlier in app_test.py; confirm.
# Keep only the columns the plot needs, ordered by date so the line is drawn
# left to right.
historical_data = historical_data.loc[:, ['ndc', 'date', 'nadac_per_unit']]
historical_data = historical_data.sort_values('date')
# Initial line-plot source: only the rows whose ndc equals the default ID.
historical_source = ColumnDataSource(historical_data[historical_data.loc[:, 'ndc']=='781593600'])
# Prediction inputs: the prediction date is fixed at 2020-03-31 for every row.
import datetime as dt
# prediction_data.loc[:, 'date'] = dt.datetime(2020, 3, 31)
prediction_data.loc[:, 'year'] = 2020
prediction_data.loc[:, 'month'] = 3
prediction_data.loc[:, 'day'] = 31
# The model is run on the whole of prediction_data ...
first_prediction = lin_model.predict(prediction_data)
# ... but only element [0] of its output is kept, as a one-row frame.
# NOTE(review): the structure of the predict() output is not visible here; the
# author states the slices below are right.
first_prediction = pd.DataFrame(data = {'ndc':first_prediction[0][0], 'predictions':first_prediction[0][1][0]}, index = [0]) #these element slices are correct
# Build the date column from the year/month/day columns.
# NOTE(review): the assigned Series carries prediction_data's index while
# first_prediction is indexed [0]; pandas aligns on index labels, so this only
# yields a date if prediction_data has a row labelled 0 -- confirm.
first_prediction['date'] = pd.to_datetime(prediction_data[['year', 'month', 'day']], infer_datetime_format=True, errors = 'coerce')
# Initial scatter source: the kept prediction, if its ndc is the default ID.
# NOTE(review): this is empty when element [0] belongs to a different ndc -- confirm.
prediction_source = ColumnDataSource(first_prediction[first_prediction.loc[:, 'ndc']=='781593600'])
# Every ndc in prediction_data, as strings.
id_list = list(prediction_data['ndc'].astype(str))
# Build the figure: an 800x800 canvas with a datetime x-axis
plot = figure(
    title='Drug Price Over Time',
    x_axis_type='datetime',
    plot_width=800,
    plot_height=800,
    tools="crosshair, pan, reset, save, wheel_zoom",
)
# Let the x-range follow the data, with a small padding on either side
plot.x_range = DataRange1d(range_padding=.01)
# Hover tooltip showing the date and the price of the point under the cursor
plot.add_tools(
    HoverTool(
        tooltips=[('Date', '@date{%F}'), ('Price', '@nadac_per_unit')],
        formatters={'date': 'datetime'},
    )
)
# Historical prices are drawn as a line, the model prediction as a marker
plot.line('date', 'nadac_per_unit', source=historical_source)
plot.scatter('date', 'predictions', source=prediction_source)

# Drop-down used to pick which drug ID is displayed
id_select = Select(title='drug_id', value='781593600', options=id_list)
# Set up callbacks
def update_data(attrname, old, new):
    """Refresh the historical line and the predicted point for the selected ID.

    Registered as the ``on_change('value', ...)`` handler of ``id_select``;
    Bokeh calls it with the attribute name and its old and new values. The
    selected ID is read from ``id_select.value``.

    The function reads the module-level ``historical_data``,
    ``prediction_data`` and ``lin_model`` and pushes the results into
    ``historical_source`` and ``prediction_source``. It never rebinds
    ``prediction_data``: assigning to that name inside the function would make
    it a local variable and raise ``UnboundLocalError`` on the first read.
    """
    # Get the current select value
    curr_id = id_select.value

    # Historical series for the selected ID. IDs are compared as strings, the
    # same representation used to build the select options.
    new_historical = historical_data[historical_data['ndc'].astype(str) == curr_id]
    new_historical = new_historical.sort_values('date')
    historical_source.data = ColumnDataSource.from_df(new_historical)

    # Feature rows for the selected ID, held in a local name so the full
    # module-level table stays available for later selections.
    selected_features = prediction_data[prediction_data['ndc'].astype(str) == curr_id]
    if selected_features.empty:
        # Nothing to predict for this ID: clear the marker instead of leaving
        # the previous ID's prediction on the plot.
        prediction_source.data = {name: [] for name in prediction_source.data}
        return

    raw_prediction = lin_model.predict(selected_features)
    new_prediction = pd.DataFrame(
        data={'ndc': raw_prediction[0][0], 'predictions': raw_prediction[0][1][0]},
        index=[0],
    )  # these element slices are correct

    # Assemble the date from the year/month/day columns and assign it as a
    # scalar. Assigning the Series itself would align on index labels, and the
    # filtered rows are generally not labelled 0, which would produce NaT.
    prediction_dates = pd.to_datetime(
        selected_features[['year', 'month', 'day']], errors='coerce'
    )
    new_prediction['date'] = prediction_dates.iloc[0]

    # Overwrite the existing source in place; the scatter glyph is bound to
    # prediction_source, so a newly created ColumnDataSource would never be drawn.
    prediction_source.data = ColumnDataSource.from_df(new_prediction)
# Page layout: the select widget in its own column, in a row with the plot
inputs = column(id_select)
curdoc().title = 'Drug Price Predictor'
curdoc().add_root(row(inputs, plot, width=1000))
# Re-run update_data every time a different ID is chosen in the select menu
id_select.on_change('value', update_data)