Hi Sarah,
Sorry for not being specific enough. And congrats on the 0.9.3 release! Please find sample data as well as code attached.
code:
import pandas as pd
import numpy as np
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure, ColumnDataSource
from datetime import datetime
output_notebook()
PATH = ‘’
filename = ‘test_RR.xlsx’
df = pd.read_excel(PATH + filename)
df[‘TA_END’] = np.where(pd.isnull(df[‘ET Date’]), df.L_END, np.where(df[‘ET Date’] < df.L_END, df[‘ET Date’], df.L_END)) # just some data cleaning, don’t bother with this
GFA_SCALE_FACTOR = 2
df[‘GFA_radius’] = np.sqrt( df.GFA / np.pi ) * GFA_SCALE_FACTOR
import seaborn as sns
colors = list(sns.cubehelix_palette(28, start=.5, rot=-.75))
hex_colors = np.array([‘#%02x%02x%02x’ % (c[0]*255, c[1]*255, c[2]*255) for c in colors])
df[‘color’] = hex_colors[df.FL - 4]
``
And error occurs here:
source = ColumnDataSource(df)
p = figure(x_axis_type=“datetime”, width = 800, height = 400)
p.circle(x=‘TA_END’, y=‘Eff Rent’,
size= ‘GFA_radius’,
fill_alpha=0.8, line_width=0.5, line_alpha=0.5, color = ‘color’, source = source)
show(p)
``
Error message:
<a class='attachment' href='//bokeh-discourse-uploads.s3.dualstack.us-east-1.amazonaws.com/original/1X/a892e645c853374525d9a3d26a73dff24adf7a9e.xlsx'>test_RR.xlsx</a> (7.92 KB)
<a class='attachment' href='//bokeh-discourse-uploads.s3.dualstack.us-east-1.amazonaws.com/original/1X/1eb50bc429895b399c263366e4c526a52dd1fbdb.ipynb'>Bokeh TA Chart.ipynb</a> (1.61 MB)
<details class='elided'>
<summary title='Show trimmed content'>···</summary>
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-6-ca5f62d590cb> in <module>()
5 size= 'GFA_radius',
6 fill_alpha=0.8, line_width=0.5, line_alpha=0.5, color = 'color', source = source)
----> 7 show(p)
C:\Users\paul.dong\Documents\WinPython-64bit-2.7.9.4\python-2.7.9.amd64\lib\site-packages\bokeh\io.pyc in show(obj, browser, new)
235
236 '''
--> 237 _show_with_state(obj, _state, browser, new)
238
239 def _show_with_state(obj, state, browser, new):
C:\Users\paul.dong\Documents\WinPython-64bit-2.7.9.4\python-2.7.9.amd64\lib\site-packages\bokeh\io.pyc in _show_with_state(obj, state, browser, new)
242
243 if state.notebook:
--> 244 _show_notebook_with_state(obj, state)
245
246 elif state.session:
C:\Users\paul.dong\Documents\WinPython-64bit-2.7.9.4\python-2.7.9.amd64\lib\site-packages\bokeh\io.pyc in _show_notebook_with_state(obj, state)
260 publish_display_data({'text/html': snippet})
261 else:
--> 262 publish_display_data({'text/html': notebook_div(obj)})
263
264 def _show_server_with_state(obj, state, new, controller):
C:\Users\paul.dong\Documents\WinPython-64bit-2.7.9.4\python-2.7.9.amd64\lib\site-packages\bokeh\embed.pyc in notebook_div(plot_object)
95 modelid = ref["id"],
96 modeltype = ref["type"],
---> 97 all_models = serialize_json(plot_object.dump()),
98 )
99 script = PLOT_SCRIPT.render(
C:\Users\paul.dong\Documents\WinPython-64bit-2.7.9.4\python-2.7.9.amd64\lib\site-packages\bokeh\protocol.pyc in serialize_json(obj, encoder, **kwargs)
122 if settings.pretty(False):
123 kwargs["indent"] = 4
--> 124 return json.dumps(obj, cls=encoder, **kwargs)
125
126 deserialize_json = json.loads
C:\Users\paul.dong\Documents\WinPython-64bit-2.7.9.4\python-2.7.9.amd64\lib\json\__init__.pyc in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, encoding, default, sort_keys, **kw)
248 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
249 separators=separators, encoding=encoding, default=default,
--> 250 sort_keys=sort_keys, **kw).encode(obj)
251
252
C:\Users\paul.dong\Documents\WinPython-64bit-2.7.9.4\python-2.7.9.amd64\lib\json\encoder.pyc in encode(self, o)
205 # exceptions aren't as detailed. The list call should be roughly
206 # equivalent to the PySequence_Fast that ''.join() would do.
--> 207 chunks = self.iterencode(o, _one_shot=True)
208 if not isinstance(chunks, (list, tuple)):
209 chunks = list(chunks)
C:\Users\paul.dong\Documents\WinPython-64bit-2.7.9.4\python-2.7.9.amd64\lib\json\encoder.pyc in iterencode(self, o, _one_shot)
268 self.key_separator, self.item_separator, self.sort_keys,
269 self.skipkeys, _one_shot)
--> 270 return _iterencode(o, 0)
271
272 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
C:\Users\paul.dong\Documents\WinPython-64bit-2.7.9.4\python-2.7.9.amd64\lib\site-packages\bokeh\protocol.pyc in default(self, obj)
117 return obj.to_css()
118 else:
--> 119 return self.transform_python_types(obj)
120
121 def serialize_json(obj, encoder=BokehJSONEncoder, **kwargs):
C:\Users\paul.dong\Documents\WinPython-64bit-2.7.9.4\python-2.7.9.amd64\lib\site-packages\bokeh\protocol.pyc in transform_python_types(self, obj)
83 # Datetime, Date
84 elif isinstance(obj, (dt.datetime, dt.date)):
---> 85 return calendar.timegm(obj.timetuple()) * 1000.
86 # Numpy datetime64
87 elif isinstance(obj, np.datetime64):
C:\Users\paul.dong\Documents\WinPython-64bit-2.7.9.4\python-2.7.9.amd64\lib\calendar.pyc in timegm(tuple)
611 """Unrelated but handy function to calculate Unix timestamp from GMT."""
612 year, month, day, hour, minute, second = tuple[:6]
--> 613 days = datetime.date(year, month, 1).toordinal() - _EPOCH_ORD + day - 1
614 hours = days*24 + hour
615 minutes = hours*60 + minute
ValueError: month must be in 1..12
``
Your help would be greatly appreciated. I did manage to get the code to work, but only by breaking the dataframe down into Series and passing them directly to ColumnDataSource, like this:
source = ColumnDataSource(
data=dict(
x=df.TA_END,
y=df[‘Eff Rent’],
size = df.GFA_radius,
color = df.color,
name = df.name,
L_START = [t.strftime(‘%m/%d/%Y’) for t in df.L_START]
)
)
``
In short, I am just curious why the first construction of ColumnDataSource didn’t work and the second somehow works.
Bests,
Paul
On Friday, 28 August 2015 20:03:44 UTC+8, Sarah Bird wrote:
Hi Paul,
There isn’t enough detail in your code sample to know what might be going wrong. You may need to post a sample of the data and the full code and error message somewhere.
The codebase that handles timestamps is here if you haven’t seen it yet: https://github.com/birdsarah/gtimelog-viz/
It’s not an area that I’m great at, but if you post us some data and your full bokeh code, maybe we can help.
On Fri, Aug 28, 2015 at 9:53 AM, PaulYCDong [email protected] wrote:
Hi Sarah,
I would say I am a big fan of your work, not only your excellent contribution to Bokeh, but your humanitarian efforts too!
I watched your talk at EuroPython 2015 and indeed learned a lot, so I am pasting the link to the source code of your example here for the benefit of the rest of us in the forum. This talk actually made me less intimidated by the model interface.
https://github.com/birdsarah/europython-2015-bokeh and your github has a lot more helpful stuff in learning Bokeh, will spend some time on these.
I think I may be close to understanding why my original code wouldn’t work: when imported into the ColumnDataSource from a pandas dataframe, all dates are typed as pandas Timestamp, whereas when passed in as pandas Series, they are typed as numpy datetime64.
The difference, I believe should be subtle, but somehow it is breaking down my code above.
from bokeh.models import HoverTool
source = ColumnDataSource(df)
p = figure(x_axis_type=“datetime”, width = 800, height = 400)
p.circle(x=‘TA_END’, y=‘Eff Rent’,
size= ‘GFA_radius’,
fill_alpha=0.8, line_width=0.5, line_alpha=0.5, color = ‘color’, source = source)
show(p)
However, I did see you working with the Timestamp axis in your example, so I am not sure what the problem would be.
I hope you or someone else can help me understand the issue further.
thanks and regards,
Paul
On Friday, 28 August 2015 04:40:41 UTC+8, Sarah Bird wrote:
Hi Paul,
“But I really hope someone could do a quick tutorial rendering the same plot under different interfaces, chart, plotting and models, so that we can fully appreciate how the inner workings of Bokeh,”
I tried to cover this in my Europython talk (https://youtu.be/EQCtGIdA0nE?t=6m13s) - if you haven’t seen it, maybe it would help. If you did, any feedback on what more you’d like to see would be great.
Sincerely,
Sarah Bird
On Wed, Aug 26, 2015 at 4:07 AM, PaulYCDong [email protected] wrote:
Ed,
Many thanks for pointing me in the right direction, and your progress is impressive.
I think the constructor of the data source, ColumnDataSource, has some issue with pandas dataframes.
So I just get my hands dirty and specified all the columns I want to use in the chart, and miraculously, it worked!
My code is below. But I really hope someone could do a quick tutorial rendering the same plot under the different interfaces (charts, plotting and models), so that we can fully appreciate the inner workings of Bokeh and what defaults are used in the higher-level charts, and then learn how to amend them.
import pandas as pd
import numpy as np
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure, ColumnDataSource
from datetime import datetime
output_notebook()
PATH = ‘’
filename = ‘test_RR.xlsx’
df = pd.read_excel(PATH + filename)
df[‘TA_END’] = np.where(pd.isnull(df[‘ET Date’]), df.L_END, np.where(df[‘ET Date’] < df.L_END, df[‘ET Date’], df.L_END))
GFA_SCALE_FACTOR = 2
df[‘GFA_radius’] = np.sqrt( df.GFA / np.pi ) * GFA_SCALE_FACTOR
import seaborn as sns
colors = list(sns.cubehelix_palette(28, start=.5, rot=-.75))
hex_colors = np.array([‘#%02x%02x%02x’ % (c[0]*255, c[1]*255, c[2]*255) for c in colors])
df[‘color’] = hex_colors[df.FL - 4]
from bokeh.models import HoverTool
source = ColumnDataSource(
data=dict(
x=df.TA_END,
y=df[‘Eff Rent’],
size = df.GFA_radius,
color = df.color,
name = df.name,
L_START = [t.strftime(‘%m/%d/%Y’) for t in df.L_START]
)
)
TOOLS=“pan,wheel_zoom,reset,hover,poly_select,box_select”
p = figure(x_axis_type=“datetime”, width = 800, height = 400, tools = TOOLS)
p.circle(x = ‘x’, y= ‘y’,
size= ‘size’,
line_width=0.5, line_alpha=0.5, color = ‘color’, source = source)
hover = p.select(dict(type=HoverTool))
hover.tooltips = [
(“name”, “@name”),
(“lease start”, “@L_START”),
]
show(p)
On Tuesday, 25 August 2015 21:47:03 UTC+8, Ed Oxenham wrote:
Paul,
Glad to have helped. I’m only a few minutes ahead of you in the learning curve with Bokeh - I started looking at it on Friday!
I tried your code - stripping out the hover tool stuff and got the same error as you on both python 2.7.6 and 3.4.
Clearly the problem is with the introduction of the datasource (as the code works with raw pandas dataframe columns). I can’t see anything that you have done wrong as I have almost identical code that plots pandas datetimes via a datasource without any problem.
It looks like it might be a bug. Hopefully someone with more experience in Bokeh will take a look at this post and point out the (inevitably) obvious thing we are both missing.
regards
Ed
On Tuesday, 25 August 2015 11:43:53 UTC+1, PaulYCDong wrote:
Many thanks, Ed!
With your generous help, I have managed to complete the task I set out to do from 1-4.
However, I realised I still face a pretty steep learning curve with regard to the HoverTool, which is the original reason I was inspired to learn Bokeh.
The code that works:
Setting things up :-
import pandas as pd
import numpy as np
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure, ColumnDataSource
from datetime import datetime
output_notebook()
PATH = ‘’
filename = ‘test_RR.xlsx’
df = pd.read_excel(PATH + filename)
df[‘TA_END’] = np.where(pd.isnull(df[‘ET Date’]), df.L_END, np.where(df[‘ET Date’] < df.L_END, df[‘ET Date’], df.L_END)) # just some data cleaning, don’t bother with this
GFA_SCALE_FACTOR = 2
df[‘GFA_radius’] = np.sqrt( df.GFA / np.pi ) * GFA_SCALE_FACTOR
import seaborn as sns
colors = list(sns.cubehelix_palette(28, start=.5, rot=-.75))
hex_colors = np.array([‘#%02x%02x%02x’ % (c[0]*255, c[1]*255, c[2]*255) for c in colors])
df[‘color’] = hex_colors[df.FL - 4]
``
p = figure(x_axis_type=“datetime”, width = 800, height = 400)
p.circle(x=df[‘TA_END’], y=df[‘Eff Rent’],
size= df[‘GFA_radius’]* GFA_SCALE_FACTOR,
fill_alpha=0.8, line_width=0.5, line_alpha=0.5, color = df[‘color’])
show(p)
``
However, when I tried the code below to add a hover tool, I hit the same “month must be in 1..12” error:
from bokeh.models import HoverTool
source = ColumnDataSource(df)
hover = HoverTool(
tooltips=[
(“name”, “@name”),
(“(x,y)”, “($x, $y)”),
(“lease start”, “@L_START”),
]
)
p = figure(x_axis_type=“datetime”, width = 800, height = 400)
p.circle(x=‘TA_END’, y=‘Eff Rent’,
size= ‘GFA_radius’,
fill_alpha=0.8, line_width=0.5, line_alpha=0.5, color = ‘color’, source = source)
show(p)
``
The frustrating part is, I am not even trying to add the hovertool, just trying to do the same chart as above, then I hit the error:
ValueError: month must be in 1..12
My guess is that something went wrong when setting up the x-axis as datetime…
I would appreciate any help in solving this, thanks!
On Tuesday, 25 August 2015 16:31:53 UTC+8, Ed Oxenham wrote:
Paul,
With regards to your first question.
Based on my limited experimentation - the ‘size’ parameter of a circle is the radius defined in absolute screen units. This does not scale up as you zoom in.
However, the ‘radius’ parameter defines the radius of the circle in relation to the axis units.
Try zooming and playing with the examples below to understand the difference.
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
output_notebook()
p1 = figure(width = 800, height = 400)
p1.circle(x=[1,2,3,4,5], y=[1,2,3,4,5],
size=[2,4,8,16,32],
fill_alpha=0.8, line_width=0.5, line_alpha=0.5, color=‘red’)
p2 = figure(width = 800, height = 400)
p2.circle(x=[1,2,3,4,5], y=[1,2,3,4,5],
radius=[.1,.2,.3,.4,.5],
fill_alpha=0.8, line_width=0.5, line_alpha=0.5, color=‘red’)
show(p1)
show(p2)
``
In the case of your question 1 - you need to use screen units because the GFA variable is independent of the axes.
The example below should help you to work it out.
Importing libs and data
import pandas as pd
import numpy as np
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
output_notebook()
PATH = ‘’
filename = ‘test_RR.xlsx’
df = pd.read_excel(PATH + filename)
df.TA_END = pd.to_datetime(df.TA_END)
df[‘GFA_radius’] = np.sqrt( df.GFA / np.pi )
GFA_SCALE_FACTOR = 2
p = figure(x_axis_type=“datetime”, width = 800, height = 400)
p.circle(x=df.TA_END, y=df[‘Eff Rent’],
size=df.GFA_radius * GFA_SCALE_FACTOR,
fill_alpha=0.8, line_width=0.5, line_alpha=0.5, color=‘red’)
show(p)
``
regards
Ed
On Tuesday, 25 August 2015 02:50:13 UTC+1, PaulYCDong wrote:
First of all, Bokeh is a great library and thanks to all who help the beginners in this group.
I am trying to build a bubble chart for a batch of commercial leases where:
- the x axis represents the date the lease will end (Datetime: 2015 - 2020)
- the y axis represents the rent level (Float: 200 - 500)
- the size / radius of each circle represents the size of the premises (GFA, Float: 200 - 8,000)
- the color of each circle represents the floor # (Integer: 1 - 40)
- a hover tool displays metadata of the lease on mouseover of the bubble
I think I can manage #1 and #2, but #3 poses a challenge. Below is my first attempt with the bokeh.plotting interface:
Importing libs and data
import pandas as pd
import numpy as np
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from datetime import datetime
output_notebook()
PATH = ‘’
filename = ‘test_RR.xlsx’
df = pd.read_excel(PATH + filename)
``
#Sample data below (also in attachment):
In [35]:
df
Out[35]:
name
L_START
L_END
ET Date
Eff Rent
FL
GFA
TA_END
0
a Int’l
2013-05-05
2016-04-05
NaT
380
14
2000
2016-04-05
1
b dom
2010-06-05
2017-05-05
2015-12-31
275
5
500
2015-12-31
#my attempt to normalize the GFA (size of the premise)
df[‘radius’] = ((df.GFA - df.GFA.mean()) / df.GFA.std() * 40 ) + 40
Bokeh Plotting
p = figure(x_axis_type=“datetime”, width = 800, height = 400)
p.circle(df.TA_END, df[‘Eff Rent’], radius = df.radius, fill_alpha=0.8, line_width=0.5, line_alpha=0.5)
#but i only get a blank plot, after show(p)
show(p)
#I can get some results if I use size parameter instead of radius, but I still don’t know how to scale the size parameter properly: was it based on on-screen pixels or any of the value axis?
Bokeh Model (I hope I can learn how to use the Model interface as well, as it looks pretty cool)
df.TA_END = df.TA_END.astype(datetime)
type(df.TA_END[0])
from bokeh.models import ColumnDataSource, Circle, HoverTool
chart_df = ColumnDataSource(df)
circle_glyph = Circle(x =
``
…