ImageURL plots outputs a blank page

I’m practicing high dimensional dataset reduction techniques where the data is a file of 121 images. The problem I’m having is that when I run the command from the terminal, python -m bokeh serve --show, it returns a blank page. I think I messed up my callback functions or my plots but I can’t quite see the issue and how to solve it.

Heres what my code look likes.

import glob
import os
import numpy as np
from PIL import Image

from sklearn.manifold import TSNE
from umap import UMAP
from sklearn.decomposition import PCA

from bokeh.plotting import figure, curdoc
from bokeh.models import ColumnDataSource, Slider, ImageURL
from bokeh.layouts import layout, row, column

This is where I preprocess the data:

# Fetch the number of images using glob or some other path analyzer
N = len(glob.glob('static/*.jpg'))

# Find the root directory of your app to generate the image URL for the bokeh server
ROOT = os.path.split(os.path.abspath(os.path.dirname('__file__')))[1] + '/'

# Number of bins per color for the 3D color histograms
N_BINS_COLOR = 15

# Define an array containing the 3D color histograms. We have one histogram per image each having N_BINS_COLOR^3 bins.
# i.e. an N * N_BINS_COLOR^3 array
color_histograms = np.zeros((N,N_BINS_COLOR**3),dtype=int)

# initialize an empty list for the image file paths
url_list = []

# Compute the color and channel histograms
for idx, f in enumerate(glob.glob('static/*.jpg')):
    # open image using PILs Image package
    img = Image.open(f)

    # Convert the image into a numpy array and reshape it such that we have an array with the dimensions (N_Pixel, 3)
    a = np.asarray(img)
    a = a.reshape(a.shape[0]*a.shape[1], 3)

    # Compute a multi dimensional histogram for the pixels, which returns a cube
    H, edges = np.histogramdd(a,bins=N_BINS_COLOR)

    # However, later used methods do not accept multi dimensional arrays, so reshape it to only have columns and rows
    # (N_Images, N_BINS^3) and add it to the color_histograms array you defined earlier
    color_histograms[idx] = H.reshape(1, N_BINS_COLOR**3)
    
    # Append the image url to the list for the server
    url = ROOT + f
    url_list.append(url)

def compute_umap(n_neighbors=15) -> np.ndarray:
    """performes a UMAP dimensional reduction on color_histograms using given n_neighbors"""
    # compute and return the new UMAP dimensional reduction
    reduced_umap = UMAP(n_neighbors=n_neighbors).fit_transform(color_histograms)
    pass

    return reduced_umap

def on_update_umap(old, attr, new):
    """callback which computes the new UMAP mapping and updates the source_umap"""
    # Compute the new umap using compute_umap
    source_umap = compute_umap() 

    # update the source_umap
    source_umap.data = source_umap

    pass

def compute_tsne(perplexity=4, early_exaggeration=10) -> np.ndarray:
    """performes a t-SNE dimensional reduction on color_histograms using given perplexity and early_exaggeration"""
    # compute and return the new t-SNE dimensional reduction
    reduced_tsne = TSNE(perplexity=perplexity,early_exaggeration=early_exaggeration).fit_transform(color_histograms)
    pass

    return reduced_tsne

def on_update_tsne(old, attr, new):
    """callback which computes the new t-SNE mapping and updates the source_tsne"""
    # Compute the new t-sne using compute_tsne
    source_tsne = compute_tsne()

    # update the source_tsne
    source_tsne.data = source_tsne
    
    pass

This is where I create my column data source:

# Calculate the indicated dimensionality reductions
pca_reduce = PCA(n_components=2).fit_transform(color_histograms)
tsne_reduce = compute_tsne()
umap_reduce = compute_umap()

# Construct three data sources, one for each dimensional reduction,
# each containing the respective dimensional reduction result and the image paths
source_pca = ColumnDataSource(dict(url=url_list,x=pca_reduce[:,0],y=pca_reduce[:,1]))
source_tsne = ColumnDataSource(dict(url=url_list,x=tsne_reduce[:,0],y=tsne_reduce[:,1]))
source_umap = ColumnDataSource(dict(url=url_list,x=umap_reduce[:,0],y=umap_reduce[:,1]))

This is where I create my plots

# Create a first figure for the PCA data. Add the wheel_zoom, pan and reset tools to it.
# And use bokehs image_url to plot the images as glyphs
pca_plot = figure(title='PCA',tools='wheel_zoom,pan,reset')
pca_plot.add_glyph(source_pca,ImageURL(url='url',x='x',y='y'))

# Create a second plot for the t-SNE result in the same fashion as the previous.
tsne_plot = figure(title='t-SNE',tools='wheel_zoom,pan,reset')
tsne_plot.add_glyph(source_tsne,ImageURL(url='url',x='x',y='y'))

# Create a third plot for the UMAP result in the same fashion as the previous.
umap_plot = figure(title='UMAP',tools='wheel_zoom,pan,reset')
umap_plot.add_glyph(source_umap,ImageURL(url='url',x='x',y='y'))

This is where I set my callbacks to update perplexity, early_exaggeration and n_neighbours

# Create a callback, such that whenever the value of the slider changes, on_update_tsne is called.
def tsne_callback(attr, old, new):
    
    perplexity = slider_perp.value_throttled
    source_tsne.data = on_update_tsne(perplexity=perplexity)
    
    pass

# Create a slider to control the t-SNE hyperparameter "perplexity" with a range from 2 to 20 and a title "Perplexity"
slider_perp = Slider(start=2, end=20, value=2, step=1, title='Perplexity')
slider_perp.on_change('value',tsne_callback)

# Create a second slider to control the t-SNE hyperparameter "early_exaggeration"
# with a range from 2 to 50 and a title "Early Exaggeration"
slider_early = Slider(start=2, end=50, value=2, step=1, title='Early Exaggeration')

# Connect it to the on_update_tsne callback in the same fashion as the previous slider
slider_early.on_change('value',on_update_tsne)

# Create a third slider to control the UMAP hyperparameter "n_neighbors"
slider_neigh = Slider(start=2, end=20, value=15, step=1, title='N Neighbors')

# Connect it to the on_update_umap callback in the same fashion as the previous slider
slider_neigh.on_change('value',on_update_umap)

lt = layout(column(row(pca_plot,tsne_plot,umap_plot),row(slider_perp,slider_early,slider_neigh)))
curdoc().add_root(lt)
curdoc().title = 'Title'

Expected output should look like so

Are there any warnings or errors in either:

  • the bokeh serve output
  • the browser’s JavaScript console log

There no errors or warnings on bokeh serve in the terminal. I’m not sure about the JS log since I’m using my phone right now and away from the computer. Another thing though, when I execute show(lt), I get the plot as expected like above but without the images shown. I know show(lt) doesn’t work with non JS callback functions but it usually shows the initial plot. Maybe there’s something wrong in my data format?

@Nut I’m not sure, but since I can’t actually run your code directly (missing data), the first place to look for clues is the browser’s JavaScript console, when you get a chance.

Ok will do. You could technically test it out yourself. All you have to do is create a folder called static in the same directory as the code above and add some .jpg files to that folder. The requirements are numpy, pillow, bokeh, scikit-learn and umap-learn
@Bryan

Over the last ten years I have volunteered time to help with many thousands of questions. I love Bokeh, and I love helping Bokeh users, but over that time scale, filling in missing bits of problems really adds up. Speaking frankly, at this point I am willing to:

  • clone a complete repo
  • copy and paste complete code

Anything else I will have to leave to others :slight_smile:

1 Like

Ok so I looked at JS console and this is what I see


Any thoughts?

Well, that’s too bad, that just looks like a normal connection and render.

A few random observations just looking at the code:

  • You appear to be defining specific data sources, e.g source_pca, but then in the callbacks you reference source.data That definitely seems like a bug? I would expect error messages in the server log if the sliders are used. But that would not explain an initial empty page.

  • All the pass statements are no-ops and just code noise. The “minimal” in Minimal Reproducible Example is very important to help others help you

  • Same for comments like "# return..." above a return — just noise that gets in the way of quickly scanning the code

  • You are calling on_change on "value" but then referring to value_throttle inside the callbacks. It’s not technically a mistake, but I can’t think of any case where you would not want those to match. Right now the callback will trigger on every slider move, but not use the current latest value, which would be very strange behavior to want.

If I were trying to debug this, at this point I would strip away as much as possible and try to start from something smaller. i.e. pare things down to a single plot and slider, and see if that does or does not work. Oftentimes just the act of doing that alone will reveal an issue to you, or in any case, make a better MRE for folks trying to help.

Ok will pick at it more. Let me know if you would like me to send you the static file

So I tried running python -m bokeh serve --show to test my luck and in the JS console I see this error


And my cmd terminal looks like so

(base) C:\some\path\name\idk>python -m bokeh serve --show
2022-04-01 09:17:03,850 Starting Bokeh server version 2.3.2 (running on Tornado 6.1)
2022-04-01 09:17:03,882 User authentication hooks NOT provided (default user enabled)
2022-04-01 09:17:03,882 Bokeh app running at: http://localhost:5006/
2022-04-01 09:17:03,882 Starting Bokeh server with process id: 6876
2022-04-01 09:17:06,200 WebSocket connection opened
2022-04-01 09:17:06,201 ServerConnection created
2022-04-01 09:17:06,205 404 GET /favicon.ico (::1) 0.00ms
2022-04-01 09:22:05,627 404 GET /favicon.ico (::1) 1.00ms

That error is harmless, unfortunately. It’s just noting there is not favicon available to display.

Ok will pick at it more. Let me know if you would like me to send you the static file

If there is a complete (code + data) zip file I can download or repo I can clone, I am happy to take a closer look.

Ok so I found my problem. Unfortunately I was typing python -m bokeh serve --show instead of python -m bokeh serve --show .

That is I forgot the period at the end lol.

Anyways now thats out of the way, I have to customize my callback functions to update the plots properly. Any suggestions?
@Bryan

Here is my updated code

import glob
import os
import numpy as np
from PIL import Image

from sklearn.manifold import TSNE
from umap import UMAP
from sklearn.decomposition import PCA

from bokeh.plotting import figure, curdoc
from bokeh.models import ColumnDataSource, Slider, ImageURL
from bokeh.layouts import layout, row, column

Preprocessing

# Fetch the number of images using glob or some other path analyzer
N = len(glob.glob('static/*.jpg'))

# Find the root directory of your app to generate the image URL for the bokeh server
ROOT = os.path.split(os.path.abspath(os.path.dirname('__file__')))[1] + '/'

# Number of bins per color for the 3D color histograms
N_BINS_COLOR = 15

# Define an array containing the 3D color histograms. We have one histogram per image each having N_BINS_COLOR^3 bins.
# i.e. an N * N_BINS_COLOR^3 array
color_histograms = np.zeros((N,N_BINS_COLOR**3),dtype=int)

# initialize an empty list for the image file paths
url_list = []

# Compute the color and channel histograms
for idx, f in enumerate(glob.glob('static/*.jpg')):
    # open image using PILs Image package
    img = Image.open(f)

    # Convert the image into a numpy array and reshape it such that we have an array with the dimensions (N_Pixel, 3)
    a = np.asarray(img)
    a = a.reshape(a.shape[0]*a.shape[1], 3)

    # Compute a multi dimensional histogram for the pixels, which returns a cube
    H, edges = np.histogramdd(a,bins=N_BINS_COLOR)

    # However, later used methods do not accept multi dimensional arrays, so reshape it to only have columns and rows
    # (N_Images, N_BINS^3) and add it to the color_histograms array you defined earlier
    color_histograms[idx] = H.reshape(1, N_BINS_COLOR**3)
    
    # Append the image url to the list for the server
    url = ROOT + f
    url_list.append(url)

def compute_umap(n_neighbors=15) -> np.ndarray:
    """performes a UMAP dimensional reduction on color_histograms using given n_neighbors"""
    # compute and return the new UMAP dimensional reduction
    umap_reduce = UMAP(n_neighbors=n_neighbors).fit_transform(color_histograms)
    pass
    
    return umap_reduce

def on_update_umap(old, attr, new):
    """callback which computes the new UMAP mapping and updates the source_umap"""
    # Compute the new umap using compute_umap
    n_neighbors = slider_neigh.value_throttled
    umap_reduce = compute_umap(n_neighbors=n_neighbors) 

    # update the source_umap
    source_umap.data = ColumnDataSource(dict(url=url_list,x=umap_reduce[:,0],y=umap_reduce[:,1]))

    pass

def compute_tsne(perplexity=4, early_exaggeration=10) -> np.ndarray:
    """performes a t-SNE dimensional reduction on color_histograms using given perplexity and early_exaggeration"""
    # compute and return the new t-SNE dimensional reduction
    reduced_tsne = TSNE(perplexity=perplexity,early_exaggeration=early_exaggeration).fit_transform(color_histograms)
    pass
    
    return reduced_tsne

def on_update_tsne(old, attr, new):
    """callback which computes the new t-SNE mapping and updates the source_tsne"""
    # Compute the new t-sne using compute_tsne
    early_exaggeration = slider_early.value_throttled
    source_tsne = compute_tsne(early_exaggeration=early_exaggeration)

    # update the source_tsne
    source_tsne.data = source_tsne
    
    pass

Section ColumnDataSources

# Calculate the indicated dimensionality reductions
pca_reduce = PCA(n_components=2).fit_transform(color_histograms)
tsne_reduce = compute_tsne()
umap_reduce = compute_umap()

# Construct three data sources, one for each dimensional reduction,
# each containing the respective dimensional reduction result and the image paths
source_pca = ColumnDataSource(dict(url=url_list,x=pca_reduce[:,0],y=pca_reduce[:,1]))
source_tsne = ColumnDataSource(dict(url=url_list,x=tsne_reduce[:,0],y=tsne_reduce[:,1]))
source_umap = ColumnDataSource(dict(url=url_list,x=umap_reduce[:,0],y=umap_reduce[:,1]))

Section Plots

# Create a first figure for the PCA data. Add the wheel_zoom, pan and reset tools to it.
# And use bokehs image_url to plot the images as glyphs
pca_plot = figure(title='PCA',tools='wheel_zoom,pan,reset')
pca_plot.add_glyph(source_pca,ImageURL(url='url',x='x',y='y',w=20000,h=10000))

# Create a second plot for the t-SNE result in the same fashion as the previous.
tsne_plot = figure(title='t-SNE',tools='wheel_zoom,pan,reset')
tsne_plot.add_glyph(source_tsne,ImageURL(url='url',x='x',y='y',w=200,h=100))

# Create a third plot for the UMAP result in the same fashion as the previous.
umap_plot = figure(title='UMAP',tools='wheel_zoom,pan,reset')
umap_plot.add_glyph(source_umap,ImageURL(url='url',x='x',y='y',w=3,h=1))

# Create a slider to control the t-SNE hyperparameter "perplexity" with a range from 2 to 20 and a title "Perplexity"
slider_perp = Slider(start=2, end=20, value=2, step=1, title='Perplexity')

Section Callbacks

# Create a callback, such that whenever the value of the slider changes, on_update_tsne is called.
def tsne_callback(attr, old, new):
    
    perplexity = slider_perp.value_throttled
    source.data = on_update_tsne(perplexity=perplexity)
    
    pass

slider_perp.on_change('value_throttled',tsne_callback)

# Create a second slider to control the t-SNE hyperparameter "early_exaggeration"
# with a range from 2 to 50 and a title "Perplexity"
slider_early = Slider(start=2, end=50, value=2, step=1, title='Early Exaggeration')

# Connect it to the on_update_tsne callback in the same fashion as the previous slider
slider_early.on_change('value_throttled',on_update_tsne)

# Create a third slider to control the UMAP hyperparameter "n_neighbors"
slider_neigh = Slider(start=2, end=20, value=15, step=1, title='N Neighbors')

# Connect it to the on_update_umap callback in the same fashion as the previous slider
slider_neigh.on_change('value_throttled',on_update_umap)

lt = layout(row(pca_plot,column(tsne_plot,slider_perp,slider_early),column(umap_plot,slider_neigh)))
curdoc().add_root(lt)
curdoc().title = 'Title'

I believe I’ll have to call ColumnDataSource inside the callback functions.

Right now I’m testing the umap callback, and when I select a new value for n_neighbors widget, I get the error:
ValueError: failed to validate ColumnDataSource(id='1004', ...).data: expected an element of ColumnData(String, Seq(Any)), got ColumnDataSource(id='1205', ...)

I changed this

source_umap.data = ColumnDataSource(dict(url=url_list,x=umap_reduce[:,0],y=umap_reduce[:,1]))

to this

source_umap.data.update((ColumnDataSource(dict(url=url_list,x=umap_reduce[:,0],y=umap_reduce[:,1]))).data)

which did the trick.

1 Like

Last question @Bryan , instead of setting the image glyphs to a specific width and height, is there a way to scale the coordinates automatically to a pleasant size?

@Nut I am not really sure what the question is asking you can set w_units and/or h_units to be "screen" (pixels) or "data" (data-space distance). But you always have to set your own value, regardless of units.