Hi, I have a loop that's producing HTML via Bokeh, and I've noticed something very strange: the HTML files are constantly growing in size, despite the fact that the plotted data is always about the same size (hundreds of kB to a few MB per dataset). Eventually the files are 500MB in size for ~1MB datasets!
What's strange is that the generated HTML still visually produces the right plot for each dataset, it's not like I can see all the previous datasets up to then. Here's the first plot generated in the loop: https://sne.space/sne/SN1993ac.html
...and here's the plot generated 20th in the loop (which is already 100x bigger in file size than the first plot!), note that this plot actually has _less data_ than the first example: https://sne.space/sne/SN1996ac.html
I have verified that if I generate that 20th plot first, its file size is normal. Below is my code. Does anyone have any idea what's going on?
``` Python
# Build the stacked-spectra figure `p2` plus the bin-size and spacing sliders
# for the current event. Reads catalog[entry]['spectra'] and leaves `p2`,
# `sources`, `callback`, `binslider` and `spacingslider` for the page assembly.
if spectraavail and dohtml and args.writehtml:
    event_spectra = catalog[entry]['spectra']

    # One list of wavelengths / fluxes per spectrum; errors are collected only
    # for spectra that declare an 'errorunit'.
    spectrumwave = []
    spectrumflux = []
    spectrumerrs = []
    for spectrum in event_spectra:
        rows = spectrum['data']
        spectrumwave.append([float(row[0]) for row in rows])
        spectrumflux.append([float(row[1]) for row in rows])
        if 'errorunit' in spectrum:
            spectrumerrs.append([float(row[2]) for row in rows])

    # Stack the spectra vertically: the last spectrum gets offset 0 and every
    # earlier one is raised by the summed flux ranges of the spectra after it.
    # The stacked values live in their own list so that `spectrumflux` keeps
    # the unshifted fluxes (shifting in place and subtracting the offsets
    # again afterwards only adds floating-point round-off).
    y_height = 0.
    y_offsets = [0.] * len(spectrumflux)
    stackedflux = [None] * len(spectrumflux)
    for i in reversed(range(len(spectrumflux))):
        flux = spectrumflux[i]
        y_offsets[i] = y_height
        stackedflux[i] = [value + y_height for value in flux]
        y_height += max(flux) - min(flux)

    # Axis limits: exact wavelength extent, stacked flux extent padded by 10%
    # of the largest single-spectrum flux range.
    maxsw = max(map(max, spectrumwave))
    minsw = min(map(min, spectrumwave))
    maxfl = max(map(max, stackedflux))
    minfl = min(map(min, stackedflux))
    maxfldiff = max(max(flux) - min(flux) for flux in stackedflux)
    x_buffer = 0.0  # horizontal padding disabled (was 0.1*(maxsw - minsw))
    x_range = [-x_buffer + minsw, x_buffer + maxsw]
    y_buffer = 0.1 * maxfldiff
    y_range = [-y_buffer + minfl, y_buffer + maxfl]

    # Hover tooltip. The time unit is taken from the last spectrum, which is
    # what the leftover loop variable of the collection loop used to supply.
    tt2 = [
        ("λ", "@x{1.1}"),
        ("Flux", "@y0"),
        ("Epoch (" + event_spectra[-1]['timeunit'] + ")", "@epoch{1.11}"),
        ("Source", "@src"),
    ]
    hover2 = HoverTool(tooltips=tt2)

    # The " + offset" suffix is only appended when several spectra are
    # stacked. It is parenthesised so that a single spectrum still gets the
    # "Flux (unit)" label; without the parentheses the conditional swallowed
    # the whole concatenation and produced an empty label.
    flux_label = ('Flux (' + event_spectra[0]['fluxunit'] + ')'
                  + (' + offset' if len(event_spectra) > 1 else ''))
    p2 = Figure(title='Spectra for ' + eventname,
                x_axis_label=label_format('Wavelength (' + event_spectra[0]['waveunit'] + ')'),
                y_axis_label=label_format(flux_label),
                x_range=x_range, y_range=y_range, tools=tools)
    p2.add_tools(hover2)

    # Pick the "Spectral" palette whose size is closest to the number of
    # spectra, clamped to the sizes the palette family provides.
    colors = brewer["Spectral"]
    palette_size = min(max(min(colors.keys()), len(spectrumwave)), max(colors.keys()))
    colors = colors[palette_size]

    # One data source and one line per spectrum. x0/y0 hold the raw data that
    # the slider callback re-bins from; x/y hold what is currently drawn.
    sources = []
    for i, (wave, flux) in enumerate(zip(spectrumwave, spectrumflux)):
        npoints = len(flux)
        sources.append(ColumnDataSource(
            data=dict(
                x0=wave,
                y0=flux,
                x=wave,
                y=[y_offsets[i] + value for value in flux],
                yoff=[y_offsets[i]],
                binsize=[1.0],
                spacing=[1.0],
                src=[event_spectra[i]['source']] * npoints,
                epoch=[event_spectra[i]['time']] * npoints,
            )
        ))
        p2.line('x', 'y', source=sources[i], line_color=str(colors[i % len(colors)]), line_width=2)

    # Expose the sources to the JS callback as s0, s1, ...
    sdicts = {'s' + str(k): source for k, source in enumerate(sources)}

    # Shared callback for both sliders. For every source it stores the new
    # slider value (spacing or bin size, chosen by the slider's title), then
    # rebuilds x/y from x0/y0: samples are accumulated until their summed width
    # reaches the bin size, each bin is drawn at the width-weighted mean flux,
    # and the vertical offset is scaled by the spacing factor.
    # NOTE(review): the title is compared with the literal 'Spacing' while the
    # slider title below goes through label_format(); confirm label_format
    # leaves that string unchanged.
    callback = CustomJS(args=sdicts, code="""
        for (s = 0; s < """ + str(len(sources)) + """; s++) {
            var data = eval('s'+s).get('data');
            if (cb_obj.get('title') == 'Spacing') {
                data['spacing'][0] = cb_obj.get('value');
            } else {
                data['binsize'][0] = cb_obj.get('value');
            }
            var f = data['binsize'][0]
            var space = data['spacing'][0]
            var x0 = data['x0'];
            var y0 = data['y0'];
            var dx0 = x0[1] - x0[0];
            var yoff = space*data['yoff'][0];
            data['x'] = [x0[0] - 0.5*Math.max(0., f - dx0)];
            data['y'] = [y0[0] + yoff];
            var xaccum = 0.;
            var yaccum = 0.;
            for (i = 0; i < x0.length; i++) {
                var dx;
                if (i == 0) {
                    dx = x0[i+1] - x0[i];
                } else {
                    dx = x0[i] - x0[i-1];
                }
                xaccum += dx;
                yaccum += y0[i]*dx;
                if (xaccum >= f) {
                    data['x'].push(data['x'][data['x'].length-1] + xaccum);
                    data['y'].push(yaccum/xaccum + yoff);
                    xaccum = 0.;
                    yaccum = 0.;
                }
            }
            eval('s'+s).trigger('change');
        }
    """)
    binslider = Slider(start=0, end=20, value=1, step=0.5, title=label_format("Bin size (Angstrom)"), callback=callback)
    spacingslider = Slider(start=0, end=2, value=1, step=0.02, title=label_format("Spacing"), callback=callback)
# Assemble the event page from the photometry (p1) and spectra (p2) plots,
# append the download link, source table and return link just before
# </body>, and write the result to outdir + eventname + ".html".
# Alternative condition that would also emit pages with only one plot type:
# if (photoavail or spectraavail) and dohtml and args.writehtml:
if (photoavail and spectraavail) and dohtml and args.writehtml:
    # NOTE(review): with the `and` condition above only the first branch can
    # run; the other two only matter if the `or` variant is restored.
    if photoavail and spectraavail:
        p = HBox(vplot(hplot(p1, p2, vform(binslider, spacingslider))))
    elif photoavail:
        p = p1
    else:
        p = p2

    # NOTE(review): output size was reported to grow with every loop
    # iteration. Check whether Bokeh state shared between iterations (e.g. a
    # reused `tools` value or the implicit current document) ends up being
    # serialized here -- unconfirmed from this fragment.
    html = file_html(p, CDN, eventname)
    returnlink = r'<br><a href="https://sne.space"><< Return to supernova catalog</a>'
    repfolder = get_rep_folder(catalog[entry])

    # Build the footer once and splice it in with a plain string replace.
    # Catalogue strings must not be used as re.sub replacement templates: a
    # backslash in a name would be interpreted as an escape sequence. A single
    # insertion also avoids rescanning the whole document per source row.
    footer = ['<a href="https://cdn.rawgit.com/astrotransients/' + repfolder
              + '/master/' + eventname
              + '.json" download>Download datafile</a><br><br>\n']
    if catalog[entry]['sources']:
        footer.append('<em>Sources of data:</em><br><table><tr>'
                      '<th width=30px>ID</th><th>Source</th></tr>\n')
        for source in catalog[entry]['sources']:
            # Non-ASCII characters become numeric character references.
            sourcename = source['name'].encode('ascii', 'xmlcharrefreplace').decode("utf-8")
            footer.append('<tr><td>' + source['alias'] + '</td><td>'
                          + sourcename + '</td></tr>\n')
        footer.append('</table>\n')
    footer.append(returnlink + '\n')
    html = html.replace('</body>', ''.join(footer) + '</body>')

    outpath = outdir + eventname + ".html"
    print(outpath)
    # Explicit encoding: the page contains non-ASCII text (e.g. the "λ"
    # tooltip label), so the platform default must not decide the output.
    with open(outpath, "w", encoding="utf-8") as f:
        f.write(html)
```