Upload file, clean the file, output to Downloads (like one of the bokeh examples)

Hi,

I am having issues trying to figure out how to download a file like done in this example:

So the user uploads a file, which is very messy, the code cleans it and should output a file downloaded to downloads folder as the example from the bokeh gallery.
Anyone done something similar with a Pandas Dataframe that could show me an example?

from bokeh.io import curdoc
from bokeh.models.widgets import FileInput
import pandas as pd
from bokeh.models import CustomJS
from pybase64 import b64decode
import io
from flask import send_file, session, make_response, Response
from os.path import dirname, join

def upload_csv_file(attr, old, new):
    print('uploading csv file...')
    file = io.BytesIO(b64decode(new))
    wttdata = pd.read_csv(file, sep = ',', header = 0)
    headerData = wttdata[:4]
    wttdata = wttdata.replace('x', '', regex=True)
    wttdata = wttdata.groupby(['Date time'], as_index=False).agg('first')#.reset_index()    
    result = pd.concat([headerData, wttdata], axis=0)
    print(result)
    #resp = make_response(result.to_csv('test_file.csv', index = None, header=True))
    #resp.headers["Content-Disposition"] = "attachment; filename=export.csv"
    #resp.headers["Content-Type"] = "text/csv"
    #result.to_csv('test_file.csv', index = None, header=True)
    return Response(
       result.to_csv('test_file.csv', index = None, header=True),
       mimetype="text/csv",
       headers={"Content-disposition":
       "attachment; filename=filename.csv"})

def fileConverter(doc):
    inputFile = FileInput(accept=".csv,.json,.txt")
    inputFile.on_change('value', upload_csv_file)
    
    doc.add_root(inputFile)

Hi,

Maybe convert the pandas df to a dictionary and use the same setup at the example you refer to?

Or use the flask send_file which you have in your imports (I have no experience, but found a SO thread using this.)

Hi Jonas,

Thanks for taking the time to respond. So I did the first part you mentioned, and it does export the data. But exports it not identical to how pandas does. I assume the download.js file needs to be modified. Maybe this is not something that is related to bokeh. But there is a formula inside the download.js code (table_to_csv). I would need to modify this one so the format comes out correctly. I will need to dig a little more to find out how to get the exact result in csv format as result.to_csv(‘test_file.csv’, index = None, header=True).

from bokeh.io import curdoc
from bokeh.models.widgets import FileInput, Button
import pandas as pd
from bokeh.models import CustomJS, ColumnDataSource
from pybase64 import b64decode
import io
from flask import send_file, session, make_response, Response
from app import app
from os.path import dirname, join
from bokeh.plotting import column

source = ColumnDataSource(data = dict())

def upload_csv_file(attr, old, new):
    print('uploading csv file...')
    file = io.BytesIO(b64decode(new))
    wttdata = pd.read_csv(file, sep = ',', header = 0)
    headerData = wttdata[:4]
    wttdata = wttdata.replace('x', '', regex=True)
    wttdata = wttdata.groupby(['Date time'], as_index=False).agg('first')#.reset_index()    
    result = pd.concat([headerData, wttdata], axis=0)
    #result.to_csv('test_file.csv', index = None, header=True)
    source.data = dict(result)


def baseConverter(doc):
    inputFile = FileInput(accept=".csv,.json,.txt")
    button = Button(label="Export Data", button_type="success")

    inputFile.on_change('value', upload_csv_file)
    button.js_on_event("button_click", CustomJS(args=dict(source=source),
                                code=open(join(dirname(__file__), "download.js")).read()))

    
    layout = column(inputFile, button)
    doc.add_root(layout)

I think the problem with this js code is that it sorts the columns alphabetically.
Is there a way to make the js code just convert the dict (or pandas dataframe) as is?

function table_to_csv(source) {
    const columns = Object.keys(source.data)
    console.log(columns)
    const nrows = source.get_length()
    console.log(nrows)
    const lines = [columns.join(',')]
   console.log(lines)

    for (let i = 0; i < nrows; i++) {
        let row = [];
        for (let j = 0; j < columns.length; j++) {
            const column = columns[j]
            row.push(source.data[column][i].toString())
        }
        lines.push(row.join(','))
    }
    return lines.join('\n').concat('\n')
}


const filename = 'data_result.csv'
const filetext = table_to_csv(source)
const blob = new Blob([filetext], { type: 'text/csv;charset=utf-8;' })

//addresses IE
if (navigator.msSaveBlob) {
    navigator.msSaveBlob(blob, filename)
} else {
    const link = document.createElement('a')
    link.href = URL.createObjectURL(blob)
    link.download = filename
    link.target = '_blank'
    link.style.visibility = 'hidden'
    link.dispatchEvent(new MouseEvent('click'))
}

I think in your python code, source.data = dict(result) is not correct. Either you can do source.data = result.to_dict('list') or source.data = result but then you need to remove the index.

Example:

>>> data = {'a': [1, 2, 3, 4],'b': [30,44,55,22]}
>>> df = pd.DataFrame(data)
>>> src = ColumnDataSource(data=dict())
>>> src.data = df
>>> src.data
{'index': array([0, 1, 2, 3]), 'a': array([1, 2, 3, 4]), 'b': array([30, 44, 55, 22])}
>>> src.data = df.to_dict('list')
>>> src.data
{'a': [1, 2, 3, 4], 'b': [30, 44, 55, 22]}
1 Like

I am not sure that solves the problem. The file headers are important and the download.js code is rearranging them, not sure converting to dict via pandas helps that issue.

I do not think the js code Object.keys(source.data) does any sorting. You can test out on MDN.

But I guess result.to_csv('test_file.csv', index = None, header=True) gives the correct format?

Other than this I do not have any good suggestions.

@Zana how opposed are you to using external JS resources? I’ve set up my cds_to_csv function using PapaParse.

Basically you just ensure PapaParse is embedded/imported, embed this function in the CustomJS or elsewhere, and call it in CustomJS with the cds.data and the filename you want as the args:


function cds_to_csv(cds_data,filename){
    var d = {'fields':[],'data':[]}
    for (const [k,v] of Object.entries(cds_data)){
        d['fields'].push(k)
        d['data'].push(v)
        }
    //need to transpose data for Papa
     d['data'] = d['data'][0].map((_, colIndex) => d['data'].map(row => row[colIndex]))
	 //d3 alternative
	 // d['data'] = d3.transpose(d['data')
    var csv = Papa.unparse(d)
    var csvData = new Blob([csv], {type: 'text/csv;charset=utf-8;'});
    var elem = window.document.createElement('a');
    elem.href = window.URL.createObjectURL(csvData);
    elem.download = filename+'.csv';
    document.body.appendChild(elem);
    elem.click();
    document.body.removeChild(elem)
    }

Hi,

Thanks for the help! I do not mind using external JS resources, in fact I would really like to learn how :smiley: Do you add the papaparse.js code to you repos? Or do you link it somehow?

Ok managed to get it working using PapaParse. Added a colnames (which is in the order I need it to be). Added the PapaParse script tag to the HTML template file. And wrote the following in the download.js file:

function cds_to_csv(cds_data, filename) {
    var d = {'fields':[],'data':[]}
    const columns = colnames.data['columns']
    console.log(columns)
    
    for (let j = 0; j < columns.length; j++) {
        const column = columns[j]
        d['fields'].push(column)
        var data = cds_data.data[column]
        const newArr = data.map(item => item === "NaN" ? '' : item);
        d['data'].push(newArr)
        // console.log(column, data, newArr) 
        }
        
    // Need to transpose data for Papa
    d['data'] = d['data'][0].map((_, colIndex) => d['data'].map(row => row[colIndex]))
    
    var csv = Papa.unparse(d)
    var csvData = new Blob([csv], {type: 'text/csv;charset=utf-8;'});
    var elem = window.document.createElement('a');
    elem.href = window.URL.createObjectURL(csvData);
    elem.download = filename + '.csv';
    document.body.appendChild(elem);
    elem.click();
    document.body.removeChild(elem)
}


const filename = 'WTT_converted'
var downloadfile = cds_to_csv(source, filename)
1 Like