pythonplotly-dash

Extract only filtered data from table in Dash Python


I've built a website in Dash Python and I display all the data of my csv file in a table which can be filtered.

I want the user to be able to extract the data from the table. When there is not filter, I want him to be able to extract full data and when it's filtered by him, I want him to be able to extract the data filtered.

For that, I use the dcc.download component which is working very well and I also use df (from df = pd.read_csv("./data.csv")) as a global variable, to be able to reuse it in my extract callback.

Here is my code:

from dash import Dash, dash_table, dcc, html, State
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
import pandas as pd
import csv
import time
import xlsxwriter
from datetime import datetime
from dash_extensions.enrich import Output, DashProxy, Input, MultiplexerTransform
import os

app = DashProxy(external_stylesheets=[dbc.themes.BOOTSTRAP], transforms=[MultiplexerTransform()])
server = app.server

df = pd.read_csv("./data.csv")
df = df.fillna("NaN")
PAGE_SIZE = 20

# Layout
app.layout = html.Div(children=[
        dcc.Download(id="download-dataframe-csv"),
        dbc.Card([
            dbc.CardBody([
                dash_table.DataTable(
                    id='table-sorting-filtering',
                    columns=[{'name': i, 'id': i} for i in df.columns],
                    page_current= 0,
                    page_size= PAGE_SIZE,
                    page_action='custom',
                    filter_action='custom',
                    filter_query='',
                    sort_action='custom',
                    sort_mode='multi',
                    sort_by=[],
                    style_data={'text-align':'center'},
                    style_header={
                            'backgroundColor': 'white',
                            'fontWeight': 'bold',
                            'text-align':'center'
                        },
                    style_cell={'padding': '5px'},
                    style_as_list_view=True,
                )]
            )],
            style={"margin-left":"15px", "margin-right":"15px"}
        ),
        dcc.Interval(
                id='interval-component',
                interval=1*1000, # in milliseconds
                n_intervals=0
            ),
        ]
    )]
)


operators = [['ge ', '>='],
             ['le ', '<='],
             ['lt ', '<'],
             ['gt ', '>'],
             ['ne ', '!='],
             ['eq ', '='],
             ['contains ']]

def split_filter_part(filter_part):
    for operator_type in operators:
        for operator in operator_type:
            if operator in filter_part:
                name_part, value_part = filter_part.split(operator, 1)
                name = name_part[name_part.find('{') + 1: name_part.rfind('}')]
                value_part = value_part.strip()
                v0 = value_part[0]
                if (v0 == value_part[-1] and v0 in ("'", '"', '`')):
                    value = value_part[1: -1].replace('\\' + v0, v0)
                else:
                    try:
                        value = float(value_part)
                    except ValueError:
                        value = value_part
                return name, operator_type[0].strip(), value
    return [None] * 3

# Display data in table and manage filtering
@app.callback(
    Output('table-sorting-filtering', 'data'),
    Input('table-sorting-filtering', "page_current"),
    Input('table-sorting-filtering', "page_size"),
    Input('table-sorting-filtering', 'sort_by'),
    Input('table-sorting-filtering', 'filter_query'),
    Input('interval-component', 'n_intervals'))

def update_table(page_current, page_size, sort_by, filter, n):
    global df
    global date_time
    df = pd.read_csv("./data.csv")
    df = df.fillna("NaN")
    date_time = last_modification_time_of_csv("./data.csv")
    filtering_expressions = filter.split(' && ')
    for filter_part in filtering_expressions:
        col_name, operator, filter_value = split_filter_part(filter_part)
        if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
            # these operators match pandas series operator method names
            df = df.loc[getattr(df[col_name], operator)(filter_value)]
        elif operator == 'contains':
            if type(filter_value) is str:
                df = df.loc[df[col_name].str.contains(filter_value)]
    if len(sort_by):
        df = df.sort_values(
            [col['column_id'] for col in sort_by],
            ascending=[
                col['direction'] == 'asc'
                for col in sort_by
            ],
            inplace=False
        )
    page = page_current
    size = page_size
    return df.iloc[page * size: (page + 1) * size].to_dict('records')

# Export button
@app.callback(
    Output("download-dataframe-csv", "data"),
    Input("button_export", "n_clicks"),
    prevent_initial_call=True,
)
def export_on_click(n_clicks):
    global df
    return dcc.send_data_frame(df.to_excel, "export.xlsx")

if __name__ == '__main__':
    app.run_server(debug=True, host='0.0.0.0')

I notice that I have some problems when extracting the data. Sometimes it's working very well, sometimes I'm extracting data that I didn't filter at all. So I'm wondering if global variables are a good solutions because I have multiple users using my website.

I saw there is a way to do that with state in Dash Python but I didn't understand well how it's working and if it’s something I can use in my case.

Someone can please help me?

Thank you


Solution

  • Globals are generally something to avoid, especially in Dash. The good news is, the fix should be pretty easy here. Since you're using custom filtering on the back end, rather than native, front-end filtering, I think what you need is to add the data prop to your download callback as state. If you'd been using front-end filtering, you'd have to use the derived_virtual_data prop instead. The following should work.

    @app.callback(
        Output("download-dataframe-csv", "data"),
        Input("button_export", "n_clicks"),
        State("table-sorting-filtering", "data"),
        prevent_initial_call=True,
    )
    def export_on_click(n_clicks, table_data):
        df = pd.DataFrame.from_dict(table_data)
        return dcc.send_data_frame(df.to_excel, "export.xlsx")