python plotly visualization scatter large-data-volumes

Plotly scatter large volume geographic data


I tried to write code that creates a visualization of all the forest fires that happened during 2021. The CSV file containing the data is around 1.5 GB. The program looks correct to me, but when I try to run it, it gets stuck without displaying any visualization or error message. The last time I tried, it ran for almost half a day until Python crashed. I don't know whether I have an infinite loop, whether the file is simply too big, or whether there is something else I am missing. Can anyone provide feedback, please?

Here is my code:

import csv
from datetime import datetime
from plotly.graph_objs import Scattergeo , Layout
from plotly import offline

# Read a fire-detection CSV, keep a thinned subset of rows, and write an
# interactive Scattergeo map to 'global_fires_2021.html'.
filename='fire_nrt_J1V-C2_252284.csv'

# Keep one data row out of every KEEP_EVERY. Pushing every row of a very
# large file into a single Scattergeo trace produces an HTML document the
# browser cannot render, so the input is thinned while it is being read.
# Lower this value to plot more points.
KEEP_EVERY = 1000

# plotly marker sizes are in pixels. The raw brightness is divided by this
# value to get a marker size.
# NOTE(review): assumes brightness values are in the hundreds -- confirm
# against the data and tune.
SIZE_DIVISOR = 30

lats, lons, brights, dates = [], [], [], []

# newline='' is what the csv module documentation recommends for files
# handed to csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header line

    for row_number, row in enumerate(reader):
        # Skip rows outside the sample before doing any parsing;
        # strptime in particular is costly when run on every row.
        if row_number % KEEP_EVERY:
            continue

        # csv.reader yields strings. Convert the numeric fields so that
        # plotly receives numbers and the size arithmetic below is real
        # arithmetic (5 * "300.1" would repeat the text, not scale it).
        # Column positions 0, 1, 2 and 5 are taken from the original
        # script -- presumably latitude, longitude, brightness and date.
        lats.append(float(row[0]))
        lons.append(float(row[1]))
        brights.append(float(row[2]))
        dates.append(datetime.strptime(row[5], '%Y-%m-%d'))

data=[{
    'type':'scattergeo',
    'lon':lons,
    'lat':lats,
    'text':dates,
    'marker':{
        'size':[bright / SIZE_DIVISOR for bright in brights],
        'color': brights,
        'colorscale':'Reds',
        'colorbar': {'title':'Fire brightness'},
    }
}]

my_layout=Layout(title="Forestfires during the year 2021")
fig={'data':data,'layout':my_layout}
offline.plot(fig, filename='global_fires_2021.html')

Solution

  • data sourcing

    import pandas as pd
    import plotly.express as px
    import plotly.graph_objects as go
    
    # Load the global 7-day NOAA-20 VIIRS active-fire CSV straight from the
    # FIRMS server into a DataFrame.
    FIRMS_CSV_URL = "https://firms.modaps.eosdis.nasa.gov/data/active_fire/noaa-20-viirs-c2/csv/J1_VIIRS_C2_Global_7d.csv"
    df = pd.read_csv(FIRMS_CSV_URL)
    # Bare expression: shows the frame when run as the last line of a
    # notebook cell.
    df
    

    scatter_geo

    
    # Plot a random subset of the detections on a world map, coloured by
    # the "bright_ti4" column. Sampling keeps the figure small enough to
    # render; the sample size is clamped to the number of rows because
    # DataFrame.sample raises ValueError when asked for more rows than exist.
    px.scatter_geo(
        df.sample(min(1000, len(df))),
        lat="latitude",
        lon="longitude",
        color="bright_ti4",
        # size="size",
        hover_data=["acq_date"],
        color_continuous_scale="reds",
    )
    

    enter image description here

    density mapbox

    # Draw a density heat map of a random subset of the detections,
    # weighted by the "bright_ti4" column. The sample size is clamped to
    # the number of rows because DataFrame.sample raises ValueError when
    # asked for more rows than exist.
    px.density_mapbox(
        df.sample(min(5000, len(df))),
        lat="latitude",
        lon="longitude",
        z="bright_ti4",
        radius=3,
        color_continuous_scale="reds",
        zoom=1,
        mapbox_style="carto-positron",
    )
    

    enter image description here

    datashader Mapbox

    import datashader as ds, colorcet
    from pyproj import Transformer
    
    # Rasterise every detection with datashader and lay the resulting image
    # over a mapbox base map, so the whole data set can be shown without
    # sending individual points to the browser.

    # Used to convert the image corner coordinates back to lon/lat
    # (EPSG:3857 -> EPSG:4326); always_xy=True keeps (x, y) = (lon, lat) order.
    t3857_to_4326 = Transformer.from_crs(3857, 4326, always_xy=True)

    # project CRS to ensure image overlays appropriately back over mapbox
    # https://community.plotly.com/t/datashader-image-distorted-when-passed-to-mapbox/39375/2
    df.loc[:, "longitude_3857"], df.loc[:, "latitude_3857"] = ds.utils.lnglat_to_meters(
        df.longitude, df.latitude
    )

    # Width and height of the rasterised image, in pixels.
    RESOLUTION=1000
    cvs = ds.Canvas(plot_width=RESOLUTION, plot_height=RESOLUTION)
    # Aggregate the projected points onto the canvas grid, then colour the
    # grid and convert it to a PIL image that mapbox can use as a layer.
    agg = cvs.points(df, x="longitude_3857", y="latitude_3857")
    img = ds.tf.shade(agg, cmap=colorcet.fire).to_pil()

    # Empty trace: the figure exists only to host the mapbox image layer.
    fig = go.Figure(go.Scattermapbox())
    fig.update_layout(
        mapbox={
            "style": "carto-positron",
            "layers": [
                {
                    "sourcetype": "image",
                    "source": img,
                    # Sets the coordinates array contains [longitude, latitude] pairs for the image corners listed in
                    # clockwise order: top left, top right, bottom right, bottom left.
                    # (a, b) index the first (0) or last (-1) x / y coordinate
                    # of the aggregate -- presumably ascending, so -1 is the
                    # east / north edge.
                    "coordinates": [
                        t3857_to_4326.transform(
                            agg.coords["longitude_3857"].values[a],
                            agg.coords["latitude_3857"].values[b],
                        )
                        for a, b in [(0, -1), (-1, -1), (-1, 0), (0, 0)]
                    ],
                }
            ],
        },
        # The original repeated the "r" key, which left the bottom margin
        # unset; "b" zeroes it like the other three sides.
        margin={"l": 0, "r": 0, "t": 0, "b": 0},
    )
    

    enter image description here