Tags: python, ggplot2, plotnine

How to draw a line marking for each x-axis tick in a plotnine chart in Python?


I am trying to replicate some elements of a reference chart and draw a marking for every tick on the x axis of a plotnine plot.

Reference Chart: enter image description here

I am not able to figure out how to draw the grey tick markings for each country (circled in red in the reference chart) so that they line up with the x-axis ticks. (Reference for the image and the code of this plot: link here)

I would like to create the same grey x-tick markings for each 'child_category' in the code that I have tried below:

import pandas as pd
import plotnine as p9
from plotnine import *

# Create a sample dataset
new_data = {
    'date': pd.date_range('2022-01-01', periods=8, freq="ME"),
    'parent_category': ['Electronics', 'Electronics', 'Fashion', 'Fashion', 'Home Goods', 'Electronics', 'Fashion','Electronics'],
    'child_category': ['Smartphones', 'Laptops', 'Shirts', 'Pants', 'Kitchenware','Laptops', 'Shirts', 'Smartphones']
}

# Create the DataFrame
new_data = pd.DataFrame(new_data)
# Point chart of child_category against date, split into one facet per
# parent_category (single column of panels, each with its own y scale).
(
    ggplot(new_data, aes(x="date", y="child_category"))
    # Disabled layer kept from the original attempt:
    #   geom_line(size=8, color="pink")  # alternative colour: #edece3
    + geom_point(size=6, color="purple", fill="red", stroke=1, alpha=0.5)
    + facet_wrap("parent_category", scales="free_y", ncol=1)
    + scale_x_datetime(date_breaks="3 month")
    + theme_538()
    + theme(
        axis_text_x=element_text(angle=45, hjust=1),
        panel_grid_major=element_blank(),  # major grid lines removed
        figure_size=(8, 6),
    )
)

enter image description here


Solution

  • Basically, you draw the grid lines using a geom_segment, as in the code used for the referenced plot. But as you are using faceting, this requires some effort to set up the data:

    import pandas as pd
    import plotnine as p9
    from plotnine import *
    import numpy as np
    
    # One row per distinct (parent_category, child_category) pair. The frame
    # holds only these two columns, so drop_duplicates() needs no subset.
    grid_data = new_data[['parent_category', 'child_category']].drop_duplicates()

    # Number the children 1..n within each parent. factorize() is 0-based and
    # numbers values in order of first appearance, hence the + 1.
    grid_data['child_num'] = (
        grid_data.groupby('parent_category')['child_category']
        .transform(lambda x: pd.factorize(x)[0] + 1)
    )

    # x positions of the markings: 10 evenly spaced timestamps spanning the
    # observed date range. Series.min()/max() are used instead of the builtin
    # min()/max(), which iterate element by element and give order-dependent
    # results when a NaT is present.
    # NOTE(review): these positions are independent of the date_breaks passed
    # to scale_x_datetime in the plot -- confirm whether they should coincide
    # with the axis ticks.
    ticks = pd.DataFrame({
        'x': pd.date_range(
            start=new_data['date'].min(),
            end=new_data['date'].max(),
            periods=10,
        )
    })

    # Cartesian product: every (parent, child) row is paired with every tick.
    grid_data = grid_data.merge(ticks, how='cross')
    
    # Faceted point chart with a short grey vertical segment at every tick
    # position (grid_data.x) for every child category row.
    (
        ggplot(new_data, aes(x="date", y="child_category"))
        # NOTE(review): presumably added first so the y scale is discrete
        # before the segment layer maps numeric y values onto it -- confirm.
        + scale_y_discrete()
        # Each segment runs from child_num - .25 to child_num + .25 at a fixed x.
        + geom_segment(
            data=grid_data,
            mapping=aes(y="child_num + .25", yend="child_num - .25", x="x", xend="x"),
            color="#CCCCCC",
        )
        + geom_point(size=6, color="purple", fill="red", stroke=1, alpha=0.5)
        + facet_wrap("parent_category", scales="free_y", ncol=1)
        + scale_x_datetime(date_breaks="3 month")
        + theme_538()
        + theme(
            axis_text_x=element_text(angle=45, hjust=1),
            panel_grid_major=element_blank(),  # major grid lines removed
            figure_size=(8, 6),
        )
    )
    

    enter image description here