pythonvectorizationpython-polarsfifo

polars, combining sales and purchases, FIFO method


I have two dataframes:

One with buys

df_buy = pl.DataFrame(
    {
        "BuyId": [1, 2],
        "Item": ["A", "A"],
        "BuyDate": [date.fromisoformat("2023-01-01"), date.fromisoformat("2024-03-07")],
        "Quantity": [40, 50],
    }
)
BuyId Item BuyDate Quantity
1 A 2023-01-01 40
2 A 2024-03-07 50

And other with sells:

df_sell = pl.DataFrame(
    {
        "SellId": [3, 4],
        "Item": ["A", "A"],
        "SellDate": [date.fromisoformat("2024-04-01"), date.fromisoformat("2024-05-01")],
        "Quantity": [10, 45],
    }
)

SellId Item SellDate Quantity
3 A 2024-04-01 10
4 A 2024-05-01 45

I want to determine which sales came from which purchases using the FIFO method.

The result should be something like this.

BuyId Item BuyDate RemainingQuantity SellId SellDate SellQuantity QuantityAfterSell
1 A 2023-01-01 40 3 2024-04-01 10 30
1 A 2023-01-01 30 4 2024-05-01 30 0
2 A 2024-03-07 50 4 2024-05-01 15 35

I know that I can do it using a for loop but I wanted to know if there is a more vectorized way to do it.

Edit:

Added new example for testing:

df_buy = pl.DataFrame(
    {
        "BuyId": [5, 1, 2],
        "Item": ["B", "A", "A"],
        "BuyDate": [date.fromisoformat("2023-01-01"), date.fromisoformat("2023-01-01"), date.fromisoformat("2024-03-07")],
        "Quantity": [10, 40, 50],
    }
)

df_sell = pl.DataFrame(
    {
        "SellId": [6, 3, 4],
        "Item": ["B", "A", "A"],
        "SellDate": [
            date.fromisoformat("2024-04-01"),
            date.fromisoformat("2024-04-01"),
            date.fromisoformat("2024-05-01"),
        ],
        "Quantity": [5, 10, 45],
    }
)

Solution

    1. Calculate total quantity bought so far and total quantity sold so far.
    df_buy_total = (
        df_buy
        .with_columns(QuantityTotal = pl.col.Quantity.cum_sum().over("Item"))
    )
    
    df_sell_total = (
        df_sell
        .with_columns(QuantityTotal = pl.col.Quantity.cum_sum().over("Item"))
    )
    
    ┌───────┬──────┬────────────┬──────────┬───────────────┐
    │ BuyId ┆ Item ┆ BuyDate    ┆ Quantity ┆ QuantityTotal │
    │ ---   ┆ ---  ┆ ---        ┆ ---      ┆ ---           │
    │ i64   ┆ str  ┆ date       ┆ i64      ┆ i64           │
    ╞═══════╪══════╪════════════╪══════════╪═══════════════╡
    │ 1     ┆ A    ┆ 2023-01-01 ┆ 40       ┆ 40            │
    │ 2     ┆ A    ┆ 2024-03-07 ┆ 50       ┆ 90            │
    └───────┴──────┴────────────┴──────────┴───────────────┘
    
    ┌────────┬──────┬────────────┬──────────┬───────────────┐
    │ SellId ┆ Item ┆ SellDate   ┆ Quantity ┆ QuantityTotal │
    │ ---    ┆ ---  ┆ ---        ┆ ---      ┆ ---           │
    │ i64    ┆ str  ┆ date       ┆ i64      ┆ i64           │
    ╞════════╪══════╪════════════╪══════════╪═══════════════╡
    │ 3      ┆ A    ┆ 2024-04-01 ┆ 10       ┆ 10            │
    │ 4      ┆ A    ┆ 2024-05-01 ┆ 45       ┆ 55            │
    └────────┴──────┴────────────┴──────────┴───────────────┘
    
    1. Find which BuyId and SellId belong together. For this we can use join_asof()
    df_sell2buy = (
        df_sell_total
        .join_asof(
            df_buy_total, on="QuantityTotal", strategy="forward", by="Item", coalesce=False, suffix="Buy"
        )
        .select(
            "BuyId", "SellId", "Item", "BuyDate", "SellDate",
            "QuantityTotalBuy",
            pl.col.QuantityTotal.alias("QuantityTotalSell")
        )
        .filter(pl.col.BuyId.is_not_null(), pl.col.QuantityTotalBuy > pl.col.QuantityTotalSell)
    )
    
    df_buy2sell = (
        df_buy_total
        .join_asof(
            df_sell_total, on="QuantityTotal", strategy="forward", by="Item", coalesce=False, suffix="Sell"
        )
        .select(
            "BuyId", "SellId", "Item", "BuyDate", "SellDate",
            pl.col.QuantityTotal.alias("QuantityTotalBuy"),
            pl.col.QuantityTotalSell.forward_fill().over("Item").fill_null(0)
        )
    )
    
    ┌───────┬────────┬──────┬────────────┬────────────┬──────────────────┬───────────────────┐
    │ BuyId ┆ SellId ┆ Item ┆ BuyDate    ┆ SellDate   ┆ QuantityTotalBuy ┆ QuantityTotalSell │
    │ ---   ┆ ---    ┆ ---  ┆ ---        ┆ ---        ┆ ---              ┆ ---               │
    │ i64   ┆ i64    ┆ str  ┆ date       ┆ date       ┆ i64              ┆ i64               │
    ╞═══════╪════════╪══════╪════════════╪════════════╪══════════════════╪═══════════════════╡
    │ 1     ┆ 3      ┆ A    ┆ 2023-01-01 ┆ 2024-04-01 ┆ 40               ┆ 10                │
    │ 2     ┆ 4      ┆ A    ┆ 2024-03-07 ┆ 2024-05-01 ┆ 90               ┆ 55                │
    └───────┴────────┴──────┴────────────┴────────────┴──────────────────┴───────────────────┘
    
    ┌───────┬────────┬──────┬────────────┬────────────┬──────────────────┬───────────────────┐
    │ BuyId ┆ SellId ┆ Item ┆ BuyDate    ┆ SellDate   ┆ QuantityTotalBuy ┆ QuantityTotalSell │
    │ ---   ┆ ---    ┆ ---  ┆ ---        ┆ ---        ┆ ---              ┆ ---               │
    │ i64   ┆ i64    ┆ str  ┆ date       ┆ date       ┆ i64              ┆ i64               │
    ╞═══════╪════════╪══════╪════════════╪════════════╪══════════════════╪═══════════════════╡
    │ 1     ┆ 4      ┆ A    ┆ 2023-01-01 ┆ 2024-05-01 ┆ 40               ┆ 55                │
    │ 2     ┆ null   ┆ A    ┆ 2024-03-07 ┆ null       ┆ 90               ┆ 55                │
    └───────┴────────┴──────┴────────────┴────────────┴──────────────────┴───────────────────┘
    
    1. Combine both linked DataFrames
    df_result = (
        pl.concat([df_sell2buy, df_buy2sell])
        .filter(~(
            pl.col.SellId.is_null() &
            pl.col.SellId.is_not_null().any().over("BuyId")
        ))
        .sort("Item", "BuyId", "SellId", nulls_last=True)
    )
    
    ┌───────┬────────┬──────┬────────────┬────────────┬──────────────────┬───────────────────┐
    │ BuyId ┆ SellId ┆ Item ┆ BuyDate    ┆ SellDate   ┆ QuantityTotalBuy ┆ QuantityTotalSell │
    │ ---   ┆ ---    ┆ ---  ┆ ---        ┆ ---        ┆ ---              ┆ ---               │
    │ i64   ┆ i64    ┆ str  ┆ date       ┆ date       ┆ i64              ┆ i64               │
    ╞═══════╪════════╪══════╪════════════╪════════════╪══════════════════╪═══════════════════╡
    │ 1     ┆ 3      ┆ A    ┆ 2023-01-01 ┆ 2024-04-01 ┆ 40               ┆ 10                │
    │ 1     ┆ 4      ┆ A    ┆ 2023-01-01 ┆ 2024-05-01 ┆ 40               ┆ 55                │
    │ 2     ┆ 4      ┆ A    ┆ 2024-03-07 ┆ 2024-05-01 ┆ 90               ┆ 55                │
    └───────┴────────┴──────┴────────────┴────────────┴──────────────────┴───────────────────┘
    
    1. Calculate final results
    (
        df_result
        .with_columns(
            QuantityRunning = pl.min_horizontal("QuantityTotalBuy", "QuantityTotalSell"),
        )    
        .with_columns(
            QuantityRunningPrev = pl.col.QuantityRunning.shift(1).over("Item").fill_null(0)
        )
        .select(
            "BuyId", "Item", "BuyDate",
            (pl.col.QuantityTotalBuy - pl.col.QuantityRunningPrev).alias("RemainingQuantity"),
            "SellId", "SellDate", 
            (pl.col.QuantityRunning - pl.col.QuantityRunningPrev).alias("SellQuantity"),
            (pl.col.QuantityTotalBuy - pl.col.QuantityRunning).alias("QuantityAfterSell")
        )
    )
    
    ┌───────┬──────┬────────────┬───────────────────┬────────┬────────────┬──────────────┬───────────────────┐
    │ BuyId ┆ Item ┆ BuyDate    ┆ RemainingQuantity ┆ SellId ┆ SellDate   ┆ SellQuantity ┆ QuantityAfterSell │
    │ ---   ┆ ---  ┆ ---        ┆ ---               ┆ ---    ┆ ---        ┆ ---          ┆ ---               │
    │ i64   ┆ str  ┆ date       ┆ i64               ┆ i64    ┆ date       ┆ i64          ┆ i64               │
    ╞═══════╪══════╪════════════╪═══════════════════╪════════╪════════════╪══════════════╪═══════════════════╡
    │ 1     ┆ A    ┆ 2023-01-01 ┆ 40                ┆ 3      ┆ 2024-04-01 ┆ 10           ┆ 30                │
    │ 1     ┆ A    ┆ 2023-01-01 ┆ 30                ┆ 4      ┆ 2024-05-01 ┆ 30           ┆ 0                 │
    │ 2     ┆ A    ┆ 2024-03-07 ┆ 50                ┆ 4      ┆ 2024-05-01 ┆ 15           ┆ 35                │
    └───────┴──────┴────────────┴───────────────────┴────────┴────────────┴──────────────┴───────────────────┘