I have two dataframes:
One with buys
df_buy = pl.DataFrame(
{
"BuyId": [1, 2],
"Item": ["A", "A"],
"BuyDate": [date.fromisoformat("2023-01-01"), date.fromisoformat("2024-03-07")],
"Quantity": [40, 50],
}
)
BuyId | Item | BuyDate | Quantity |
---|---|---|---|
1 | A | 2023-01-01 | 40 |
2 | A | 2024-03-07 | 50 |
And other with sells:
df_sell = pl.DataFrame(
{
"SellId": [3, 4],
"Item": ["A", "A"],
"SellDate": [date.fromisoformat("2024-04-01"), date.fromisoformat("2024-05-01")],
"Quantity": [10, 45],
}
)
SellId | Item | SellDate | Quantity |
---|---|---|---|
3 | A | 2024-04-01 | 10 |
4 | A | 2024-05-01 | 45 |
I want to determine which sales came from which purchases using the FIFO method.
The result should be something like this.
BuyId | Item | BuyDate | RemainingQuantity | SellId | SellDate | SellQuantity | QuantityAfterSell |
---|---|---|---|---|---|---|---|
1 | A | 2023-01-01 | 40 | 3 | 2024-04-01 | 10 | 30 |
1 | A | 2023-01-01 | 30 | 4 | 2024-05-01 | 30 | 0 |
2 | A | 2024-03-07 | 50 | 4 | 2024-05-01 | 15 | 35 |
I know that I can do it using a for loop but I wanted to know if there is a more vectorized way to do it.
Edit:
Added new example for testing:
df_buy = pl.DataFrame(
{
"BuyId": [5, 1, 2],
"Item": ["B", "A", "A"],
"BuyDate": [date.fromisoformat("2023-01-01"), date.fromisoformat("2023-01-01"), date.fromisoformat("2024-03-07")],
"Quantity": [10, 40, 50],
}
)
df_sell = pl.DataFrame(
{
"SellId": [6, 3, 4],
"Item": ["B", "A", "A"],
"SellDate": [
date.fromisoformat("2024-04-01"),
date.fromisoformat("2024-04-01"),
date.fromisoformat("2024-05-01"),
],
"Quantity": [5, 10, 45],
}
)
cum_sum()
to calculate running total.df_buy_total = (
df_buy
.with_columns(QuantityTotal = pl.col.Quantity.cum_sum().over("Item"))
)
df_sell_total = (
df_sell
.with_columns(QuantityTotal = pl.col.Quantity.cum_sum().over("Item"))
)
┌───────┬──────┬────────────┬──────────┬───────────────┐
│ BuyId ┆ Item ┆ BuyDate ┆ Quantity ┆ QuantityTotal │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ date ┆ i64 ┆ i64 │
╞═══════╪══════╪════════════╪══════════╪═══════════════╡
│ 1 ┆ A ┆ 2023-01-01 ┆ 40 ┆ 40 │
│ 2 ┆ A ┆ 2024-03-07 ┆ 50 ┆ 90 │
└───────┴──────┴────────────┴──────────┴───────────────┘
┌────────┬──────┬────────────┬──────────┬───────────────┐
│ SellId ┆ Item ┆ SellDate ┆ Quantity ┆ QuantityTotal │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ date ┆ i64 ┆ i64 │
╞════════╪══════╪════════════╪══════════╪═══════════════╡
│ 3 ┆ A ┆ 2024-04-01 ┆ 10 ┆ 10 │
│ 4 ┆ A ┆ 2024-05-01 ┆ 45 ┆ 55 │
└────────┴──────┴────────────┴──────────┴───────────────┘
BuyId
and SellId
belong together. For this we can use join_asof()
df_sell2buy = (
df_sell_total
.join_asof(
df_buy_total, on="QuantityTotal", strategy="forward", by="Item", coalesce=False, suffix="Buy"
)
.select(
"BuyId", "SellId", "Item", "BuyDate", "SellDate",
"QuantityTotalBuy",
pl.col.QuantityTotal.alias("QuantityTotalSell")
)
.filter(pl.col.BuyId.is_not_null(), pl.col.QuantityTotalBuy > pl.col.QuantityTotalSell)
)
df_buy2sell = (
df_buy_total
.join_asof(
df_sell_total, on="QuantityTotal", strategy="forward", by="Item", coalesce=False, suffix="Sell"
)
.select(
"BuyId", "SellId", "Item", "BuyDate", "SellDate",
pl.col.QuantityTotal.alias("QuantityTotalBuy"),
pl.col.QuantityTotalSell.forward_fill().over("Item").fill_null(0)
)
)
┌───────┬────────┬──────┬────────────┬────────────┬──────────────────┬───────────────────┐
│ BuyId ┆ SellId ┆ Item ┆ BuyDate ┆ SellDate ┆ QuantityTotalBuy ┆ QuantityTotalSell │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str ┆ date ┆ date ┆ i64 ┆ i64 │
╞═══════╪════════╪══════╪════════════╪════════════╪══════════════════╪═══════════════════╡
│ 1 ┆ 3 ┆ A ┆ 2023-01-01 ┆ 2024-04-01 ┆ 40 ┆ 10 │
│ 2 ┆ 4 ┆ A ┆ 2024-03-07 ┆ 2024-05-01 ┆ 90 ┆ 55 │
└───────┴────────┴──────┴────────────┴────────────┴──────────────────┴───────────────────┘
┌───────┬────────┬──────┬────────────┬────────────┬──────────────────┬───────────────────┐
│ BuyId ┆ SellId ┆ Item ┆ BuyDate ┆ SellDate ┆ QuantityTotalBuy ┆ QuantityTotalSell │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str ┆ date ┆ date ┆ i64 ┆ i64 │
╞═══════╪════════╪══════╪════════════╪════════════╪══════════════════╪═══════════════════╡
│ 1 ┆ 4 ┆ A ┆ 2023-01-01 ┆ 2024-05-01 ┆ 40 ┆ 55 │
│ 2 ┆ null ┆ A ┆ 2024-03-07 ┆ null ┆ 90 ┆ 55 │
└───────┴────────┴──────┴────────────┴────────────┴──────────────────┴───────────────────┘
df_result = (
pl.concat([df_sell2buy, df_buy2sell])
.filter(~(
pl.col.SellId.is_null() &
pl.col.SellId.is_not_null().any().over("BuyId")
))
.sort("Item", "BuyId", "SellId", nulls_last=True)
)
┌───────┬────────┬──────┬────────────┬────────────┬──────────────────┬───────────────────┐
│ BuyId ┆ SellId ┆ Item ┆ BuyDate ┆ SellDate ┆ QuantityTotalBuy ┆ QuantityTotalSell │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str ┆ date ┆ date ┆ i64 ┆ i64 │
╞═══════╪════════╪══════╪════════════╪════════════╪══════════════════╪═══════════════════╡
│ 1 ┆ 3 ┆ A ┆ 2023-01-01 ┆ 2024-04-01 ┆ 40 ┆ 10 │
│ 1 ┆ 4 ┆ A ┆ 2023-01-01 ┆ 2024-05-01 ┆ 40 ┆ 55 │
│ 2 ┆ 4 ┆ A ┆ 2024-03-07 ┆ 2024-05-01 ┆ 90 ┆ 55 │
└───────┴────────┴──────┴────────────┴────────────┴──────────────────┴───────────────────┘
(
df_result
.with_columns(
QuantityRunning = pl.min_horizontal("QuantityTotalBuy", "QuantityTotalSell"),
)
.with_columns(
QuantityRunningPrev = pl.col.QuantityRunning.shift(1).over("Item").fill_null(0)
)
.select(
"BuyId", "Item", "BuyDate",
(pl.col.QuantityTotalBuy - pl.col.QuantityRunningPrev).alias("RemainingQuantity"),
"SellId", "SellDate",
(pl.col.QuantityRunning - pl.col.QuantityRunningPrev).alias("SellQuantity"),
(pl.col.QuantityTotalBuy - pl.col.QuantityRunning).alias("QuantityAfterSell")
)
)
┌───────┬──────┬────────────┬───────────────────┬────────┬────────────┬──────────────┬───────────────────┐
│ BuyId ┆ Item ┆ BuyDate ┆ RemainingQuantity ┆ SellId ┆ SellDate ┆ SellQuantity ┆ QuantityAfterSell │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ date ┆ i64 ┆ i64 ┆ date ┆ i64 ┆ i64 │
╞═══════╪══════╪════════════╪═══════════════════╪════════╪════════════╪══════════════╪═══════════════════╡
│ 1 ┆ A ┆ 2023-01-01 ┆ 40 ┆ 3 ┆ 2024-04-01 ┆ 10 ┆ 30 │
│ 1 ┆ A ┆ 2023-01-01 ┆ 30 ┆ 4 ┆ 2024-05-01 ┆ 30 ┆ 0 │
│ 2 ┆ A ┆ 2024-03-07 ┆ 50 ┆ 4 ┆ 2024-05-01 ┆ 15 ┆ 35 │
└───────┴──────┴────────────┴───────────────────┴────────┴────────────┴──────────────┴───────────────────┘