What is the ideal way to store duration datatype of polars in a spreadsheet, as it is getting converted on its own, and I do not get the data like “1d 2m” when I export to spreadsheet?
Convert
1d 2m 2h 1m
To either a string or a time format
Update: .dt.to_string()
support for Duration types was added in Polars 1.14.0
df.with_columns(
pl.col("duration").dt.to_string(format="polars")
.name.suffix("_str")
)
shape: (1, 2)
┌───────────────────┬───────────────────┐
│ duration ┆ duration_str │
│ --- ┆ --- │
│ duration[μs] ┆ str │
╞═══════════════════╪═══════════════════╡
│ 1d 2m 3s 987654µs ┆ 1d 2m 3s 987654µs │
└───────────────────┴───────────────────┘
Original answer
There is an open feature request for keeping the Duration format when casted to String:
df = pl.select(duration = pl.duration(days=1, seconds=123, microseconds=987654))
df.with_columns(duration_string = pl.col("duration").cast(str))
shape: (1, 2)
┌───────────────────┬─────────────────┐
│ duration ┆ duration_string │
│ --- ┆ --- │
│ duration[μs] ┆ str │
╞═══════════════════╪═════════════════╡
│ 1d 2m 3s 987654µs ┆ 86523987654 │
└───────────────────┴─────────────────┘
The code for the frame repr formatting is here:
It is possible to attempt to port it to Polars expressions.
Probably not something one would use, but may serve as an interesting example of what is possible with expressions.
def duration_to_string(col, time_unit="us"):
names = ["d", "h", "m", "s"]
sizes = {
"us": [864e8, 36e8, 6e7, 1e6],
"ns": [864e11, 36e11, 6e10, 1e9],
"ms": [864e5, 36e5, 6e4, 1e3]
}[time_unit]
remainder = {
"us":
(pl.when(col % 1e3 != 0).then(pl.format("{}µs", (col % 1e6).cast(int)))
.when(col % 1e6 != 0).then(pl.format("{}ms", ((col % 1e6) / 1e3).cast(int)))),
"ns":
(pl.when(col % 1e3 != 0).then(pl.format("{}ns", (col % 1e9).cast(int)))
.when(col % 1e6 != 0).then(pl.format("{}µs", ((col % 1e9) / 1e3).cast(int)))
.when(col % 1e9 != 0).then(pl.format("{}ms", ((col % 1e9) / 1e6).cast(int)))),
"ms":
(pl.when(col % 1e3 != 0).then(pl.format("{}ms", (col % 1e3).cast(int))))
}[time_unit]
blank, space = pl.lit(""), pl.lit(" ")
def build_fmt(name, whole_num, size):
valid = (whole_num <= -1) | (whole_num >= 1)
pad = pl.when(col % size != 0).then(space).otherwise(blank)
fmt = pl.format("{}{}{}", whole_num.cast(int), pl.lit(name), space)
return pl.when(valid).then(fmt).otherwise(blank)
size = sizes[0]
whole_num = col / sizes[0]
expr = build_fmt(names[0], whole_num, size)
for i in range(1, 4):
size = sizes[i]
whole_num = (col % sizes[i - 1]) / size
expr += build_fmt(names[i], whole_num, size)
expr += remainder.otherwise(blank)
return (
pl.when(col == 0)
.then(pl.lit("0" + time_unit.replace("u", "µ")))
.otherwise(expr)
)
And call it with Expr.pipe()
df.with_columns(
pl.col("duration").pipe(duration_to_string, time_unit=df.schema["duration"].time_unit)
.name.suffix("_string")
)
shape: (1, 2)
┌───────────────────┬───────────────────┐
│ duration ┆ duration_string │
│ --- ┆ --- │
│ duration[μs] ┆ str │
╞═══════════════════╪═══════════════════╡
│ 1d 2m 3s 987654µs ┆ 1d 2m 3s 987654µs │
└───────────────────┴───────────────────┘