i have a df with a dict type column named measures
like below:
How can I flatten this column as new columns in the same DF?
I recently had the same problem, wanting to extract and flatten data from a JSON, it might be overkill for your issue and a bit obscure but here it is:
This expects Dict
s and ignores missing or malformed data
function extract_flatten(data::AbstractDict, extract::AbstractDict; cmdchar::AbstractChar='%')
res = Dict()
for (key, val) in extract
temp = Any[data]
keys = [key]
for v in val
if v isa AbstractString
if v[1] == cmdchar
v = split(v[2:end], ':')
if v[1] == "all"
temp2 = []
keys2 = String[]
for (t,k) in zip(temp, keys)
for (kt,vt) in pairs(t)
push!(keys2, join([k; v[2:end]; kt], '_'))
push!(temp2, vt)
end
end
temp = temp2
keys = keys2
elseif v[1] == "name"
keys .*= '_' * join(v[2:end], '_')
else
error("$(repr(v)) is not a valid command")
end
else
temp .= getdefnothing.(temp, Ref(v))
end
elseif v isa Integer
temp .= getdefnothing.(temp, Ref(v))
else
error("$(repr(v)) is not a valid key")
end
nothings = isnothing.(temp)
deleteat!(temp, nothings)
deleteat!(keys, nothings)
isempty(temp) && break
end
push!.(Ref(res), keys .=> temp)
end
return res
end
getdefnothing(x, y) = nothing
getdefnothing(x::AbstractDict, y) = get(x, y, nothing)
getdefnothing(x::AbstractArray, y) = get(x, y, nothing)
example use:
using Test
const d = Dict
schema = d(
"a" => ["b", "c", "d"],
"b" => ["e"],
"c" => ["f", "%all:z", "g"]
)
a = d("z" => 3)
@test extract_flatten(a, schema) == d()
b = d("e" => 0.123)
@test extract_flatten(b, schema) == d("b" => 0.123)
c = d("e" => true, "b" => d("c" => d("d" => "ABC")))
@test extract_flatten(c, schema) == d("b" => true, "a" => "ABC")
e = d("f" => d(
"a" => d("g" => "A"),
"b" => d("g" => "B")
))
@test extract_flatten(e, schema) == d("c_z_a" => "A", "c_z_b" => "B")
f = d("f" => [
d("g" => "A"),
d("g" => "B")
])
@test extract_flatten(f, schema) == d("c_z_1" => "A", "c_z_2" => "B")
g = d("e" => nothing, "f" => [1,2,3])
@test extract_flatten(g, schema) == d()