In arrow, a schema can be created with `purrr::map`, as in `my_schema_1` below.
I want to create `my_schema_2`, setting each field's type to `double` or `string` according to the variable type, but the order of the fields in `my_schema_2` does not match the order of the column names in the CSV file `sample.csv`, so `arrow_data_2 %>% as.data.frame()` fails. How can I fix it? Thanks!
library(tidyverse)
library(arrow)
library(lubridate)
data(lakers)
Create the file `sample.csv`:
# Write the `lakers` data frame to sample.csv without a row-name column.
write.csv(lakers, file = "sample.csv", row.names = FALSE)
Create `my_schema_1`; the following process succeeds:
# Declare every column of `lakers` as a string field, keeping the column order.
my_schema_1 <- schema(
  lapply(names(lakers), function(col_name) {
    Field$create(name = col_name, type = string())
  })
)
# The field names come from the schema, so the first line of the file
# (the header written by write.csv) is skipped.
arrow_data_1 <- arrow::open_csv_dataset(
  "sample.csv",
  schema = my_schema_1,
  skip = 1
)
as.data.frame(arrow_data_1)
Create `my_schema_2`; here `arrow_data_2 %>% as.data.frame()` fails, because the order of the fields in `my_schema_2` does not match the order of the column names in `sample.csv`:
# Numeric fields are listed first and character fields second, so the field
# order of this schema differs from the column order of `lakers` / sample.csv.
my_schema_2 <- schema(
  c(
    lapply(
      names(which(sapply(lakers, is.numeric))),
      function(col_name) Field$create(name = col_name, type = double())
    ),
    lapply(
      names(which(sapply(lakers, is.character))),
      function(col_name) Field$create(name = col_name, type = string())
    )
  )
)
arrow_data_2 <- arrow::open_csv_dataset(
  "sample.csv",
  schema = my_schema_2,
  skip = 1
)
as.data.frame(arrow_data_2)
Another way to create `my_schema_2` is to loop over the column names, which preserves the order of the variables.
# Build the schema by walking the columns of `lakers` in their original order,
# so the field order matches the column order written to sample.csv.
my_schema_2 <- schema(lapply(names(lakers), \(col_name) {
  # Numeric columns (integer or double) become double fields; every other
  # column type falls back to string, so no column ever yields a NULL field
  # (the previous switch() on typeof() only handled "integer" and "character").
  if (is.numeric(lakers[[col_name]])) {
    field(name = col_name, type = double())
  } else {
    field(name = col_name, type = string())
  }
}))
# The field names come from the schema, so the first line of the file
# (the header written by write.csv) is skipped.
arrow_data_2 <- arrow::open_csv_dataset(
  "sample.csv",
  schema = my_schema_2,
  skip = 1
)
arrow_data_2 %>% as.data.frame()
# A tibble: 34,624 × 13
date opponent game_type time period etype team player result points type x y
<dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl>
1 20081028 POR home 12:00 1 jump ball OFF NA NA 0 NA NA NA
2 20081028 POR home 11:39 1 shot LAL Pau Gasol missed 0 hook 23 13
3 20081028 POR home 11:37 1 rebound LAL Vladimir Radmanovic NA 0 off NA NA
4 20081028 POR home 11:25 1 shot LAL Derek Fisher missed 0 layup 25 6
5 20081028 POR home 11:23 1 rebound LAL Pau Gasol NA 0 off NA NA
6 20081028 POR home 11:22 1 shot LAL Pau Gasol made 2 hook 25 10
7 20081028 POR home 11:22 1 foul POR Greg Oden NA 0 shoo… NA NA
8 20081028 POR home 11:22 1 free throw LAL Pau Gasol made 1 NA NA NA
9 20081028 POR home 11:00 1 foul LAL Vladimir Radmanovic NA 0 pers… NA NA
10 20081028 POR home 10:53 1 shot POR LaMarcus Aldridge made 2 jump 36 21