I want to load a CSV file in R and convert it to a dataframe and tried
read.csv("ASV_file.csv", sep = ",", header = FALSE) # (1)
readr::read_csv("ASV_file.csv", sep = ",", header = FALSE) # (2)
However, each row is not separate, but recognized as one column. The data of the first two rows read using readLines() are as follows.
[1] "\",\"\"id\"\",\"\"A-1\"\",\"\"A-2\"\",\"\"A-3\"\",\"\"A-4\"\",\"\"A-5\"\",\"\"A-6\"\",\"\"A-7\"\",\"\"A-8\"\",\"\"A-9\"\",\"\"A-10\"\",\"\"A-11\"\",\"\"A-12\"\",\"\"A-13\"\",\"\"A-14\"\",\"\"A-15\"\",\"\"A-16\"\",\"\"A-17\"\",\"\"A-18\"\",\"\"A-19\"\",\"\"A-20\"\",\"\"A-21\"\",\"\"A-22\"\",\"\"A-23\"\",\"\"A-24\"\",\"\"A-25\"\",\"\"A-26\"\",\"\"A-27\"\",\"\"A-28\"\",\"\"A-29\"\",\"\"A-30\"\",\"\"A-31\"\",\"\"Kingdom\"\",\"\"Phylum\"\",\"\"Class\"\",\"\"Order\"\",\"\"Family\"\",\"\"Genus\"\",\"\"Species\"\"\""
[2] "\"1,\"\"ae82\"\",0,10,13,16,26,29,36,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\"\"d__Bacteria\"\",\"\"Bacteroidota\"\",\"\"Bacteroidia\"\",\"\"Bacteroidales\"\",\"\"Bacteroidales_BS\"\",NA,NA\""
Should I define the sepeartor as something other than a comma?
When I open this csv file in Excel and separate the columns with commas, the columns was well separated without any problems.
** Add: These are lines when I open that file with text editor **
",""id"",""A-1"",""A-2"",""A-3"",""A-4"",""A-5"",""A-6"",""A-7"",""A-8"",""A-9"",""A-10"",""A-11"",""A-12"",""A-13"",""A-14"",""A-15"",""A-16"",""A-17"",""A-18"",""A-19"",""A-20"",""A-21"",""A-22"",""A-23"",""A-24"",""A-25"",""A-26"",""A-27"",""A-28"",""A-29"",""A-30"",""A-31"",""Kingdom"",""Phylum"",""Class"",""Order"",""Family"",""Genus"",""Species"""
"1,""ae826634247e3eeae6757a595938319d"",0,10,13,16,26,29,36,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,""d__Bacteria"",""Bacteroidota"",""Bacteroidia"",""Bacteroidales"",""Bacteroidales_BS11_gut_group"",NA,NA"
Depending on actual stored values (e.g. any single- and/or double-quotes in string fields) it might be enough if you just strip start/end quotes from each line and replace ""
with "
before parsing:
library(readr)
library(stringr)
# file.input <- "ASV_file.csv"
file.df <-
file.input |>
read_lines() |>
str_remove_all("^\"|\"$") |>
str_replace_all(fixed('""'), '"') |>
I() |>
read_csv()
#> New names:
#> Rows: 1 Columns: 40
#> ── Column specification
#> ──────────────────────────────────────────────────────── Delimiter: "," chr
#> (6): id, Kingdom, Phylum, Class, Order, Family dbl (32): ...1, A-1, A-2, A-3,
#> A-4, A-5, A-6, A-7, A-8, A-9, A-10, A-11, A-1... lgl (2): Genus, Species
#> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
#> Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> • `` -> `...1`
file.df
#> # A tibble: 1 × 40
#> ...1 id `A-1` `A-2` `A-3` `A-4` `A-5` `A-6` `A-7` `A-8` `A-9` `A-10`
#> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 ae82 0 10 13 16 26 29 36 0 0 0
#> # ℹ 28 more variables: `A-11` <dbl>, `A-12` <dbl>, `A-13` <dbl>, `A-14` <dbl>,
#> # `A-15` <dbl>, `A-16` <dbl>, `A-17` <dbl>, `A-18` <dbl>, `A-19` <dbl>,
#> # `A-20` <dbl>, `A-21` <dbl>, `A-22` <dbl>, `A-23` <dbl>, `A-24` <dbl>,
#> # `A-25` <dbl>, `A-26` <dbl>, `A-27` <dbl>, `A-28` <dbl>, `A-29` <dbl>,
#> # `A-30` <dbl>, `A-31` <dbl>, Kingdom <chr>, Phylum <chr>, Class <chr>,
#> # Order <chr>, Family <chr>, Genus <lgl>, Species <lgl>
dplyr::glimpse(file.df)
#> Rows: 1
#> Columns: 40
#> $ ...1 <dbl> 1
#> $ id <chr> "ae82"
#> $ `A-1` <dbl> 0
#> $ `A-2` <dbl> 10
#> $ `A-3` <dbl> 13
#> $ `A-4` <dbl> 16
#> $ `A-5` <dbl> 26
#> $ `A-6` <dbl> 29
#> $ `A-7` <dbl> 36
#> $ `A-8` <dbl> 0
#> $ `A-9` <dbl> 0
#> $ `A-10` <dbl> 0
#> $ `A-11` <dbl> 0
#> $ `A-12` <dbl> 0
#> $ `A-13` <dbl> 0
#> $ `A-14` <dbl> 0
#> $ `A-15` <dbl> 0
#> $ `A-16` <dbl> 0
#> $ `A-17` <dbl> 0
#> $ `A-18` <dbl> 0
#> $ `A-19` <dbl> 0
#> $ `A-20` <dbl> 0
#> $ `A-21` <dbl> 0
#> $ `A-22` <dbl> 0
#> $ `A-23` <dbl> 0
#> $ `A-24` <dbl> 0
#> $ `A-25` <dbl> 0
#> $ `A-26` <dbl> 0
#> $ `A-27` <dbl> 0
#> $ `A-28` <dbl> 0
#> $ `A-29` <dbl> 0
#> $ `A-30` <dbl> 0
#> $ `A-31` <dbl> 0
#> $ Kingdom <chr> "d__Bacteria"
#> $ Phylum <chr> "Bacteroidota"
#> $ Class <chr> "Bacteroidia"
#> $ Order <chr> "Bacteroidales"
#> $ Family <chr> "Bacteroidales_BS"
#> $ Genus <lgl> NA
#> $ Species <lgl> NA
Example data:
file.input <-
c("\",\"\"id\"\",\"\"A-1\"\",\"\"A-2\"\",\"\"A-3\"\",\"\"A-4\"\",\"\"A-5\"\",\"\"A-6\"\",\"\"A-7\"\",\"\"A-8\"\",\"\"A-9\"\",\"\"A-10\"\",\"\"A-11\"\",\"\"A-12\"\",\"\"A-13\"\",\"\"A-14\"\",\"\"A-15\"\",\"\"A-16\"\",\"\"A-17\"\",\"\"A-18\"\",\"\"A-19\"\",\"\"A-20\"\",\"\"A-21\"\",\"\"A-22\"\",\"\"A-23\"\",\"\"A-24\"\",\"\"A-25\"\",\"\"A-26\"\",\"\"A-27\"\",\"\"A-28\"\",\"\"A-29\"\",\"\"A-30\"\",\"\"A-31\"\",\"\"Kingdom\"\",\"\"Phylum\"\",\"\"Class\"\",\"\"Order\"\",\"\"Family\"\",\"\"Genus\"\",\"\"Species\"\"\"",
"\"1,\"\"ae82\"\",0,10,13,16,26,29,36,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\"\"d__Bacteria\"\",\"\"Bacteroidota\"\",\"\"Bacteroidia\"\",\"\"Bacteroidales\"\",\"\"Bacteroidales_BS\"\",NA,NA\"" ) |>
paste0(collapse = "\n")
cat(file.input)
",""id"",""A-1"",""A-2"",""A-3"",""A-4"",""A-5"",""A-6"",""A-7"",""A-8"",""A-9"",""A-10"",""A-11"",""A-12"",""A-13"",""A-14"",""A-15"",""A-16"",""A-17"",""A-18"",""A-19"",""A-20"",""A-21"",""A-22"",""A-23"",""A-24"",""A-25"",""A-26"",""A-27"",""A-28"",""A-29"",""A-30"",""A-31"",""Kingdom"",""Phylum"",""Class"",""Order"",""Family"",""Genus"",""Species"""
"1,""ae82"",0,10,13,16,26,29,36,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,""d__Bacteria"",""Bacteroidota"",""Bacteroidia"",""Bacteroidales"",""Bacteroidales_BS"",NA,NA"
Created on 2025-03-25 with reprex v2.1.1