rcsv

Column separation of csv file in r


I want to load a CSV file in R and convert it to a dataframe and tried

read.csv("ASV_file.csv", sep = ",", header = FALSE) # (1)
readr::read_csv("ASV_file.csv", sep = ",", header = FALSE) # (2)

However, each row is not separate, but recognized as one column. The data of the first two rows read using readLines() are as follows.

[1] "\",\"\"id\"\",\"\"A-1\"\",\"\"A-2\"\",\"\"A-3\"\",\"\"A-4\"\",\"\"A-5\"\",\"\"A-6\"\",\"\"A-7\"\",\"\"A-8\"\",\"\"A-9\"\",\"\"A-10\"\",\"\"A-11\"\",\"\"A-12\"\",\"\"A-13\"\",\"\"A-14\"\",\"\"A-15\"\",\"\"A-16\"\",\"\"A-17\"\",\"\"A-18\"\",\"\"A-19\"\",\"\"A-20\"\",\"\"A-21\"\",\"\"A-22\"\",\"\"A-23\"\",\"\"A-24\"\",\"\"A-25\"\",\"\"A-26\"\",\"\"A-27\"\",\"\"A-28\"\",\"\"A-29\"\",\"\"A-30\"\",\"\"A-31\"\",\"\"Kingdom\"\",\"\"Phylum\"\",\"\"Class\"\",\"\"Order\"\",\"\"Family\"\",\"\"Genus\"\",\"\"Species\"\"\""
[2] "\"1,\"\"ae82\"\",0,10,13,16,26,29,36,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\"\"d__Bacteria\"\",\"\"Bacteroidota\"\",\"\"Bacteroidia\"\",\"\"Bacteroidales\"\",\"\"Bacteroidales_BS\"\",NA,NA\"" 

Should I define the sepeartor as something other than a comma?

When I open this csv file in Excel and separate the columns with commas, the columns was well separated without any problems.

** Add: These are lines when I open that file with text editor **

",""id"",""A-1"",""A-2"",""A-3"",""A-4"",""A-5"",""A-6"",""A-7"",""A-8"",""A-9"",""A-10"",""A-11"",""A-12"",""A-13"",""A-14"",""A-15"",""A-16"",""A-17"",""A-18"",""A-19"",""A-20"",""A-21"",""A-22"",""A-23"",""A-24"",""A-25"",""A-26"",""A-27"",""A-28"",""A-29"",""A-30"",""A-31"",""Kingdom"",""Phylum"",""Class"",""Order"",""Family"",""Genus"",""Species"""

"1,""ae826634247e3eeae6757a595938319d"",0,10,13,16,26,29,36,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,""d__Bacteria"",""Bacteroidota"",""Bacteroidia"",""Bacteroidales"",""Bacteroidales_BS11_gut_group"",NA,NA"


Solution

  • Depending on actual stored values (e.g. any single- and/or double-quotes in string fields) it might be enough if you just strip start/end quotes from each line and replace "" with " before parsing:

    library(readr)
    library(stringr)
    
    # file.input <-  "ASV_file.csv"
    file.df <- 
      file.input |> 
      read_lines() |> 
      str_remove_all("^\"|\"$") |> 
      str_replace_all(fixed('""'), '"') |> 
      I() |> 
      read_csv()
    #> New names:
    #> Rows: 1 Columns: 40
    #> ── Column specification
    #> ──────────────────────────────────────────────────────── Delimiter: "," chr
    #> (6): id, Kingdom, Phylum, Class, Order, Family dbl (32): ...1, A-1, A-2, A-3,
    #> A-4, A-5, A-6, A-7, A-8, A-9, A-10, A-11, A-1... lgl (2): Genus, Species
    #> ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
    #> Specify the column types or set `show_col_types = FALSE` to quiet this message.
    #> • `` -> `...1`
    
    file.df
    #> # A tibble: 1 × 40
    #>    ...1 id    `A-1` `A-2` `A-3` `A-4` `A-5` `A-6` `A-7` `A-8` `A-9` `A-10`
    #>   <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
    #> 1     1 ae82      0    10    13    16    26    29    36     0     0      0
    #> # ℹ 28 more variables: `A-11` <dbl>, `A-12` <dbl>, `A-13` <dbl>, `A-14` <dbl>,
    #> #   `A-15` <dbl>, `A-16` <dbl>, `A-17` <dbl>, `A-18` <dbl>, `A-19` <dbl>,
    #> #   `A-20` <dbl>, `A-21` <dbl>, `A-22` <dbl>, `A-23` <dbl>, `A-24` <dbl>,
    #> #   `A-25` <dbl>, `A-26` <dbl>, `A-27` <dbl>, `A-28` <dbl>, `A-29` <dbl>,
    #> #   `A-30` <dbl>, `A-31` <dbl>, Kingdom <chr>, Phylum <chr>, Class <chr>,
    #> #   Order <chr>, Family <chr>, Genus <lgl>, Species <lgl>
    dplyr::glimpse(file.df)
    #> Rows: 1
    #> Columns: 40
    #> $ ...1    <dbl> 1
    #> $ id      <chr> "ae82"
    #> $ `A-1`   <dbl> 0
    #> $ `A-2`   <dbl> 10
    #> $ `A-3`   <dbl> 13
    #> $ `A-4`   <dbl> 16
    #> $ `A-5`   <dbl> 26
    #> $ `A-6`   <dbl> 29
    #> $ `A-7`   <dbl> 36
    #> $ `A-8`   <dbl> 0
    #> $ `A-9`   <dbl> 0
    #> $ `A-10`  <dbl> 0
    #> $ `A-11`  <dbl> 0
    #> $ `A-12`  <dbl> 0
    #> $ `A-13`  <dbl> 0
    #> $ `A-14`  <dbl> 0
    #> $ `A-15`  <dbl> 0
    #> $ `A-16`  <dbl> 0
    #> $ `A-17`  <dbl> 0
    #> $ `A-18`  <dbl> 0
    #> $ `A-19`  <dbl> 0
    #> $ `A-20`  <dbl> 0
    #> $ `A-21`  <dbl> 0
    #> $ `A-22`  <dbl> 0
    #> $ `A-23`  <dbl> 0
    #> $ `A-24`  <dbl> 0
    #> $ `A-25`  <dbl> 0
    #> $ `A-26`  <dbl> 0
    #> $ `A-27`  <dbl> 0
    #> $ `A-28`  <dbl> 0
    #> $ `A-29`  <dbl> 0
    #> $ `A-30`  <dbl> 0
    #> $ `A-31`  <dbl> 0
    #> $ Kingdom <chr> "d__Bacteria"
    #> $ Phylum  <chr> "Bacteroidota"
    #> $ Class   <chr> "Bacteroidia"
    #> $ Order   <chr> "Bacteroidales"
    #> $ Family  <chr> "Bacteroidales_BS"
    #> $ Genus   <lgl> NA
    #> $ Species <lgl> NA
    

    Example data:

    file.input <- 
    c("\",\"\"id\"\",\"\"A-1\"\",\"\"A-2\"\",\"\"A-3\"\",\"\"A-4\"\",\"\"A-5\"\",\"\"A-6\"\",\"\"A-7\"\",\"\"A-8\"\",\"\"A-9\"\",\"\"A-10\"\",\"\"A-11\"\",\"\"A-12\"\",\"\"A-13\"\",\"\"A-14\"\",\"\"A-15\"\",\"\"A-16\"\",\"\"A-17\"\",\"\"A-18\"\",\"\"A-19\"\",\"\"A-20\"\",\"\"A-21\"\",\"\"A-22\"\",\"\"A-23\"\",\"\"A-24\"\",\"\"A-25\"\",\"\"A-26\"\",\"\"A-27\"\",\"\"A-28\"\",\"\"A-29\"\",\"\"A-30\"\",\"\"A-31\"\",\"\"Kingdom\"\",\"\"Phylum\"\",\"\"Class\"\",\"\"Order\"\",\"\"Family\"\",\"\"Genus\"\",\"\"Species\"\"\"",
    "\"1,\"\"ae82\"\",0,10,13,16,26,29,36,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\"\"d__Bacteria\"\",\"\"Bacteroidota\"\",\"\"Bacteroidia\"\",\"\"Bacteroidales\"\",\"\"Bacteroidales_BS\"\",NA,NA\"" ) |> 
      paste0(collapse = "\n")
    
    cat(file.input)
    
    ",""id"",""A-1"",""A-2"",""A-3"",""A-4"",""A-5"",""A-6"",""A-7"",""A-8"",""A-9"",""A-10"",""A-11"",""A-12"",""A-13"",""A-14"",""A-15"",""A-16"",""A-17"",""A-18"",""A-19"",""A-20"",""A-21"",""A-22"",""A-23"",""A-24"",""A-25"",""A-26"",""A-27"",""A-28"",""A-29"",""A-30"",""A-31"",""Kingdom"",""Phylum"",""Class"",""Order"",""Family"",""Genus"",""Species"""
    "1,""ae82"",0,10,13,16,26,29,36,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,""d__Bacteria"",""Bacteroidota"",""Bacteroidia"",""Bacteroidales"",""Bacteroidales_BS"",NA,NA"
    

    Created on 2025-03-25 with reprex v2.1.1