Tags: dataframe, go, gota

Skipping rows when using gota ReadCSV


I'm coming from pandas which has a convenient skiprows parameter like this:

df = pd.read_csv(tsv_file, sep="\t", encoding=encoding, skiprows=3, thousands=",")

How can I do this with gota?


Solution

  • You can use encoding/csv to read the file and drop the unwanted rows yourself before handing the records to gota.

    There are 2 methods.

    1. Call Read() on the csv.Reader once for each row you want to skip, discarding the result.
    2. Read all the records, then slice off the leading rows.

    Here is an example; the comments mark where the first and second methods are used.

    package main
    
    import (
        "encoding/csv"
        "fmt"
        "os"
    
        "github.com/go-gota/gota/dataframe"
    )
    
    // main opens sample.csv, discards its first 3 rows, loads the remaining
    // records into a gota DataFrame, and prints the records and column names.
    // Any I/O or parse error aborts the program with a panic.
    func main() {
        f, err := os.Open("sample.csv")
        if err != nil {
            panic(err.Error())
        }
        defer f.Close()

        // Named reader (not csv) so the encoding/csv package stays accessible
        // for the rest of the function instead of being shadowed.
        reader := csv.NewReader(f)

        // FIRST METHOD: consume the leading rows before reading the rest.
        if err := SkipRows(reader, 3); err != nil {
            panic(err.Error())
        }

        records, err := reader.ReadAll() // get all remaining records
        if err != nil {
            panic(err.Error())
        }

        // SECOND METHOD: instead of calling SkipRows above, read everything and
        // slice off the leading rows (requires len(records) >= 3).
        // records = records[3:] // SECOND METHOD
        df := dataframe.LoadRecords(records)

        fmt.Println(df.Records())
        fmt.Println(df.Names())
    }
    
    // SkipRows reads and discards the next skip records from r, leaving r
    // positioned at the first record that should be kept.
    //
    // Skipped rows are not subject to field-count validation: leading rows in a
    // file (titles, notes, units) often have a different number of fields than
    // the data that follows. The reader's FieldsPerRecord setting is restored
    // before returning, so with the default value of 0 the first record read
    // AFTER the skipped rows determines the expected field count.
    //
    // A skip value of zero or less is a no-op. If the input ends before skip
    // records have been read, the error from Read (io.EOF) is returned
    // unchanged; any parse error is likewise returned as-is.
    func SkipRows(r *csv.Reader, skip int) error {
        // With FieldsPerRecord == 0, Read would lock the expected field count to
        // the first skipped row; disable the check while discarding rows.
        fieldsPerRecord := r.FieldsPerRecord
        r.FieldsPerRecord = -1
        defer func() { r.FieldsPerRecord = fieldsPerRecord }()

        for i := 0; i < skip; i++ {
            if _, err := r.Read(); err != nil {
                return err
            }
        }
        return nil
    }
    
    

    sample.csv

    1,1,1
    2,2,2
    3,3,3
    header1,header2,header3
    5,5,5
    6,6,6
    7,7,7
    8,8,8
    9,9,9
    10,10,10
    11,11,11
    12,12,12
    13,13,13
    14,14,14
    15,15,15
    16,16,16
    17,17,17
    18,18,18
    19,19,19
    20,20,20
    21,21,21
    

    output

    [[header1 header2 header3] [5 5 5] [6 6 6] [7 7 7] [8 8 8] [9 9 9] [10 10 10] [11 11 11] [12 12 12] [13 13 13] [14 14 14] [15 15 15] [16 16 16] [17 17 17] [18 18 18] [19 19 19] [20 20 20] [21 21 21]]
    [header1 header2 header3]
    

    ReadCSV itself ends by calling LoadRecords, just as this example does: https://github.com/go-gota/gota/blob/f70540952827cfc8abfa1257391fd33284300b24/dataframe/dataframe.go#L1360