go

Parsing string to time with unknown layout


I have a CSV file and want to read:

  1. Header names
  2. Field types

So, I wrote the below:

package main

import (
    "encoding/csv"
    "fmt"
    "log"
    "os"
    "reflect"
    "strconv"
    "time"
)

// timeLayouts lists the layouts tried, in order, when deciding whether a field
// holds a timestamp. A layout cannot be deduced from the value in general, so
// a fixed set of common layouts is attempted; a value matching none of them is
// reported as a string. Extend this list to recognise more formats.
var timeLayouts = []string{
    time.RFC3339,
    time.RFC1123Z,
    time.RFC1123,
    time.RFC850,
    time.RFC822Z,
    time.RFC822,
    time.ANSIC,
    time.UnixDate,
    time.RubyDate,
    "2006-01-02 15:04:05",
    "2006-01-02",
    "01/02/2006", // ambiguous dates are read month-first
    "02-Jan-2006",
    "Jan 2, 2006",
    "January 2, 2006",
}

// inferType classifies a raw CSV field. It returns the type name ("int",
// "float", "bool", "time" or "string") together with the parsed value.
// The checks run from the most specific type to the least specific one, so
// "1" is an int rather than a bool, and anything unparseable stays a string.
func inferType(v string) (string, interface{}) {
    if n, err := strconv.Atoi(v); err == nil {
        return "int", n
    }
    if x, err := strconv.ParseFloat(v, 64); err == nil {
        return "float", x
    }
    if b, err := strconv.ParseBool(v); err == nil {
        return "bool", b
    }
    for _, layout := range timeLayouts {
        if t, err := time.Parse(layout, v); err == nil {
            return "time", t
        }
    }
    return "string", v
}

// main reads the header row and the first data row of ./file.csv, infers a
// type for each column from that data row, prints the Go type and value of
// every parsed field, and finally prints one "Header: <name> is <type>" line
// per column. Any I/O or CSV error terminates the program via checkError.
func main() {
    filePath := "./file.csv"

    // Load the csv file; stop right away if it cannot be opened instead of
    // failing later with a less helpful read error.
    f, err := os.Open(filePath)
    checkError("opening csv file: ", err)
    defer f.Close()

    // Create a new reader.
    r := csv.NewReader(f)

    // The first row holds the column names.
    header, err := r.Read()
    checkError("reading header row: ", err)

    // The second row is the sample the column types are inferred from.
    // With the reader's default settings a row whose field count differs
    // from the header's is reported as an error, so both rows have the same
    // length once this check passes.
    record, err := r.Read()
    checkError("reading first data row: ", err)

    headerTypes := make([]string, len(record))
    for i, v := range record {
        typeName, value := inferType(v)
        headerTypes[i] = typeName
        fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
    }

    for i, name := range header {
        fmt.Printf("Header: %v \tis\t %v\n", name, headerTypes[i])
    }
}

// checkError is a no-op when err is nil. Otherwise it logs message
// immediately followed by err and terminates the program through log.Fatal.
func checkError(message string, err error) {
    if err == nil {
        return
    }
    log.Fatal(message, err)
}

With the following CSV file:

name,age,developer
"Hasan","46.4","true"

I got this output:

Header: name       is  string
Header: age        is  float
Header: developer  is  bool

The output is correct.

The one thing I could not do is check whether a field is a time value, because I do not know what layout the field could be in.

I am aware that I can parse a string to a time using the layouts described at https://go.dev/src/time/format.go, and that I can build a custom parser, something like:

    test, err := fmtdate.Parse("MM/DD/YYYY", "10/15/1983")
    if err != nil {
        panic(err)
    }

But, as far as I know, this only works if I know the layout.

So, again, my question is: how can I parse a time, or what should I do to be able to parse it, if I do not know the layout?


Solution

  • Thanks to the comment by Burak, I found the solution by using this package: github.com/araddon/dateparse

    
    // Normal parse.  Equivalent Timezone rules as time.Parse()
    t, err := dateparse.ParseAny("3/1/2014")
    
    // Parse Strict, error on ambiguous mm/dd vs dd/mm dates
    t, err := dateparse.ParseStrict("3/1/2014")
    > returns error 
    
    // Return a string that represents the layout to parse the given date-time.
    layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM")
    > "Jan 2, 2006 3:04:05 PM"