fortrangfortran

how to read a csv file from fortran if the data is different for each colum


I am trying to rea csv file in fortran, but failed.

I have this data in my csv file

0,2013/02/20,24.448
1,2013/02/20,24.448
2,2013/02/20,24.545
3,2013/02/20,24.448
4,2013/02/20,24.351
5,2013/02/20,24.351

And I try to define it like this

real,allocatable :: array(:,:) But, when I print, the printed data looks all wrong. I assume it is because I define it as real but apparently it is not.

This is the read command I use

do
     num_rows = num_rows + 1
     read(10, *, iostat=io) data(num_rows, 1), data(num_rows, 2), data(num_rows, 3)
     if (io /= 0) exit ! Exit the loop when there is no data left to read
  end do

How can I solve this ?


Solution

  • You can't read what is essentially a character (e.g. 2013/02/20) into a real variable.

    Best to read CSV files into character arrays first then convert individual characters into reals by reading as "internal files"; e.g. read( string, * ) real_value

    Once you have an array of characters you can pick off individual columns.

    Provided your CSV file is comma-delimited and none of those commas are actually part of cell data then the following will read your file.

    module MyModule
       implicit none
       integer, parameter :: MAXLEN = 1000
    
       type Element
          character(len=:), allocatable :: value     ! single value in a cell
       end type Element
    
       type Line
          type(Element), allocatable :: column(:)    ! collection of cells in a row
       end type Line
    
       type Spreadsheet
          type(Line), allocatable :: row(:)          ! collection of rows
       end type Spreadsheet
    
    
    contains
       type(Spreadsheet) function readCSVFile( filename )
          character(len=*), intent(in) :: filename
          character(len=MAXLEN) buffer
          integer i, j
          integer stat
          integer numRows, numCols
          integer start, lineLength
    
          ! Read file once to determine the number of rows
          open( 10, file=filename)
          numRows = 0
          do
             read( 10, *, iostat=stat )
             if ( stat /= 0 ) exit
             numRows = numRows + 1
          end do
          allocate( readCSVFile%row(numRows) )
    
          ! Rewind and read individual rows
          rewind( 10 )
          do i = 1, numRows
             read( 10, "(a)" ) buffer
             lineLength = len_trim( buffer )
    
             ! Count number of cells (note: may end in a comma, i.e. empty cell)
             numCols = 1
             do j = 1, lineLength
                if ( buffer(j:j) == ',' ) numCols = numCols + 1
             end do
             allocate( readCSVFile%row(i)%column(numCols) )
    
             ! Take data delimited by commas
             numCols = 1
             start = 1
             do j = 1, len_trim( buffer )
                if ( buffer(j:j) == ',' ) then
                   readCSVFile%row(i)%column(numCols)%value = buffer(start:j-1)
                   start = j + 1
                   numCols = numCols + 1
                end if
             end do
             if ( start <= lineLength ) readCSVFile%row(i)%column(numCols)%value = buffer(start:lineLength )
    
          end do
          close( 10 )
    
       end function readCSVFile
    
    end module MyModule
    
    !===============================================================================
    
    program test
       use MyModule
       implicit none
       type(Spreadsheet) S
       real, allocatable :: R(:)
       integer i, j
       
       S = readCSVFile( "test.csv" )
       allocate( R(size(S%row)) )
    
       do i = 1, size( S%row )
          write( *, "( *(a20) )" ) ( S%row(i)%column(j)%value, j = 1, size( S%row(i)%column ) )
          read( S%row(i)%column(3)%value, * ) R(i)
       end do
    
       ! Just the 3rd column, as real
       write( *, * ) "Third column: ", R
    
    end program test
    

    Output:

                       0          2013/02/20              24.448
                       1          2013/02/20              24.448
                       2          2013/02/20              24.545
                       3          2013/02/20              24.448
                       4          2013/02/20              24.351
                       5          2013/02/20              24.351
     Third column:    24.4480000       24.4480000       24.5450001       24.4480000       24.3509998       24.3509998