c++ parsing text csv

How can I read and parse CSV files in C++?


I need to load and use CSV file data in C++. At this point it can really just be a comma-delimited parser (i.e., don't worry about escaping newlines and commas). The main need is a line-by-line parser that will return a vector for the next line each time the method is called.

I found this article which looks quite promising: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp

I've never used Boost's Spirit, but am willing to try it. But only if there isn't a more straightforward solution I'm overlooking.


Solution

  • If you don't care about escaping commas and newlines,
    AND commas and newlines can't be embedded in quotes (if you can't escape them, then...),
    then it's only about three lines of code (OK, 14 -> but it's only 15 to read the whole file).

    /// Reads one line from `str` and splits it on ',' into its fields.
    ///
    /// No quoting or escaping is understood: every comma is a separator.
    /// A line containing N commas always yields N + 1 fields, so an empty
    /// line gives a single empty field and a trailing comma gives a final
    /// empty field. The state of `str` is not inspected here; callers that
    /// need to detect end-of-input must test the stream themselves.
    std::vector<std::string> getNextLineAndSplitIntoTokens(std::istream& str)
    {
        std::string                line;
        std::getline(str,line);

        std::vector<std::string>   fields;
        std::string::size_type     fieldStart = 0;
        for (;;)
        {
            const std::string::size_type comma = line.find(',', fieldStart);
            if (comma == std::string::npos)
            {
                // Everything after the last comma (possibly nothing at all)
                // is the final field.
                fields.push_back(line.substr(fieldStart));
                break;
            }
            fields.push_back(line.substr(fieldStart, comma - fieldStart));
            fieldStart = comma + 1;
        }
        return fields;
    }
    

    I would just create a class representing a row.
    Then stream into that object:

    #include <cstddef>
    #include <fstream>
    #include <iostream>
    #include <iterator>
    #include <sstream>
    #include <string>
    #include <string_view>
    #include <vector>
    
    /// One line of a comma-separated file, split on ',' (no quoting/escaping).
    ///
    /// The row owns the text of the line and hands out fields as
    /// std::string_view slices into that text, so a returned view is only
    /// valid until the next readNextRow() call on this row or until the row
    /// is destroyed or assigned to.
    class CSVRow
    {
        public:
            /// Returns field `index` (zero-based) as a view into the stored line.
            /// Throws std::out_of_range when `index >= size()`, including on a
            /// row that has not been read yet.
            std::string_view operator[](std::size_t index) const
            {
                const std::string::size_type first = m_data.at(index);
                // The following entry is one past this field's terminating
                // comma (or one past the end of the line), hence the "- 1".
                const std::string::size_type last  = m_data.at(index + 1) - 1;
                return std::string_view(m_line.data() + first, last - first);
            }
            /// Number of fields in the current row; 0 before any row was read.
            std::size_t size() const
            {
                // m_data holds one entry per field plus a closing entry, and
                // is empty until the first read: avoid wrapping 0 - 1.
                return m_data.empty() ? 0 : m_data.size() - 1;
            }
            /// Reads the next line from `str` and records where each field starts.
            /// A line with N commas has N + 1 fields (an empty line has one
            /// empty field). std::getline strips only the '\n' terminator, so
            /// any '\r' before it stays in the last field. The stream's state
            /// is not inspected here; callers test the stream after the call.
            void readNextRow(std::istream& str)
            {
                std::getline(str, m_line);

                m_data.clear();
                // The first field always starts at offset 0.
                m_data.push_back(0);
                for (std::string::size_type comma = m_line.find(',');
                     comma != std::string::npos;
                     comma = m_line.find(',', comma + 1))
                {
                    // Each comma ends one field; the next starts right after it.
                    m_data.push_back(comma + 1);
                }
                // Closing entry: behaves as if a comma sat at the end of the
                // line, so the last field is delimited like all the others.
                m_data.push_back(m_line.size() + 1);
            }
        private:
            std::string                             m_line;   // text of the current line
            std::vector<std::string::size_type>     m_data;   // field start offsets + closing entry
    };
    
    /// Stream-extraction adaptor for CSVRow: loads the next line of `str`
    /// into `data` and hands the stream back, so the expression can be
    /// tested directly in a condition such as `while (file >> row)`.
    auto operator>>(std::istream& str, CSVRow& data) -> std::istream&
    {
        data.readNextRow(str);
        return str;
    }
    /// Example driver: prints the fourth field of every row of "plop.csv".
    int main()
    {
        std::ifstream       file("plop.csv");

        CSVRow              row;
        while(file >> row)
        {
            // row[3] only exists when the row has at least four fields;
            // indexing a shorter row (e.g. a blank line) is out of range.
            if (row.size() < 4)
            {
                std::cerr << "Skipping row with fewer than 4 elements\n";
                continue;
            }
            std::cout << "4th Element(" << row[3] << ")\n";
        }
    }
    

    But with a little work we could technically create an iterator:

    /// Single-pass input iterator that yields one CSVRow per line of a stream.
    ///
    /// A default-constructed iterator is the end sentinel. An iterator built
    /// from a stream reads its first row immediately and becomes the end
    /// sentinel as soon as a read fails. Two iterators compare equal only if
    /// they are the same object or are both end sentinels.
    class CSVIterator
    {
        public:
            using iterator_category = std::input_iterator_tag;
            using value_type        = CSVRow;
            // Iterator difference types must be signed.
            using difference_type   = std::ptrdiff_t;
            // Rows are exposed read-only, matching operator-> and operator*.
            using pointer           = CSVRow const*;
            using reference         = CSVRow const&;

            /// Starts iterating over `str`; a stream that is not good() gives
            /// the end sentinel straight away.
            CSVIterator(std::istream& str)  :m_str(str.good()?&str:nullptr) { ++(*this); }
            /// Builds the end sentinel.
            CSVIterator()                   :m_str(nullptr) {}

            // Pre Increment: read the next row; drop the stream on failure so
            // this iterator compares equal to the end sentinel.
            CSVIterator& operator++()               {if (m_str) { if (!((*m_str) >> m_row)){m_str = nullptr;}}return *this;}
            // Post increment: returns a copy holding the row from before the read.
            CSVIterator operator++(int)             {CSVIterator    tmp(*this);++(*this);return tmp;}
            CSVRow const& operator*()   const       {return m_row;}
            CSVRow const* operator->()  const       {return &m_row;}

            // const-qualified so that const iterators can be compared too.
            bool operator==(CSVIterator const& rhs) const {return ((this == &rhs) || ((this->m_str == nullptr) && (rhs.m_str == nullptr)));}
            bool operator!=(CSVIterator const& rhs) const {return !((*this) == rhs);}
        private:
            std::istream*       m_str;   // source stream; nullptr marks the end sentinel
            CSVRow              m_row;   // most recently read row
    };
    
    
    /// Example driver: same output as the stream-based loop, written with
    /// CSVIterator.
    int main()
    {
        std::ifstream       file("plop.csv");

        for(CSVIterator loop(file); loop != CSVIterator(); ++loop)
        {
            // (*loop)[3] only exists when the row has at least four fields;
            // indexing a shorter row (e.g. a blank line) is out of range.
            if (loop->size() < 4)
            {
                std::cerr << "Skipping row with fewer than 4 elements\n";
                continue;
            }
            std::cout << "4th Element(" << (*loop)[3] << ")\n";
        }
    }
    

    Now that we are in 2020, let's add a CSVRange object:

    /// Wraps an input stream so it can be used in a range-based for loop
    /// that visits one CSVRow per line.
    ///
    /// Only a reference to the stream is kept, so the stream must outlive
    /// the range. begin() builds a new CSVIterator on the stream every time
    /// it is called; end() returns the default-constructed sentinel.
    class CSVRange
    {
        public:
            CSVRange(std::istream& str)
                : m_source(str)
            {}
            CSVIterator begin() const { return CSVIterator(m_source); }
            CSVIterator end()   const { return CSVIterator(); }
        private:
            std::istream&   m_source;
    };
    
    /// Example driver: same output as the other loops, written as a
    /// range-based for over CSVRange.
    int main()
    {
        std::ifstream       file("plop.csv");

        for(auto& row: CSVRange(file))
        {
            // row[3] only exists when the row has at least four fields;
            // indexing a shorter row (e.g. a blank line) is out of range.
            if (row.size() < 4)
            {
                std::cerr << "Skipping row with fewer than 4 elements\n";
                continue;
            }
            std::cout << "4th Element(" << row[3] << ")\n";
        }
    }