java apache-metamodel

How to insert data into a CSV file using Apache MetaModel?


I am working on a Java project and I am using Apache MetaModel to insert data into a CSV file.

Code:

 /**
  * Creates the table "my_table" in unexisting_file.csv and inserts two sample rows.
  * Table creation and row insertion both happen inside one update script, so the
  * table is created again on every call of this method.
  */
 public void insertIntoCSVFile() {
    final UpdateableDataContext csvContext =
        DataContextFactory.createCsvDataContext(new File("unexisting_file.csv"));
    final Schema defaultSchema = csvContext.getDefaultSchema();

    final UpdateScript createAndFill = new UpdateScript() {
      @Override
      public void run(UpdateCallback callback) {
        // Define the three-column table.
        final Table people = callback.createTable(defaultSchema, "my_table")
          .withColumn("name").ofType(VARCHAR)
          .withColumn("gender").ofType(CHAR)
          .withColumn("age").ofType(INTEGER)
          .execute();

        // Add the two sample rows.
        callback.insertInto(people)
          .value("name", "John Doe")
          .value("gender", 'M')
          .value("age", 42)
          .execute();
        callback.insertInto(people)
          .value("name", "Jane Doe")
          .value("gender", 'F')
          .value("age", 42)
          .execute();
      }
    };

    csvContext.executeUpdate(createAndFill);
 }

Whenever I call this method for the same CSV file, it inserts the data successfully, but it deletes the old data before inserting the new data, so the file only ever contains two rows.

I want to be able to insert duplicate rows by calling the function multiple times; that is, new rows should be appended and the old rows should remain unchanged.

How can I achieve this?


Solution

  • Move the table creation code outside of the data insertion method. Every time you create the table in the CSV file, it overwrites the previous table and its contents.

    Here is one approach:

    import java.io.File;
    import org.apache.metamodel.UpdateableDataContext;
    import org.apache.metamodel.csv.CsvDataContext;
    import org.apache.metamodel.DataContextFactory;
    import org.apache.metamodel.schema.Schema;
    import org.apache.metamodel.UpdateScript;
    import org.apache.metamodel.UpdateCallback;
    import org.apache.metamodel.schema.Table;
    import org.apache.metamodel.schema.ColumnType;
    
    /**
     * Demonstrates appending rows to a CSV file with Apache MetaModel: the table
     * is created once, and the insertion step is kept in a separate method so it
     * can be called repeatedly without re-creating the table.
     */
    public class App {
    
        /**
         * Creates the table in the CSV file, then runs the insertion method twice.
         *
         * @param args command-line arguments (not used)
         */
        public static void main(String[] args) {
            File myFile = new File("c:/tmp/unexisting_file.csv");
            UpdateableDataContext dataContext = DataContextFactory.createCsvDataContext(myFile);
            final Schema schema = dataContext.getDefaultSchema();
    
            final String tableName = "my_table";
    
            dataContext.executeUpdate(new UpdateScript() {
                @Override
                public void run(UpdateCallback callback) {
    
                    // CREATING A TABLE
                    // The created Table is not needed here; the insertion method
                    // looks the table up again by its qualified label.
                    callback.createTable(schema, tableName)
                            .withColumn("name").ofType(ColumnType.VARCHAR)
                            .withColumn("gender").ofType(ColumnType.CHAR)
                            .withColumn("age").ofType(ColumnType.INTEGER)
                            .execute();
                }
            });
    
            insertIntoCSVFile(dataContext, tableName);
            insertIntoCSVFile(dataContext, tableName);
        }
    
        /**
         * Inserts the two sample rows into an existing table.
         *
         * @param dataContext the data context to update
         * @param tableName   name of the table, without the schema prefix
         */
        public static void insertIntoCSVFile(final UpdateableDataContext dataContext,
                final String tableName) {
    
            dataContext.executeUpdate(new UpdateScript() {
                @Override
                public void run(UpdateCallback callback) {
    
                    // Look the table up as "<default schema name>.<table name>".
                    String schemaName = dataContext.getDefaultSchema().getName();
                    Table table = dataContext.getTableByQualifiedLabel(schemaName + "." + tableName);
    
                    // INSERTING SOME ROWS
                    callback.insertInto(table).value("name", "John Doe").value("gender", 'M').value("age", 42).execute();
                    callback.insertInto(table).value("name", "Jane Doe").value("gender", 'F').value("age", 42).execute();
                }
            });
        }
    }
    

    In this example, the main method is responsible for creating the table. This method then calls the data insertion method twice, passing in the relevant context and table name.

    The resulting file contents are:

    "name","gender","age"
    "John Doe","M","42"
    "Jane Doe","F","42"
    "John Doe","M","42"
    "Jane Doe","F","42"
    

    For further info, see the semantics for the creation of tables in CSV files - specifically:

    Create or overwrite the CSV file with a (new) table structure.

    Update

    You can prevent the data being overwritten by checking if the table already exists, before trying to create it (again).

    Here is a simple example showing that approach:

    First, change the table name so it matches the file name:

    // Same as the CSV file name, which is the default table name for a CSV file.
    final String tableName = "unexisting_file.csv";
    

    This is because MetaModel uses this as the default table name for tables in CSV files. We can use this to check if we have already created the table:

    // Create the table only when it is not already there.
    if (!tableExists()) {
        createTable();
    }
    

    Using the above approach, here is a full example:

    import org.apache.metamodel.UpdateableDataContext;
    import org.apache.metamodel.UpdateScript;
    import org.apache.metamodel.UpdateCallback;
    import org.apache.metamodel.schema.Table;
    import org.apache.metamodel.csv.CsvConfiguration;
    import org.apache.metamodel.csv.CsvDataContext;
    import org.apache.metamodel.schema.ColumnType;
    import org.apache.metamodel.util.Resource;
    import org.apache.metamodel.util.FileResource;
    
    /**
     * Appends sample rows to a CSV file, creating the table beforehand only when
     * it cannot be found in the default schema.
     */
    public class CsvDemo {
    
        private final String tableName;
        private final UpdateableDataContext dataContext;
    
        /** Sets up a CSV data context for the file c:/tmp/example.csv. */
        public CsvDemo() {
            tableName = "example.csv";
            final Resource csvFile = new FileResource("c:/tmp/" + tableName);
            dataContext = new CsvDataContext(csvFile, new CsvConfiguration());
        }
    
        /** Creates the table if it is missing, then appends the sample rows. */
        public void doWork() {
            final boolean alreadyThere = tableExists();
            if (!alreadyThere) {
                createTable();
            }
            appendData();
        }
    
        /** @return true when the table lookup yields a non-null table */
        private boolean tableExists() {
            final Table found = getTable();
            return found != null;
        }
    
        /** Looks the table up by name in the default schema. */
        private Table getTable() {
            return dataContext.getDefaultSchema().getTableByName(tableName);
        }
    
        /** Creates the table with the columns name, gender and age. */
        private void createTable() {
            final UpdateScript defineTable = new UpdateScript() {
                @Override
                public void run(UpdateCallback callback) {
                    callback.createTable(dataContext.getDefaultSchema(), tableName)
                            .withColumn("name").ofType(ColumnType.VARCHAR)
                            .withColumn("gender").ofType(ColumnType.CHAR)
                            .withColumn("age").ofType(ColumnType.INTEGER)
                            .execute();
                }
            };
            dataContext.executeUpdate(defineTable);
        }
    
        /** Inserts the two sample rows into the table. */
        private void appendData() {
            // Resolve the table before the update script is handed over.
            final Table target = getTable();
            final UpdateScript addRows = new UpdateScript() {
                @Override
                public void run(UpdateCallback callback) {
                    callback.insertInto(target)
                            .value("name", "John Doe")
                            .value("gender", 'M')
                            .value("age", 42)
                            .execute();
                    callback.insertInto(target)
                            .value("name", "Jane Doe")
                            .value("gender", 'F')
                            .value("age", 42)
                            .execute();
                }
            };
            dataContext.executeUpdate(addRows);
        }
    
    }
    

    Now, you will only create the table in your CSV file if it does not already exist. And if it does exist, then your additional data will be appended to any data which is already in the file.