Tags: java, spring, spring-boot, spring-batch, spring-batch-tasklet

How to read only selected data that satisfies a condition from a CSV using Spring Batch


I have a CSV with the columns [id, info], and I have access to a service that returns a list of ids (say, ActiveIds). I would like to read from the CSV only those rows whose id is present in ActiveIds, and then access the info column of the selected rows. What are some optimised approaches for this?


Solution

  • First, create the following beans for reading, processing and writing the data to a new CSV file:

    /**
     * Builds the reader that turns each line of the classpath resource
     * {@code data.csv} into a {@code MyObject}.
     *
     * <p>Each line is tokenized into the fields {@code id} and {@code info}
     * (in that order) and bound to a {@code MyObject} by property name.
     *
     * @return the configured flat-file reader
     */
    @Bean
    public FlatFileItemReader<MyObject> reader() {
        // Plain instances + setters instead of double-brace initialization:
        // "new X() {{ ... }}" declares an anonymous subclass per use, each one
        // holding a hidden reference to the enclosing configuration instance.
        DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
        tokenizer.setNames("id", "info");

        BeanWrapperFieldSetMapper<MyObject> fieldSetMapper = new BeanWrapperFieldSetMapper<>();
        fieldSetMapper.setTargetType(MyObject.class);

        DefaultLineMapper<MyObject> lineMapper = new DefaultLineMapper<>();
        lineMapper.setLineTokenizer(tokenizer);
        lineMapper.setFieldSetMapper(fieldSetMapper);

        FlatFileItemReader<MyObject> reader = new FlatFileItemReader<>();
        reader.setResource(new ClassPathResource("data.csv"));
        reader.setLineMapper(lineMapper);
        return reader;
    }
    
    /**
     * Builds the processor that lets through only the items whose id is
     * contained in {@code activeIds}.
     *
     * <p>The ids are copied into a hash set once, when the bean is created, so
     * the membership test per item is a hash lookup rather than a linear scan
     * of the list. Later modifications of {@code activeIds} are therefore not
     * seen by the processor.
     *
     * @param activeIds ids of the records to keep
     * @return a processor that returns the item unchanged when its id is
     *         active, and {@code null} otherwise
     */
    @Bean
    public ItemProcessor<MyObject, MyObject> processor(List<String> activeIds) {
        // Fully qualified so the snippet does not depend on extra imports.
        java.util.Set<String> activeIdSet = new java.util.HashSet<>(activeIds);
        // null is the "drop this item" result; the item itself means "keep".
        return item -> activeIdSet.contains(item.getId()) ? item : null;
    }
    
    /**
     * Builds the writer that emits each {@code MyObject} as one line of the
     * file-system resource {@code filtered_data.csv}.
     *
     * <p>The properties {@code id} and {@code info} are extracted from every
     * item (in that order) and joined with a comma.
     *
     * @return the configured flat-file writer
     */
    @Bean
    public FlatFileItemWriter<MyObject> writer() {
        // Plain instances + setters instead of double-brace initialization,
        // which would declare an anonymous subclass for each configured object.
        BeanWrapperFieldExtractor<MyObject> fieldExtractor = new BeanWrapperFieldExtractor<>();
        fieldExtractor.setNames(new String[]{"id", "info"});

        DelimitedLineAggregator<MyObject> lineAggregator = new DelimitedLineAggregator<>();
        lineAggregator.setDelimiter(",");
        lineAggregator.setFieldExtractor(fieldExtractor);

        FlatFileItemWriter<MyObject> writer = new FlatFileItemWriter<>();
        writer.setResource(new FileSystemResource("filtered_data.csv"));
        writer.setLineAggregator(lineAggregator);
        return writer;
    }
    
    /**
     * Assembles the job named {@code filterJob}: a single step,
     * {@code filterStep}, that passes items from the reader through the
     * processor to the writer in chunks of 10.
     *
     * @param jobs      factory used to start the job definition
     * @param steps     factory used to start the step definition
     * @param reader    source of {@code MyObject} items
     * @param processor filter applied to every item that was read
     * @param writer    destination of the items the processor returned
     * @return the assembled job
     */
    @Bean
    public Job filterJob(JobBuilderFactory jobs, StepBuilderFactory steps,
                         FlatFileItemReader<MyObject> reader, ItemProcessor<MyObject, MyObject> processor,
                         FlatFileItemWriter<MyObject> writer) {
        final int chunkSize = 10;

        // The only step of the job: read -> process -> write.
        final Step filterStep =
                steps.get("filterStep")
                        .<MyObject, MyObject>chunk(chunkSize)
                        .reader(reader)
                        .processor(processor)
                        .writer(writer)
                        .build();

        return jobs.get("filterJob")
                .incrementer(new RunIdIncrementer())
                .flow(filterStep)
                .end()
                .build();
    }
    

    Next, create a runJob method as shown below:

    /** Launcher used to start the job. */
    @Autowired
    private JobLauncher jobLauncher;

    /** The job that is launched by {@link #runJob(List)}. */
    @Autowired
    private Job filterJob;

    /**
     * Launches {@code filterJob} with the given ids passed as the
     * comma-joined string job parameter {@code "ids"}.
     *
     * @param activeIds ids to hand to the job; must not be {@code null}
     * @throws IllegalStateException if the launcher rejects the job; the
     *         original Spring Batch exception is kept as the cause
     */
    public void runJob(List<String> activeIds) {
        JobParameters jobParameters = new JobParametersBuilder()
                .addString("ids", String.join(",", activeIds))
                .toJobParameters();
        // NOTE(review): "ids" is the only parameter, so two calls with the same
        // ids presumably target the same job instance — confirm whether a
        // unique parameter (e.g. a timestamp) is wanted for re-runs.
        try {
            jobLauncher.run(filterJob, jobParameters);
        } catch (org.springframework.batch.core.JobExecutionException e) {
            // JobLauncher.run declares checked exceptions; they are handled via
            // their common supertype and rethrown unchecked so this method's
            // signature stays free of a throws clause.
            throw new IllegalStateException("Failed to launch filterJob", e);
        }
    }
    

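    Calling runJob with the list of active ids, as shown below, launches the job: it reads the data from the CSV file, drops every record whose id is not in the list of active ids, and writes the remaining records to a new CSV file. Note that the processor bean above receives activeIds through Spring injection of a List<String>; the "ids" job parameter set in runJob is not read by any of the beans shown. To drive the filter from that job parameter instead, declare the processor bean with @StepScope and inject the value with @Value("#{jobParameters['ids']}").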

    // activeIds: the list of ids obtained from the service that reports the active ids.
    runJob(activeIds);