Tags: spring-boot, spring-batch, java-11, spring-scheduled, spring-batch-tasklet

Read New File While Doing Processing For A Field In Spring Batch


I have a fixed-length input file that I am reading using Spring Batch. I have already implemented the Job, Step, Processor, etc. Here is the sample code:

@Configuration
public class BatchConfig {

    private final JobBuilderFactory jobBuilderFactory;
    private final StepBuilderFactory stepBuilderFactory;

    /** Fixed-length input file, injected from the {@code inputFile} property. */
    @Value("${inputFile}")
    private Resource resource;

    @Autowired
    public BatchConfig(JobBuilderFactory jobBuilderFactory, StepBuilderFactory stepBuilderFactory) {
        this.jobBuilderFactory = jobBuilderFactory;
        this.stepBuilderFactory = stepBuilderFactory;
    }

    /** Single-step job that loads the fixed-length employee file. */
    @Bean
    public Job job() {
        return this.jobBuilderFactory.get("JOB-Load")
                .start(fileReadingStep())
                .build();
    }

    /**
     * Chunk-oriented step: reads 1000 {@link Employee} records per chunk,
     * processes each into an {@link EmpOutput}, writes the chunk, and skips
     * bad records according to {@link #skipPolicy()}.
     */
    @Bean
    public Step fileReadingStep() {
        return stepBuilderFactory.get("File-Read-Step1")
                .<Employee, EmpOutput>chunk(1000)
                .reader(itemReader())
                .processor(new CustomFileProcesser())
                .writer(new CustomFileWriter())
                .faultTolerant()
                .skipPolicy(skipPolicy())
                .build();
    }

    /** Flat-file reader over the injected resource, one Employee per line. */
    @Bean
    public FlatFileItemReader<Employee> itemReader() {
        FlatFileItemReader<Employee> flatFileItemReader = new FlatFileItemReader<Employee>();
        flatFileItemReader.setResource(resource);
        flatFileItemReader.setName("File-Reader");
        flatFileItemReader.setLineMapper(LineMapper());
        return flatFileItemReader;
    }

    /**
     * Maps each fixed-length line to an Employee via column ranges.
     * NOTE(review): method name should be lowerCamelCase ({@code lineMapper});
     * kept as-is so the bean name is unchanged for existing callers.
     */
    @Bean
    public LineMapper<Employee> LineMapper() {
        DefaultLineMapper<Employee> defaultLineMapper = new DefaultLineMapper<Employee>();
        FixedLengthTokenizer fixedLengthTokenizer = new FixedLengthTokenizer();
        fixedLengthTokenizer.setNames(new String[] { "employeeId", "employeeName", "employeeSalary" });
        // BUG FIX: employeeName (10-20) and employeeSalary previously overlapped at
        // column 20 (Range(20, 30)). Ranges are 1-based and inclusive, so the salary
        // field must start at column 21.
        fixedLengthTokenizer.setColumns(new Range[] { new Range(1, 9), new Range(10, 20), new Range(21, 30) });
        // Lenient tokenizing: lines shorter/longer than the declared ranges are tolerated.
        fixedLengthTokenizer.setStrict(false);

        defaultLineMapper.setLineTokenizer(fixedLengthTokenizer);
        defaultLineMapper.setFieldSetMapper(new CustomFieldSetMapper());

        return defaultLineMapper;
    }

    /** Skip policy applied by the fault-tolerant step. */
    @Bean
    public JobSkipPolicy skipPolicy() {
        return new JobSkipPolicy();
    }

}

For the processing I have added some sample code showing what I need, but if I add a BufferedReader here then the job takes much more time to complete.

@Component
public class CustomFileProcesser implements ItemProcessor<Employee, EmpOutput> {

    /**
     * Builds an {@link EmpOutput} for each input {@link Employee}, validating
     * the salary field against an external reference file.
     */
    @Override
    public EmpOutput process(Employee item) throws Exception {
        EmpOutput emp = new EmpOutput();
        emp.setEmployeeSalary(checkSal(item.getEmployeeSalary()));
        return emp;
    }

    /**
     * Validates {@code sal} against the salary reference file: returns
     * {@code sal} unchanged when some line's salary field (columns 6-7,
     * 0-based substring 5..7) matches it, otherwise the default "5000".
     *
     * BUG FIX: the original loop overwrote {@code sal} on every line, so only
     * the LAST line of the file determined the result. We now exit as soon as
     * a match is found and fall back to the default only when no line matches.
     *
     * NOTE(review): this re-reads the whole file for every processed item,
     * which is why the job is slow. Cache the file's salary values once (e.g.
     * in a Set populated on first use or in a StepExecutionListener) and check
     * membership instead.
     */
    public String checkSal(String sal) {

        File f1 = new File("C:\\Users\\John\\New\\salary.txt");

        // try-with-resources: the original leaked the FileReader/BufferedReader.
        try (BufferedReader br = new BufferedReader(new FileReader(f1))) {
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                // Guard against short lines before slicing the fixed-width field.
                if (line.length() >= 7 && line.substring(5, 7).equals(sal)) {
                    return sal; // match found — keep the original salary
                }
            }
        } catch (Exception e) {
            // NOTE(review): swallowing the exception makes read failures silently
            // produce the default salary; consider logging via SLF4J or rethrowing
            // so the skip policy can handle it.
            e.printStackTrace();
        }

        return "5000"; // no match (or read failure) — default salary
    }

    // Other fields are validated the same way against different reference files.
    // Each of those files contains more than 30k fixed-length records, and the
    // field is extracted by character index — all the more reason to cache them.

}

While processing one or more fields, I need to check against another file by reading it (a file I will read from the file system or from the cloud).

While processing the data for 5 fields I need to read 5 different files; I will check the field details inside those files and then generate the result, which will be processed further.


Solution

  • You can cache the content of the file in memory and do your check against the cache instead of re-reading the entire file from disk for each item.

    You can find an example here: Spring Batch With Annotation and Caching.