javaapache-poidocumentumdocumentum-dfc

Inserting data into list of map taking too much time in Java


I have a task to send the automated report daily. So, I'm basically writing the query result set/collection into list of map so that I can write that data to excel. Here, I have the following method that inserts collection data into list of map. The problem is this method takes 1 hour 20 minutes to insert data from the collection which has 3000-3500 rows and 14 columns. In my code I have 5 similar queries to run and for each query it is taking same amount of time. Can you please help me in optimizing the code so that it takes less amount of time?

// avoided following method

public static List<Map<String, Object>> insertAttrValues(IDfCollection dfCollection, List<String> attributes) throws DfException {

    if (dfCollection == null || attributes == null) {
        throw new MissingParameterException("collection and attributes");
    }

    List<Map<String, Object>> dataList = new ArrayList<>();

    while (dfCollection.next()) {
        Map<String, Object> map = new LinkedHashMap<>(attributes.size());

        for (String attribute: attributes) {
            map.put(attribute, dfCollection.getString(attribute));
        }
        dataList.add(map);
    }

    return dataList;
}

Edit: Sorry, placed important parts of code and used collection directly instead of inserting values in map and process later.

Starting Point:


@SpringBootApplication
public class ImmsClinicalReportApplication {

    public static void main(String[] args) {
        ApplicationContext applicationContext = SpringApplication.run(ImmsClinicalReportApplication.class, args);
        init(applicationContext);
    }

    private static void init(@NotNull ApplicationContext applicationContext) {
        ClinicalReportController clinicalReportController = (ClinicalReportController) applicationContext.getBean("clinicalReportController");

        IDfSession dfSession = null;

        try {
            dfSession = clinicalReportController.getSession();
            clinicalReportController.execute(dfSession);
            sendEmail(applicationContext, clinicalReportController);
        } catch (DfException | IOException e) {
            e.printStackTrace();
        } finally {
            try {
                clinicalReportController.cleanSession(dfSession);
            } catch (DfException e) {
                e.printStackTrace();
            }
        }
    }
}

@Controller("clinicalReportController")
@PropertySource("classpath:application.properties")
public class ClinicalReportController {

    private static final Logger logger = Logger.getLogger(ClinicalReportController.class);

    private final SessionHelper sessionHelper;
    private final DqlHelper dqlHelper;
    private final AppProperties appProperties;

    @Value("${report_path}")
    private String XLSX_FILE_PATH;

    private static final String[] moduleTypes = {
        "Clin Protocol", "Clin Investigator Brochure", "Clin Core Text",
        "Clin Process Documentation", "Clin Supporting Information"
    };

    @Autowired
    public ClinicalReportController(DqlHelper dqlHelper, SessionHelper sessionHelper, AppProperties appProperties) {
        this.dqlHelper = dqlHelper;
        this.sessionHelper = sessionHelper;
        this.appProperties = appProperties;
    }

    /**
     * Method that processes the report
     * @param dfSession dfSession
     * @throws DfException DfException
     * @throws IOException IOException
     */
    public void execute(IDfSession dfSession) throws DfException, IOException {

        StopWatch timer = new StopWatch();

        for (int i = 0; i < moduleTypes.length; i++) {
            // start timer
            timer.start();
            IDfCollection dfCollection = dqlHelper.query(dfSession, QueryConstant.immsQueries[i]);

            List<String> attributes = new ArrayList<>(dfCollection.getAttrCount());

            for (int j = 0; j < dfCollection.getAttrCount(); j++) {
                attributes.add(dfCollection.getAttr(j).getName());
            }

            // stop timer
            timer.stop();
            // Each query takes 20 mins of time
            /* Sample query: select d.r_object_id, d.object_name, d.title,
            d.imms_extreleased_date, d.imms_extreleased_reason, d.imms_extreleaser,
            d.imms_protocol_number, d.imms_protocol_number_rep, d.keywords,
            d.imms_compound_number, d.imms_module_type, d.imms_prereleaser,
            d.imms_prereleased_date, f.r_folder_path from imms_document d,
            dm_folder f where d.i_folder_id=f.r_object_id and i_cabinet_id='0c0033ec80000700'
            and d.imms_module_type = 'Clin Protocol' and d.imms_extreleased_date >
            date('31/12/2016', 'dd/mm/yyyy') and f.r_folder_path is not nullstring enable (ROW_BASED)*/
            logger.info("Time taken to run query: " + QueryConstant.immsQueries[i] + ": " +
                    timer.getTotalTimeSeconds()/60 + " minutes");

            // List<Map<String, Object>> resultSet = ImmsUtils.insertAttrValues(dfCollection, attributes);

            if (i == 0) {
                processReport(dfCollection, moduleTypes[i], attributes);
            } else {
                updateReport(dfCollection, moduleTypes[i], attributes);
            }
            cleanCollection(dfCollection);
        }
    }

    /**
     * Method process for remaining queries/sheets
     * @param resultSet resultSet
     * @param objectType objectType
     * @param attributes attributes
     * @throws IOException IOException
     */
    private void updateReport(IDfCollection resultSet, String objectType, List<String> attributes) throws IOException, DfException {
        Workbook workbook = new XSSFWorkbook(new FileInputStream(XLSX_FILE_PATH));
        excelWriterAndOperateOutputStream(resultSet, objectType, workbook, attributes);
    }

    /**
     * Method that writes data to excel sheets
     * @param dfCollection dfCollection
     * @param sheet2 sheet2
     * @param workbook workbook
     * @param attributes 
 
     * Using collection directly. Not sure where is the issue in following method, writing data to sheet is also taking 50 minutes of time
     */
     private void writeToSheet(@NotNull IDfCollection dfCollection, Sheet sheet2, Workbook workbook, List<String> attributes) throws DfException {
        Sheet sheet;
        Row row;

        sheet = sheet2;

        Object[] values = new Object[attributes.size()];
        StopWatch timer = new StopWatch();
        
        // moved outside of loop 
        // TODO: avoid regex, use other logic 
        String dateRegex = "^([0-9]{4})/([0-1][0-9])/([0-3][0-9])\\s([0-1][0-9]|[2][0-3]):([0-5][0-9]):([0-5][0-9])$";
        Pattern datePattern = Pattern.compile(dateRegex);
        // avoid SDF and Date and
        // TODO: use java.time - maybe LocalDate
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
        Date date = null;

        CellStyle dateCellStyle = workbook.createCellStyle();
        dateCellStyle.setDataFormat(workbook.getCreationHelper().createDataFormat().getFormat("yyyy/MM/dd HH:mm:ss"));

        timer.start();
        while (dfCollection.next()) {
            for (int i = 0; i < attributes.size(); i++) {
                values[i] = dfCollection.getString(attributes.get(i));
            }

            int lastRow = sheet.getLastRowNum();
            row = sheet.createRow(++lastRow);
            int cellNum = 0;


            for (Object value: values) {
                Cell cell = row.createCell(cellNum++);
                if (datePattern.matcher(value.toString()).matches()) {
                    try {
                        date = simpleDateFormat.parse(value.toString());
                    } catch (ParseException e) {
                        e.printStackTrace();
                    }
                    cell.setCellValue(date);
                    cell.setCellStyle(dateCellStyle);
                } else {
                    cell.setCellValue(value.toString());
                }
            }
        }
        timer.stop();
        // Taking 50 mins of time to write collection data
        // Log: Time taken for writing data 54.567404175 minutes
        logger.info("Time taken for writing data " + timer.getTotalTimeSeconds()/60 + " minutes");


        // Resize all columns to fit the content size
        for (int i = 0; i < attributes.size(); i++) {
            sheet.autoSizeColumn(i);
        }
    }

    /**
     * Method to create sheet, set fonts and colors
     * @param moduleType moduleType
     * @param workbook workbook
     * @return Sheet
     */
     private Sheet createSheet(String moduleType, Workbook workbook) {
        return workbook.createSheet(moduleType);
     }

    /**
     * Method to process first query/sheet
     * @param dfCollection dfCollection
     * @param moduleType moduleType
     * @param attributes attributes
     * @throws IOException IOException
     */
     private void processReport(IDfCollection dfCollection, String moduleType, List<String> attributes) throws IOException, DfException {
        // Create a Workbook - for xlsx
        Workbook workbook = new XSSFWorkbook();

        /*CreationHelper helps us create instances of various things like DataFormat,
          Hyperlink, RichTextString etc, in a format (HSSF, XSSF) independent way*/
        
        workbook.getCreationHelper();

        excelWriterAndOperateOutputStream(dfCollection, moduleType, workbook, attributes);
    }

    /**
     * Method that writes and saves data to file
     * @param resultSet resultSet
     * @param moduleType  moduleType
     * @param workbook workbook
     * @param attributes attributes
     * @throws IOException IOException
     */
    private void excelWriterAndOperateOutputStream(IDfCollection resultSet, String moduleType, Workbook workbook, List<String> attributes) throws IOException, DfException {
        Sheet sheet = createSheet(moduleType, workbook);

        CellStyle cellStyle = setFontsAndColors(workbook);

        // Create a Row
        Row headerRow = sheet.createRow(0);
        // Create cells
        for (int i = 0; i < attributes.size(); i++) {
            Cell cell = headerRow.createCell(i);
            cell.setCellValue(attributes.get(i));
            cell.setCellStyle(cellStyle);
        }

        writeToSheet(resultSet, workbook.getSheet(moduleType), workbook, attributes);
        // Write the output to the file
        FileOutputStream fileOutputStream = new FileOutputStream(XLSX_FILE_PATH);
        workbook.write(fileOutputStream);
        // close the file
        fileOutputStream.close();
        // close the workbook
        workbook.close();
    }

    @NotNull
    private CellStyle setFontsAndColors(Workbook workbook) {
        CellStyle cellStyle = workbook.createCellStyle();

        // Create a Font for styling header cells
        Font headerFont = workbook.createFont();
        headerFont.setBold(false);
        headerFont.setFontHeightInPoints((short) 12);
        headerFont.setColor(IndexedColors.GREEN.getIndex());
        cellStyle.setFont(headerFont);
        return cellStyle;
   }

    /**
     * Get IDfSession object
     * @return IDfSession
     * @throws DfException DfException
     */
    public IDfSession getSession() throws DfException {
        IDfSession dfSession;

        IDfSessionManager sessionManager = sessionHelper.getDfSessionManager(appProperties.getRepository(), appProperties.getUsername(), appProperties.getPassword());
        dfSession = sessionManager.getSession(appProperties.getRepository());
        return dfSession;
    }

    /**
     * Clean IDfCollection
     * @param dfCollection dfCollection
     */
    public void cleanCollection(IDfCollection dfCollection) {
        dqlHelper.cleanup(dfCollection);
    }

    /**
     * Clean IDfSession
     * @param dfSession dfSession
     */
    public void cleanSession(IDfSession dfSession) throws DfException {
        sessionHelper.cleanSession(dfSession);
    }
    }

Solution

  • I think the main problem is with query. Try below steps:

    select * from imms_document d, dm_folder f where d.i_folder_id=f.r_object_id and i_cabinet_id='0c0033ec80000700' and d.imms_module_type = 'Clin Protocol' and d.imms_extreleased_date > date('31/12/2016', 'dd/mm/yyyy') and f.r_folder_path is not nullstring enable (ROW_BASED)

    included_attributes=r_object_id,object_name,title,imms_extreleased_date,imms_extreleased_reason,imms_extreleaser,imms_protocol_number,imms_protocol_number_rep,keywords,imms_compound_number,imms_module_type,imms_prereleaser,imms_prereleased_date,r_folder_path

    Do the following in your AppProperties class file:

    @Component
    public class AppProperties {
    
       /**
        *other fields
        */
    
        @Getter
        @Value("${included_attributes}")
        private String[] includedAttributes;
    
    }
    

    Now in your execute() method, modify the code to use only the attributes you need to get the data for.

    public void execute(IDfSession dfSession) throws DfException, IOException {
    
        StopWatch timer = new StopWatch();
    
        for (int i = 0; i < moduleTypes.length; i++) {
            // start timer
            timer.start();
            IDfCollection dfCollection = dqlHelper.query(dfSession, QueryConstant.immsQueries[i]);
            // stop timer
            timer.stop();
            logger.info("Time taken to run query: " + QueryConstant.immsQueries[i] + ": " +
                    timer.getTotalTimeSeconds() + " seconds");    
            // attributes to be added
            List<String> attributes = new ArrayList<>();
            // Get included attributes as list
            List<String> includedAttributes = Arrays.asList(appProperties.getIncludedAttributes());
    
            for (int j = 0; j < dfCollection.getAttrCount(); j++) {
                // check for the attribute in included attributes and add if exists
                if (hasAttribute(includedAttributes, dfCollection.getAttr(j).getName())) {
                    attributes.add(dfCollection.getAttr(j).getName());
                }
            }
    
    
            if (i == 0) {
                processReport(dfCollection, moduleTypes[i], attributes);
            } else {
                updateReport(dfCollection, moduleTypes[i], attributes);
            }
            cleanCollection(dfCollection);
        }
    }
    
    public static boolean hasAttribute(@NotNull List<String> attributes, String attribute) {
        for(String attr : attributes){
            if(attribute.contains(attr)){
                return true;
            }
        }
        return false;
    }
    

    Directly use collection for POI structures and no need to insert data in array and iterate over it again.

    private void writeToSheet(@NotNull IDfCollection dfCollection, Sheet sheet2,
                                  @NotNull Workbook workbook, List<String> attributes) throws DfException {
            Sheet sheet;
            Row row;
    
            sheet = sheet2;
    
            StopWatch timer = new StopWatch();
    
            String dateRegex = "^([0-9]{4})/([0-1][0-9])/([0-3][0-9])\\s([0-1][0-9]|[2][0-3]):([0-5][0-9]):([0-5][0-9])$";
            Pattern datePattern = Pattern.compile(dateRegex);
    
            DateTimeFormatter timeFormatter = DateTimeFormatter.ofPattern("yyyy/MM/dd HH:mm:ss");
    
            CellStyle dateCellStyle = workbook.createCellStyle();
            dateCellStyle.setDataFormat(workbook.getCreationHelper().createDataFormat().getFormat("yyyy/MM/dd HH:mm:ss"));
    
            int lastRow = 0;
    
            timer.start();
            while (dfCollection.next()) {
                row = sheet.createRow(++lastRow);
                int cellNum = 0;
    
                for (String attribute : attributes) {
                    Object value = dfCollection.getString(attribute);
    
                    Cell cell = row.createCell(cellNum++);
    
                    if (datePattern.matcher(value.toString()).matches()) {
                        cell.setCellValue(LocalDateTime.parse(value.toString(), timeFormatter));
                        cell.setCellStyle(dateCellStyle);
                    } else {
                        cell.setCellValue(value.toString());
                    }
                }
            }
            timer.stop();
            logger.info("Time taken for writing data " + timer.getTotalTimeSeconds()/60 + " minutes");
    
    
            // Resize all columns to fit the content size
            for (int i = 0; i < attributes.size(); i++) {
                sheet.autoSizeColumn(i);
            }
        }