java · spring · amazon-web-services · spring-boot · amazon-textract

Using the AWS Textract QUERIES feature with Java / Spring Boot


I'm working on a Spring Boot project that needs to use AWS Textract. I want to use the Queries feature, but I can't figure out where to include the query.

I looked into the AWS documentation and used their example code for the Java SDK v2, but I can't figure out where to add the query. As an example, take "What is the customer's name?". Where should I add that so it is used to query the document?

Here is the code I'm using

 /**
  * Analyzes a local document with Amazon Textract and prints the type of each returned block.
  *
  * <p>The request enables {@code FeatureType.QUERIES} but does not set any queries on the
  * request builder. If a Textract or I/O error occurs, its message is written to
  * {@code System.err} and the JVM exits with status 1.
  *
  * @param textractClient client used to call {@code analyzeDocument}
  * @param sourceDoc      path of the local file to analyze
  */
 public static void analyzeDoc(TextractClient textractClient, String sourceDoc) {

        // try-with-resources: SdkBytes.fromInputStream reads the stream but does not close it,
        // so the file handle has to be released here.
        try (InputStream sourceStream = new FileInputStream(new File(sourceDoc))) {
            SdkBytes sourceBytes = SdkBytes.fromInputStream(sourceStream);

            // Get the input Document object as bytes
            Document myDoc = Document.builder()
                    .bytes(sourceBytes)
                    .build();

            List<FeatureType> featureTypes = new ArrayList<>();
            featureTypes.add(FeatureType.QUERIES);

            AnalyzeDocumentRequest analyzeDocumentRequest = AnalyzeDocumentRequest.builder()
                    .featureTypes(featureTypes)
                    .document(myDoc)
                    .build();

            AnalyzeDocumentResponse analyzeDocument = textractClient.analyzeDocument(analyzeDocumentRequest);

            for (Block block : analyzeDocument.blocks()) {
                System.out.println("The block type is " + block.blockType().toString());
            }

        } catch (TextractException | java.io.IOException e) {
            // IOException (fully qualified so no additional import is required) covers
            // FileNotFoundException as well as a failure while closing the stream.
            System.err.println(e.getMessage());
            System.exit(1);
        }

    }

Solution

  • After spending a lot of time, I finally figured out a way to include the query. Hope this will help someone who is stuck like I was.

    /**
     * Analyzes a local document with Amazon Textract using the QUERIES feature.
     *
     * <p>Prints the type of every block in the response and, for blocks whose type is
     * {@code QUERY_RESULT}, also prints the block text. If a Textract or I/O error occurs,
     * its message is written to {@code System.err} and the JVM exits with status 1.
     *
     * @param textractClient client used to call {@code analyzeDocument}
     * @param sourceDoc      path of the local file to analyze
     */
    public static void analyzeDoc(TextractClient textractClient, String sourceDoc) {

        // try-with-resources: SdkBytes.fromInputStream reads the stream but does not close it,
        // so the file handle has to be released here.
        try (InputStream sourceStream = new FileInputStream(sourceDoc)) {
            SdkBytes sourceBytes = SdkBytes.fromInputStream(sourceStream);

            // Get the input Document object as bytes
            Document myDoc = Document.builder()
                    .bytes(sourceBytes)
                    .build();

            List<FeatureType> featureTypes = new ArrayList<>();
            featureTypes.add(FeatureType.QUERIES);

            // The query text travels in a QueriesConfig attached to the request;
            // enabling FeatureType.QUERIES alone does not carry any query.
            QueriesConfig queryConfig = QueriesConfig.builder()
                    .queries(Query.builder().text("YOUR QUERY GOES HERE").build())
                    .build();

            AnalyzeDocumentRequest analyzeDocumentRequest = AnalyzeDocumentRequest.builder()
                    .featureTypes(featureTypes)
                    .queriesConfig(queryConfig)
                    .document(myDoc)
                    .build();

            AnalyzeDocumentResponse analyzeDocument = textractClient.analyzeDocument(analyzeDocumentRequest);

            for (Block block : analyzeDocument.blocks()) {
                String blockType = block.blockType().toString();
                System.out.println("The block type is " + blockType);
                // Only QUERY_RESULT blocks have their text printed.
                if (blockType.equals("QUERY_RESULT")) {
                    System.out.println(block.text());
                }
            }

        } catch (TextractException | java.io.IOException e) {
            // IOException (fully qualified so no additional import is required) covers
            // FileNotFoundException as well as a failure while closing the stream.
            System.err.println(e.getMessage());
            System.exit(1);
        }

    }