Tags: python, azure, text-extraction, azure-form-recognizer

Unable to access Azure Document Intelligence using an endpoint and key


I am trying to extract text from a PDF using Azure Document Intelligence. This is the code I am using:

from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential

# Endpoint URL of the Azure resource, passed to DocumentAnalysisClient below.
# NOTE(review): "xyz" looks like a placeholder — replace with the real resource name.
endpoint = "https://xyz.cognitiveservices.azure.com/"
# API key wrapped in AzureKeyCredential below.
# NOTE(review): empty here, presumably redacted for posting — must be set before running.
key = ""

def format_bounding_region(bounding_regions):
    """Describe each bounding region as ``Page #<n>: <polygon>``.

    Args:
        bounding_regions: Iterable of objects exposing ``page_number`` and
            ``polygon`` attributes; may be ``None`` or empty.

    Returns:
        The per-region descriptions joined with ", ", or "N/A" when
        ``bounding_regions`` is falsy.
    """
    if bounding_regions:
        described = [
            f"Page #{area.page_number}: {format_polygon(area.polygon)}"
            for area in bounding_regions
        ]
        return ", ".join(described)
    return "N/A"

def format_polygon(polygon):
    """Render a polygon as a comma-separated list of ``[x, y]`` points.

    Args:
        polygon: Iterable of objects exposing ``x`` and ``y`` attributes;
            may be ``None`` or empty.

    Returns:
        The points formatted as "[x, y]" and joined with ", ", or "N/A"
        when ``polygon`` is falsy.
    """
    if polygon:
        corners = (f"[{pt.x}, {pt.y}]" for pt in polygon)
        return ", ".join(corners)
    return "N/A"


def analyze_general_documents():
    """Analyze a sample PDF with the "prebuilt-document" model and print the result.

    Builds a ``DocumentAnalysisClient`` from the module-level ``endpoint`` and
    ``key``, submits the sample document URL, waits for the poller to finish,
    and then prints, in order: handwritten spans, key-value pairs, the lines,
    words and selection marks of every page, and every table with its cells.
    """
    # sample document
    sample_url = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf"

    credential = AzureKeyCredential(key)
    client = DocumentAnalysisClient(endpoint=endpoint, credential=credential)

    # The request is made here; the traceback in the question originates
    # from this call.
    poller = client.begin_analyze_document_from_url("prebuilt-document", sample_url)
    analysis = poller.result()

    # Handwritten content: slice the full text by each span of the style.
    for style in analysis.styles:
        if not style.is_handwritten:
            continue
        print("Document contains handwritten content: ")
        handwritten = [
            analysis.content[span.offset:span.offset + span.length]
            for span in style.spans
        ]
        print(",".join(handwritten))

    print("----Key-value pairs found in document----")
    for pair in analysis.key_value_pairs:
        # Either side of a pair may be missing, so each is checked on its own.
        if pair.key:
            key_area = format_bounding_region(pair.key.bounding_regions)
            print(f"Key '{pair.key.content}' found within '{key_area}' bounding regions")
        if pair.value:
            value_area = format_bounding_region(pair.value.bounding_regions)
            print(f"Value '{pair.value.content}' found within '{value_area}' bounding regions\n")

    for page in analysis.pages:
        print(f"----Analyzing document from page #{page.page_number}----")
        print(
            f"Page has width: {page.width} and height: {page.height}, "
            f"measured with unit: {page.unit}"
        )

        for line_no, text_line in enumerate(page.lines):
            outline = format_polygon(text_line.polygon)
            print(
                f"...Line # {line_no} has text content '{text_line.content}' "
                f"within bounding box '{outline}'"
            )

        for word in page.words:
            print(f"...Word '{word.content}' has a confidence of {word.confidence}")

        for mark in page.selection_marks:
            outline = format_polygon(mark.polygon)
            print(
                f"...Selection mark is '{mark.state}' within bounding box '{outline}' "
                f"and has a confidence of {mark.confidence}"
            )

    for table_no, table in enumerate(analysis.tables):
        print(f"Table # {table_no} has {table.row_count} rows and {table.column_count} columns")

        for area in table.bounding_regions:
            outline = format_polygon(area.polygon)
            print(f"Table # {table_no} location on page: {area.page_number} is {outline}")

        for cell in table.cells:
            print(f"...Cell[{cell.row_index}][{cell.column_index}] has content '{cell.content}'")
            for area in cell.bounding_regions:
                outline = format_polygon(area.polygon)
                print(f"...content on page {area.page_number} is within bounding box '{outline}'\n")

    print("----------------------------------------")


# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    analyze_general_documents()

error:

Traceback (most recent call last):
  File "c:\Users\SESA70\Desktop\azure\text_extract.py", line 116, in <module>
    analyze_general_documents()
  File "c:\Users\SESA737860\Desktop\azure\text_extract.py", line 24, in analyze_general_documents     
    poller = document_analysis_client.begin_analyze_document_from_url(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\SESA70\AppData\Local\Programs\Python\Python311\Lib\site-packages\azure\core\tracing\decorator.py", line 78, in wrapper_use_tracer
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\SESA70\AppData\Local\Programs\Python\Python311\Lib\site-packages\azure\ai\formrecognizer\_document_analysis_client.py", line 198, in begin_analyze_document_from_url
    return _client_op_path.begin_analyze_document(  # type: ignore
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\SESA70\AppData\Local\Programs\Python\Python311\Lib\site-packages\azure\core\tracing\decorator.py", line 78, in wrapper_use_tracer
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\SESA70\AppData\Local\Programs\Python\Python311\Lib\site-packages\azure\ai\formrecognizer\_generated\v2023_07_31\operations\_document_models_operations.py", line 518, in begin_analyze_document
    raw_result = self._analyze_document_initial(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\SESA70\AppData\Local\Programs\Python\Python311\Lib\site-packages\azure\ai\formrecognizer\_generated\v2023_07_31\operations\_document_models_operations.py", line 443, in _analyze_document_initial
    raise HttpResponseError(response=response)
azure.core.exceptions.HttpResponseError: (AuthenticationTypeDisabled) Key based authentication is disabled for this resource.
Code: AuthenticationTypeDisabled
Message: Key based authentication is disabled for this resource.

I believe the code is fine, so why am I getting this error? Is it because my organisation didn't grant me rights to use this resource, or am I missing something?

I am new to Azure, so please correct me where I'm going wrong. Thank you.


Solution

  • I tried to replicate the error you are getting. I copied your code, replaced the endpoint and credential key with my own, and ran it in the terminal; it works perfectly fine. You can check this in the screenshot below.

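    From what I see, the problem is not in your code: you do not have sufficient access or privilege to use that resource with a key. The message "Key based authentication is disabled for this resource" means key (local) authentication has been turned off on the resource itself, so either an administrator has to re-enable it, or you have to authenticate with Microsoft Entra ID using an account that has been granted access. That is the issue.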

    https://i.sstatic.net/2fEfiZbM.png