I am trying to extract text from a PDF using Azure Document Intelligence. This is the code I am using:
from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential
# Endpoint URL of the Azure resource; passed to DocumentAnalysisClient below.
endpoint = "https://xyz.cognitiveservices.azure.com/"
# API key wrapped in AzureKeyCredential below (left empty in this listing).
key = ""
def format_bounding_region(bounding_regions):
    """Render a collection of bounding regions as one readable string.

    Args:
        bounding_regions: Iterable of objects exposing ``page_number`` and
            ``polygon`` attributes, or a falsy value (``None`` / empty).

    Returns:
        ``"N/A"`` when ``bounding_regions`` is falsy; otherwise the entries
        ``"Page #<page_number>: <polygon>"`` joined with ``", "``, where the
        polygon text is produced by ``format_polygon``.
    """
    if not bounding_regions:
        return "N/A"
    return ", ".join(
        "Page #{}: {}".format(region.page_number, format_polygon(region.polygon))
        for region in bounding_regions
    )
def format_polygon(polygon):
    """Render a polygon as a comma-separated list of ``[x, y]`` points.

    Args:
        polygon: Iterable of point objects exposing ``x`` and ``y``
            attributes, or a falsy value (``None`` / empty).

    Returns:
        ``"N/A"`` when ``polygon`` is falsy; otherwise each point formatted
        as ``"[x, y]"`` and joined with ``", "``.
    """
    if not polygon:
        return "N/A"
    return ", ".join("[{}, {}]".format(point.x, point.y) for point in polygon)
def analyze_general_documents():
    """Analyze a sample PDF with the "prebuilt-document" model and print the result.

    Builds a ``DocumentAnalysisClient`` from the module-level ``endpoint`` and
    ``key``, starts an analysis of the sample document URL, waits for the
    result, and prints, in order: handwritten content, key-value pairs,
    per-page lines / words / selection marks, and tables with their cells.

    Raises:
        azure.core.exceptions.HttpResponseError: Raised by the analyze call
            when the service rejects the request, e.g. with code
            ``AuthenticationTypeDisabled`` ("Key based authentication is
            disabled for this resource.") when the resource does not accept
            API keys.
    """
    # sample document
    doc_url = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf"

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    # The request is sent here; authentication failures surface on this call,
    # before any polling happens.
    poller = document_analysis_client.begin_analyze_document_from_url(
        "prebuilt-document", doc_url
    )
    result = poller.result()

    # Handwritten content: each style's spans index into result.content.
    for style in result.styles:
        if style.is_handwritten:
            print("Document contains handwritten content: ")
            print(
                ",".join(
                    [
                        result.content[span.offset:span.offset + span.length]
                        for span in style.spans
                    ]
                )
            )

    print("----Key-value pairs found in document----")
    for kv_pair in result.key_value_pairs:
        # Either side of a pair may be missing, so each is reported separately.
        if kv_pair.key:
            print(
                "Key '{}' found within '{}' bounding regions".format(
                    kv_pair.key.content,
                    format_bounding_region(kv_pair.key.bounding_regions),
                )
            )
        if kv_pair.value:
            print(
                "Value '{}' found within '{}' bounding regions\n".format(
                    kv_pair.value.content,
                    format_bounding_region(kv_pair.value.bounding_regions),
                )
            )

    for page in result.pages:
        print("----Analyzing document from page #{}----".format(page.page_number))
        print(
            "Page has width: {} and height: {}, measured with unit: {}".format(
                page.width, page.height, page.unit
            )
        )

        for line_idx, line in enumerate(page.lines):
            print(
                "...Line # {} has text content '{}' within bounding box '{}'".format(
                    line_idx,
                    line.content,
                    format_polygon(line.polygon),
                )
            )

        for word in page.words:
            print(
                "...Word '{}' has a confidence of {}".format(
                    word.content, word.confidence
                )
            )

        for selection_mark in page.selection_marks:
            print(
                "...Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
                    selection_mark.state,
                    format_polygon(selection_mark.polygon),
                    selection_mark.confidence,
                )
            )

    for table_idx, table in enumerate(result.tables):
        print(
            "Table # {} has {} rows and {} columns".format(
                table_idx, table.row_count, table.column_count
            )
        )
        for region in table.bounding_regions:
            print(
                "Table # {} location on page: {} is {}".format(
                    table_idx,
                    region.page_number,
                    format_polygon(region.polygon),
                )
            )
        for cell in table.cells:
            print(
                "...Cell[{}][{}] has content '{}'".format(
                    cell.row_index,
                    cell.column_index,
                    cell.content,
                )
            )
            for region in cell.bounding_regions:
                print(
                    "...content on page {} is within bounding box '{}'\n".format(
                        region.page_number,
                        format_polygon(region.polygon),
                    )
                )
    print("----------------------------------------")
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    analyze_general_documents()
This is the error I get:
Traceback (most recent call last):
File "c:\Users\SESA70\Desktop\azure\text_extract.py", line 116, in <module>
analyze_general_documents()
File "c:\Users\SESA737860\Desktop\azure\text_extract.py", line 24, in analyze_general_documents
poller = document_analysis_client.begin_analyze_document_from_url(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SESA70\AppData\Local\Programs\Python\Python311\Lib\site-packages\azure\core\tracing\decorator.py", line 78, in wrapper_use_tracer
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SESA70\AppData\Local\Programs\Python\Python311\Lib\site-packages\azure\ai\formrecognizer\_document_analysis_client.py", line 198, in begin_analyze_document_from_url
return _client_op_path.begin_analyze_document( # type: ignore
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SESA70\AppData\Local\Programs\Python\Python311\Lib\site-packages\azure\core\tracing\decorator.py", line 78, in wrapper_use_tracer
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SESA70\AppData\Local\Programs\Python\Python311\Lib\site-packages\azure\ai\formrecognizer\_generated\v2023_07_31\operations\_document_models_operations.py", line 518, in begin_analyze_document
raw_result = self._analyze_document_initial( # type: ignore
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\SESA70\AppData\Local\Programs\Python\Python311\Lib\site-packages\azure\ai\formrecognizer\_generated\v2023_07_31\operations\_document_models_operations.py", line 443, in _analyze_document_initial
raise HttpResponseError(response=response)
azure.core.exceptions.HttpResponseError: (AuthenticationTypeDisabled) Key based authentication is disabled for this resource.
Code: AuthenticationTypeDisabled
Message: Key based authentication is disabled for this resource.
I believe the code itself is fine. Why am I getting this error? Is it because my organisation has not granted me the rights to use this resource, or am I missing something?
I am new to Azure, so please correct me where I am going wrong. Thank you.
I tried to replicate the error you are getting. I copied your code and replaced the endpoint and credential key with my own. I ran it in the terminal and it works perfectly fine, so you can check this on your side.
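From what I see, the code is not the problem: the error message says that key-based authentication is disabled for that resource, so the request is rejected no matter which key you pass. In other words, you do not have sufficient access or privilege to access that resource with a key. That is the issue.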