I have a django application and a upload method. And A textarea where the content of the file will be returned. So if there is a text file uploaded. Then in the textarea the content of the textfile will be visible.
I try it now also with a image in a pdf file. And I try it with a console app. And that works. Tha text in the pdf file will be extracted.
But I try it now with the general upload method. And then I get this errror:
TypeError at /
image must be a wand.image.Image instance, not <UploadFile: UploadFile object (45)>
Request Method: POST
Request URL: http://127.0.0.1:8000/
Django Version: 4.1.1
Exception Type: TypeError
Exception Value:
image must be a wand.image.Image instance, not <UploadFile: UploadFile object (45)>
Exception Location: C:\Python310\lib\site-packages\wand\image.py, line 9310, in __init__
Raised during: main.views.ReadingFile
Python Executable: C:\Python310\python.exe
Python Version: 3.10.6
So this is the complete method:
from django.shortcuts import render
from django.views import View
from django.http import HttpResponseRedirect
from .forms import ProfileForm
from .models import UploadFile
from .textFromImages import TextFromImage
from wand.image import Image as wi
from PIL import Image
import pytesseract
from django.conf import settings
import io
import os
class ReadingFile(View):
def get(self, request):
form = ProfileForm()
return render(request, "main/create_profile.html", {
"form": form
})
def post(self, request):
submitted_form = ProfileForm(request.POST, request.FILES)
content = ''
if submitted_form.is_valid():
uploadfile = UploadFile(image=request.FILES["upload_file"])
name_of_file = str(request.FILES['upload_file'])
uploadfile.save()
with open(os.path.join(settings.MEDIA_ROOT,
f"{uploadfile.image}"), 'r') as f:
print("Now its type is ", type(name_of_file))
# reading PDF file
if name_of_file.endswith('.pdf'):
pdfFile = wi(uploadfile, resolution=300)
text_factuur_verdi = []
image = pdfFile.convert('jpeg')
imageBlobs = []
for img in image.sequence:
imgPage = wi(image=img)
imageBlobs.append(imgPage.make_blob('jpeg'))
for imgBlob in imageBlobs:
image = Image.open(io.BytesIO(imgBlob))
text = pytesseract.image_to_string(image, lang='eng')
text_factuur_verdi.append(text)
return text_factuur_verdi
# ENDING Reading pdf file
else:
content = f.read()
print(content)
return render(request, "main/create_profile.html", {
'form': ProfileForm(),
"content": content
})
return render(request, "main/create_profile.html", {
"form": submitted_form,
})
My question: So what I have to change?
Thank you
This are the error lines:
C:\Users\engel\Documents\NVWA\software\blockchainfruit\main\views.py, line 39, in post
pdfFile = wi(uploadfile, resolution=300)
…
Local vars
C:\Python310\lib\site-packages\wand\image.py, line 9310, in __init__
raise TypeError('image must be a wand.image.Image '
even I I do a hardcoded path string:
pdfFile = wi("C:\\Users\\engel\\Documents\\NVWA\\software\\blockchainfruit\\uploads\\images\\fixedPDF.pdf", resolution=300)
I get the same error:
TypeError at /
image must be a wand.image.Image instance, not 'C:\\Users\\engel\\Documents\\NVWA\\software\\blockchainfruit\\uploads\\images\\fixedPDF.pdf'
Traceback (most recent call last):
File "C:\Python310\lib\site-packages\django\core\handlers\exception.py", line 55, in inner
response = get_response(request)
File "C:\Python310\lib\site-packages\django\core\handlers\base.py", line 197, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "C:\Python310\lib\site-packages\django\views\generic\base.py", line 103, in view
return self.dispatch(request, *args, **kwargs)
File "C:\Python310\lib\site-packages\django\views\generic\base.py", line 142, in dispatch
return handler(request, *args, **kwargs)
File "C:\Users\engel\Documents\NVWA\software\blockchainfruit\main\views.py", line 42, in post
pdfFile = wi(
File "C:\Python310\lib\site-packages\wand\image.py", line 9310, in __init__
raise TypeError('image must be a wand.image.Image '
Exception Type: TypeError at /
Exception Value: image must be a wand.image.Image instance, not 'C:\\Users\\engel\\Documents\\NVWA\\software\\blockchainfruit\\uploads\\images\\fixedPDF.pdf'
Somebody any idea how to tackle this?
I have it now like this:
# reading PDF file
if name_of_file.endswith('.pdf'):
pdfFile = wi(filename= uploadfile.image.path , resolution=300)
text_factuur_verdi = []
image = pdfFile.convert('jpeg')
imageBlobs = []
for img in image.sequence:
imgPage = wi(image=img)
imageBlobs.append(imgPage.make_blob('jpeg'))
for imgBlob in imageBlobs:
image = Image.open(io.BytesIO(imgBlob))
text = pytesseract.image_to_string(image, lang='eng')
text_factuur_verdi.append(text)
return text_factuur_verdi
# ENDING Reading pdf file
And so this is the html:
{% load static %}
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Create a Profile</title>
<link rel="stylesheet" href="{% static "main/styles/styles.css" %}">
</head>
<body>
<form action="/" method="POST" enctype="multipart/form-data">
{% csrf_token %}
{{ form }}
<button type="submit">Upload!</button>
</form>
{% if content %}
<textarea name="" id="" cols="30" rows="10">{{content}}</textarea>
{% endif %}
</body>
</html>
This works:
content = text_factuur_verdi
print(text_factuur_verdi)