I wrote a Python program that converts a PDF file to JPG images. The frontend uses the "eel" library, and the PDF-to-JPG conversion uses the pdfium library (pypdfium2). Everything works when I run it from VS Code. I then compiled the application with PyInstaller, and the resulting executable fails at startup with an error related to the pdfium library.
func.py:
import pandas as pd
import requests
from urllib.parse import urlencode
from datetime import datetime
import pypdfium2 as pdfium
import win32com.client as win32
from pywintypes import com_error
import psutil
import time,os
import openpyxl as xl
def ExceltoImage(excelfile):
    """Export the first worksheet of an Excel workbook to JPEG page images.

    The workbook is opened in Excel through COM, its first sheet is exported
    to ``temp/template.pdf`` (relative to the current working directory) and
    every page of that PDF is rendered with pdfium to ``temp/templateNNN.jpg``.

    Args:
        excelfile: Path of the workbook to convert.

    Returns:
        List of the JPEG paths that were written, in page order.
    """
    # Every running Excel process is killed before the COM session starts.
    # A process may exit or be inaccessible while the list is being walked,
    # so those two psutil errors are skipped instead of aborting the export.
    for proc in psutil.process_iter():
        try:
            if proc.name() == "EXCEL.EXE":
                proc.kill()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue

    # openpyxl is only used to learn the name of the first sheet.
    wbx = xl.load_workbook(excelfile)
    first_sheet = wbx.sheetnames[0]
    print(first_sheet)

    temp_dir = os.path.join(os.getcwd(), 'temp')
    os.makedirs(temp_dir, exist_ok=True)
    pdffile = os.path.join(temp_dir, 'template.pdf')

    excel = win32.gencache.EnsureDispatch('Excel.Application')
    try:
        wb = excel.Workbooks.Open(excelfile)
        try:
            # NOTE(review): the fixed sleeps presumably give Excel time to
            # finish loading/exporting - confirm they are still required.
            time.sleep(7)
            wb.Worksheets(first_sheet).Select()
            # First argument 0 selects the PDF export type.
            wb.ActiveSheet.ExportAsFixedFormat(0, pdffile)
            time.sleep(7)
        finally:
            wb.Close()
    finally:
        # Quit even when opening or exporting failed, so no Excel instance
        # is left running in the background.
        excel.Quit()
    time.sleep(2)

    pdf = pdfium.PdfDocument(pdffile)
    imgs = []
    for i in range(len(pdf)):
        img = os.path.join(temp_dir, f'template{i:03d}.jpg')
        page = pdf[i]
        image = page.render(scale=4).to_pil()
        image.save(img)
        imgs.append(img)
    return imgs
def ExceltoImageHeader(pdffile):
    """Render every page of a PDF to ``temp/headerNNN.jpg``.

    Args:
        pdffile: Path of the PDF document to render.

    Returns:
        List of the JPEG paths that were written, in page order. The files
        are placed in the ``temp`` folder of the current working directory.
    """
    document = pdfium.PdfDocument(pdffile)
    out_dir = os.path.join(os.getcwd(), 'temp')
    written = []
    for page_no in range(len(document)):
        target = os.path.join(out_dir, f'header{page_no:03d}.jpg')
        # Render at 4x scale and save through a PIL image.
        bitmap = document[page_no].render(scale=4)
        bitmap.to_pil().save(target)
        written.append(target)
    return written
def getImgData(file):
    """Read columns A-D of an Excel sheet and keep the rows that have links.

    The first row of the parsed data is skipped. A row is kept only when
    neither its third nor its fourth value is empty (an empty cell shows up
    as ``nan`` after parsing).

    Args:
        file: Path of the Excel file to read.

    Returns:
        List of rows, each row being a list of the four cell values.
    """
    rows = pd.read_excel(file, usecols="A,B,C,D").values.tolist()
    return [
        row
        for row in rows[1:]
        if not (str(row[2]) == 'nan' or str(row[3]) == 'nan')
    ]
def getImgList(file):
    """Return the distinct non-empty values of column C as one-element rows.

    The first seven rows of the parsed data are skipped and empty cells
    (parsed as ``nan``) are dropped. Duplicates are removed while the order
    of first appearance is kept.

    Args:
        file: Path of the Excel file to read.

    Returns:
        List of one-element lists ``[value]``.
    """
    rows = pd.read_excel(file, usecols="C").values.tolist()
    newdata = [row for row in rows[7:] if str(row[0]) != 'nan']
    print(type(newdata), newdata)
    # Each row is a list, and lists are unhashable, so they cannot be used
    # as dict keys directly. De-duplicate on tuple keys and convert back to
    # lists afterwards.
    unique_rows = dict.fromkeys(tuple(row) for row in newdata)
    return [list(row) for row in unique_rows]
def getyandex(linkimg):
    """Download an image into the ``temp`` folder of the working directory.

    Args:
        linkimg: URL of the image. Its basename is used as the local file
            name.

    Returns:
        Path of the downloaded file, or ``False`` when the request fails,
        the server does not answer with status 200, or the file is missing
        after the download.
    """
    name = os.path.basename(linkimg)
    image_name = os.path.join(os.getcwd(), 'temp', name)
    print(linkimg)
    headers = requests.utils.default_headers()
    headers.update({'User-Agent': 'My User Agent 1.0'})
    try:
        # The timeout keeps a stalled server from blocking forever; the
        # context manager releases the streamed connection in every case.
        with requests.get(linkimg, stream=True, headers=headers, timeout=30) as r:
            if r.status_code != 200:
                return False
            with open(image_name, 'wb') as f:
                # iter_content() defaults to 1-byte chunks; use a real
                # buffer size instead of writing byte by byte.
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except requests.RequestException as exc:
        # Network failures are reported through the same False result the
        # callers already handle.
        print(f"Download failed: {exc}")
        return False
    return image_name if os.path.exists(image_name) else False
main.py:
from fpdf import FPDF
import os,json,datetime
from func import getImgData,getImgList,getyandex,ExceltoImage,ExceltoImageHeader
class FPDF(FPDF):
    """PDF document with a company letterhead and a page-number footer."""

    def header(self):
        """Draw the logo and the company details from template/setting.json."""
        settings_path = os.path.join(os.getcwd(), 'template', 'setting.json')
        # Close the settings file right after reading it; this method runs
        # once per page, so an unclosed handle would leak on every page.
        with open(settings_path, encoding="UTF-8") as f:
            setting = json.load(f)
        # Place the logo
        self.image('template/logo.jpg', 8, 6, 25)
        self.add_font('sysfont', '', r"c:\WINDOWS\Fonts\timesi.ttf", uni=True)
        self.set_font('sysfont', '', 8)
        # Add the company details; the empty 150-wide cell pushes each line
        # to the right-hand side of the page.
        for key in ('company', 'inn', 'address', 'tel', 'email'):
            self.cell(150)
            self.cell(0, 4, setting[key], ln=1)
        # Line break
        self.ln(3)

    def footer(self):
        """Draw the centred "current/total" page counter."""
        self.set_y(-10)
        self.set_font('Arial', 'I', 8)
        # Add the page number; '{nb}' is the total-page placeholder
        page = str(self.page_no()) + '/{nb}'
        self.cell(0, 10, page, 0, 0, 'C')
def create_pdf(smetfile,imgsfile):
    """Build the result PDF from an estimate workbook and an image list.

    Part 1 lists the items found in ``smetfile`` together with a picture and
    an optional video link looked up in ``imgsfile``. Part 2 embeds the
    estimate sheet itself as rendered images on landscape pages. Part 3
    embeds the pages of ``template/footer.pdf`` on portrait pages.

    Args:
        smetfile: Path of the estimate Excel workbook.
        imgsfile: Path of the Excel file that maps item names to links.

    Returns:
        Path of the PDF written to the ``output`` folder of the current
        working directory.
    """
    settings_path = os.path.join(os.getcwd(), 'template', 'setting.json')
    with open(settings_path, encoding="UTF-8") as f:
        setting = json.load(f)
    pdf = FPDF()
    # Enable the special {nb} value (total page count)
    pdf.alias_nb_pages()

    # part 1
    pdf.add_page()
    pdf.add_font('Times', '', r"c:\WINDOWS\Fonts\timesbd.ttf", uni=True)
    pdf.set_font('Times', '', 14)
    pdf.cell(0, 5, txt=setting['header_1'], ln=1, align='C')
    pdf.cell(0, 5, txt=setting['header_2'], ln=1, align='C')
    pdf.cell(0, 3, txt="", ln=1, align='C')
    pdf.cell(0, 5, txt=setting['header_3'], ln=1, align='C')
    pdf.cell(0, 5, txt="", ln=1, align='C')
    listimg = getImgList(smetfile)
    dataimg = getImgData(imgsfile)
    if listimg and dataimg:
        for indx, ls in enumerate(listimg):
            if indx > 1 and indx % 2 == 0:
                pdf.add_page()
            # None (not False) marks "no match": index 0 is a valid match
            # and 0 == False would wrongly report it as missing.
            getindex = None
            for index, img in enumerate(dataimg):
                if img[0] == ls[0]:
                    getindex = index  # the last matching row wins
            if getindex is None:
                print("Not Found")
                continue
            name = dataimg[getindex][0].strip()
            typeimg = dataimg[getindex][1].strip()
            imglink = dataimg[getindex][2].strip()
            videolink = dataimg[getindex][3].strip()
            if typeimg == 'Не отображаем' or imglink == 'Без изображения':
                continue
            pdf.cell(0, 10, txt=name, ln=1, align='C')
            pdf.cell(30)
            print(imglink)
            # NOTE(review): this hard-coded URL replaces the link read from
            # the spreadsheet for every item - looks like a debugging
            # leftover; confirm before removing.
            imglink = 'https://prokarniz.ru/wp-content/uploads/2017/09/elektricheskiy-karniz-novokitay-2-380x260.jpg'
            img = getyandex(imglink)
            if not img:
                print("Not Found 404")
                continue
            if not os.path.exists(img):
                continue
            pdf.image(img, w=130)
            if videolink != 'Без видео':
                pdf.cell(0, 10, txt=videolink, ln=1, align='C', link=videolink)
            # NOTE(review): the original indentation was lost; confirm this
            # spacer belongs after the video-link branch rather than in it.
            pdf.cell(10, ln=1)

    # part 2
    pdf.add_page("L")
    imgs = ExceltoImage(smetfile)
    for page_no, imgxx in enumerate(imgs):
        # One PDF page per rendered image; drawing them all on the same
        # page at the same position would let the last one cover the rest.
        if page_no > 0:
            pdf.add_page("L")
        pdf.image(imgxx, 10, 30, w=280, h=180)

    # part 3
    pdf.add_page('P')
    pdf.add_font('Times', '', r"c:\WINDOWS\Fonts\timesbd.ttf", uni=True)
    pdf.set_font('Times', '', 14)
    pathxlsxheader = os.path.join(os.getcwd(), 'template', 'footer.pdf')
    imgs = ExceltoImageHeader(pathxlsxheader)
    for page_no, imgxx in enumerate(imgs):
        if page_no > 0:
            pdf.add_page('P')
        pdf.image(imgxx, 0, 30, w=200, h=260)

    now = datetime.datetime.now()
    string = now.strftime('%Y%m%d%H%M%S')
    output_dir = os.path.join(os.getcwd(), 'output')
    os.makedirs(output_dir, exist_ok=True)
    pathtopdf = os.path.join(output_dir, f'result_{string}.pdf')
    pdf.output(pathtopdf)
    # Hand the path back so callers can report where the file was written.
    return pathtopdf
# if __name__ == '__main__':
# create_pdf('output/header_footer.pdf')
app.py:
import main
import eel,os
import tkinter
import tkinter.filedialog as filedialog
if __name__ == '__main__':
    @eel.expose
    def run(pathsmet,pathimages):
        """Create the PDF when both input files exist.

        Returns the value of ``main.create_pdf`` on success, or the string
        "NO" when either path does not exist.
        """
        print(pathsmet, pathimages)
        if os.path.exists(pathsmet) and os.path.exists(pathimages):
            return main.create_pdf(pathsmet, pathimages)
        return "NO"

    @eel.expose
    def selectFolder(type):
        """Show a file-open dialog and return the chosen path.

        ``type`` is 'smet' or 'imgs'; for any other value the dialog is
        still shown but ``None`` is returned.
        """
        print("Here")
        # A hidden, always-on-top root window serves as the dialog's parent.
        root = tkinter.Tk()
        try:
            root.attributes("-topmost", True)
            root.withdraw()
            filetypes = (
                ('Excel', '*.xlsx'),
                ('All files', '*.*')
            )
            directory_path = filedialog.askopenfilename(
                title='Excel file',
                initialdir='/',
                filetypes=filetypes)
        finally:
            # Destroy the root so repeated calls do not pile up Tk instances.
            root.destroy()
        if type in ('smet', 'imgs'):
            print(directory_path)
            return str(directory_path)
        return None

    # Start the eel frontend with the Chrome build under template/.
    chrome = os.path.join(os.getcwd(), 'template', 'chrome-win', 'chrome.exe')
    front = os.path.join(os.getcwd(), 'template', 'front')
    eel.init(front)
    eel.browsers.set_path("chrome", chrome)
    eel.start('index.html', mode="chrome", size=(760, 760))
py -m eel app.py web
Traceback (most recent call last):
  File "app.py", line 1, in <module>
  File "PyInstaller\loader\pyimod02_importers.py", line 419, in exec_module
  File "main.py", line 3, in <module>
  File "PyInstaller\loader\pyimod02_importers.py", line 419, in exec_module
  File "func.py", line 5, in <module>
  File "PyInstaller\loader\pyimod02_importers.py", line 419, in exec_module
  File "pypdfium2\__init__.py", line 4, in <module>
  File "PyInstaller\loader\pyimod02_importers.py", line 419, in exec_module
  File "pypdfium2\_library_scope.py", line 6, in <module>
  File "PyInstaller\loader\pyimod02_importers.py", line 419, in exec_module
  File "pypdfium2\raw.py", line 5, in <module>
  File "PyInstaller\loader\pyimod02_importers.py", line 419, in exec_module
  File "pypdfium2_raw\__init__.py", line 5, in <module>
  File "PyInstaller\loader\pyimod02_importers.py", line 419, in exec_module
  File "pypdfium2_raw\bindings.py", line 53, in <module>
  File "pypdfium2_raw\bindings.py", line 44, in _register_library
  File "pypdfium2_raw\bindings.py", line 37, in _find_library
ImportError: Could not find library 'pdfium' (dirs=['.'], search_sys=False)
[8092] Failed to execute script 'app' due to unhandled exception!
Everything works in VS Code. When I build the EXE with PyInstaller, I get an error saying that the "pdfium" library was not found.
The pypdfium2 modules still look for some files present in site-packages even after the solution is packaged to an EXE. And these files ('pdfium.dll' and two different 'version.json') are nowhere to be found.
My workaround was to simply add these files to the correct path using pyinstaller's spec file (or using the command line option --add-data)
To do that, you first need pypdfium2 installed on your machine (preferably in a virtual environment), and your spec file must include the following tuples in datas:
# PyInstaller spec-file excerpt: bundle the pypdfium2 files that are missing
# from the packaged EXE. Each `datas` entry is a
# (source path, destination folder inside the bundle) tuple.
a = Analysis(
['your_project.py'],
pathex=[],
binaries=[],
datas=[
# pdfium library that pypdfium2_raw looks for when it is imported
('venv\\Lib\\site-packages\\pypdfium2_raw\\pdfium.dll', 'pypdfium2_raw'),
# the two version.json files that the pypdfium2 modules also look for
('venv\\Lib\\site-packages\\pypdfium2_raw\\version.json', 'pypdfium2_raw'),
('venv\\Lib\\site-packages\\pypdfium2\\version.json', 'pypdfium2')
],
# remaining Analysis arguments are omitted in this excerpt
...
)
Alternatively, you could pass those files as multiple --add-data with the pyinstaller command
pyinstaller --add-data "venv\Lib\site-packages\pypdfium2_raw\pdfium.dll;pypdfium2_raw" --add-data "venv\Lib\site-packages\pypdfium2_raw\version.json;pypdfium2_raw" --add-data "venv\Lib\site-packages\pypdfium2\version.json;pypdfium2" your_project.py
Here is the reference for the spec file: https://pyinstaller.org/en/v4.0/spec-files.html
P.S.: The paths above are for a virtual environment called venv on Windows, but basically you just need to point to wherever your site-packages directory is located :)