I am trying to bulk stamp a number of PDF files. I found something on GitHub that does something very similar, but you have to name each file within the script to match the actual PDF file for it to work.
https://github.com/iprapas/pythonpdf
def stamp_pdf(input_path, stamp_path, output_path, add_frame=False):
    """Merge a stamp page onto every page of a PDF and write the result.

    Args:
        input_path: Path of the PDF whose pages are stamped.
        stamp_path: Path passed to ``create_pdf_stamp`` and then read back;
            the first page of that file is used as the stamp.
        output_path: Path the stamped PDF is written to.
        add_frame: Forwarded unchanged to ``create_pdf_stamp``.
    """
    output = PdfFileWriter()
    # Presumably (re)writes the stamp PDF at stamp_path -- defined elsewhere.
    create_pdf_stamp(stamp_path, add_frame=add_frame)

    # Open both sources in a ``with`` so the handles are released even when
    # stamping fails; in a bulk run, leaked handles accumulate quickly.
    # The files stay open until output.write() is done because the reader
    # may still pull page data from the underlying streams at write time.
    with open(input_path, 'rb') as input_file, \
            open(stamp_path, 'rb') as stamp_file:
        pdf_in = PdfFileReader(input_file)
        stamp = PdfFileReader(stamp_file).getPage(0)

        # ``range`` instead of ``xrange``: the latter does not exist on
        # Python 3.
        for i in range(pdf_in.getNumPages()):
            page = pdf_in.getPage(i)
            page.mergePage(stamp)
            output.addPage(page)

        with open(output_path, 'wb') as f:
            output.write(f)
def main():
    """Stamp the two sample inputs, each once plain and once with a frame."""
    stamp_file = '../temp/tmp_stamp.pdf'

    # (input PDF, output PDF, extra keyword arguments for stamp_pdf)
    jobs = (
        ('../input/input1.pdf', '../output/stamped1.pdf', {}),
        ('../input/input1.pdf', '../output/stamped1_with_frame.pdf',
         {'add_frame': True}),
        ('../input/input2.pdf', '../output/stamped2.pdf', {}),
        ('../input/input2.pdf', '../output/stamped2_with_frame.pdf',
         {'add_frame': True}),
    )

    for source, target, options in jobs:
        stamp_pdf(source, stamp_file, target, **options)


# Run the sample batch only when executed as a script, not on import.
if __name__ == "__main__":
    main()
I'm sure there's a way to replace the individual file paths so that the script points directly to the directory and keeps each file's name as well. Any pointers to get me started would be much appreciated, as I have been trying out all sorts of code without much luck.
pathlib treats paths as objects instead of strings
pathlib objects have methods that work with them (e.g. open, glob, name, etc.)
from pathlib import Path
p = Path.cwd()
print(p)
>>> WindowsPath('E:/PythonProjects/DataCamp')
pdf_files = list(p.glob('*.pdf'))
print(pdf_files)
>>> [WindowsPath('E:/PythonProjects/DataCamp/aapl.pdf')]
pdf_name = pdf_files[0].name
print(pdf_name)
>>> 'aapl.pdf'
Use the glob method to find all the pdf files, including subdirectories, with ** wildcards:
p.glob('**/*.pdf')
Use name to get and easily track the filename:
out_dir = p / 'output'
print(out_dir)
>>> WindowsPath('E:/PythonProjects/DataCamp/output')
out_pdf = out_dir / f'stamped_{pdf_name}'
print(out_pdf)
>>> WindowsPath('E:/PythonProjects/DataCamp/output/stamped_aapl.pdf')
The pythonpdf library might not work with pathlib objects. If so, convert the pathlib objects back to str:
print(type(stamp_path))
>>> pathlib.WindowsPath
print(type(str(stamp_path)))
>>> str
create_pdf_stamp(str(stamp_path), add_frame=add_frame)
Iterating through .glob: .glob returns a generator, so loop over it directly:
p = Path('e:/PythonProjects')
files = p.glob('**/*.pdf')
for file in files:
    print(file)
    ...
    # do other stuff