pythonpdfbulkstamp

Python - adding a script to select all pdf files


I am trying to bulk stamp a number of PDF files. I found something on GitHub that does something very similar, but you have to name each file within the script so that it matches the actual PDF file for it to work.

https://github.com/iprapas/pythonpdf

def stamp_pdf(input_path, stamp_path, output_path, add_frame=False):
    """Merge a one-page stamp onto every page of a PDF and save the result.

    Args:
        input_path: Path of the PDF to be stamped.
        stamp_path: Path of the stamp PDF. It is passed to
            ``create_pdf_stamp`` first and then read back; only its first
            page is used as the stamp.
        output_path: Path the stamped PDF is written to.
        add_frame: Forwarded unchanged to ``create_pdf_stamp``.
    """
    output = PdfFileWriter()
    # NOTE(review): create_pdf_stamp is defined elsewhere; presumably it
    # (re)generates the file at stamp_path that is read just below.
    create_pdf_stamp(stamp_path, add_frame=add_frame)

    # Both source files are closed deterministically instead of being leaked.
    # They stay open until after output.write(), because the page objects
    # handed to the writer may still read from these streams at write time.
    with open(input_path, 'rb') as in_file, open(stamp_path, 'rb') as stamp_file:
        pdf_in = PdfFileReader(in_file)
        stamp = PdfFileReader(stamp_file).getPage(0)

        # range() instead of xrange() so the loop runs on Python 2 and 3.
        for i in range(pdf_in.getNumPages()):
            page = pdf_in.getPage(i)
            page.mergePage(stamp)
            output.addPage(page)

        with open(output_path, 'wb') as f:
            output.write(f)

def main():
    """Stamp the two sample inputs, each once plain and once with a frame."""
    stamp_file = '../temp/tmp_stamp.pdf'
    # Each variant: (suffix for the output name, extra kwargs for stamp_pdf).
    variants = (('', {}), ('_with_frame', {'add_frame': True}))

    for number in (1, 2):
        source = '../input/input{}.pdf'.format(number)
        for suffix, extra in variants:
            target = '../output/stamped{}{}.pdf'.format(number, suffix)
            stamp_pdf(source, stamp_file, target, **extra)

if __name__ == "__main__":
    # Run the stamping job only when this file is executed as a script,
    # not when it is imported as a module.
    main()

I'm sure there's a way to replace the individual file paths so that the script points directly at a directory and keeps each file's name as well. Any pointers to get me started would be much appreciated, as I have been trying out all sorts of code without much luck.


Solution

  • Easily access and manage paths and filenames with pathlib


    Example:

    from pathlib import Path
    
    p = Path.cwd()
    print(p)
    >>> WindowsPath('E:/PythonProjects/DataCamp')
    
    pdf_files = list(p.glob('*.pdf'))
    print(pdf_files)
    >>> [WindowsPath('E:/PythonProjects/DataCamp/aapl.pdf')]
    
    pdf_name = pdf_files[0].name
    print(pdf_name)
    >>> 'aapl.pdf'
    

    Output files:

    out_dir = p / 'output'
    print(out_dir)
    >>> WindowsPath('E:/PythonProjects/DataCamp/output')
    
    out_pdf = out_dir / f'stamped_{pdf_name}'
    print(out_pdf)
    >>> WindowsPath('E:/PythonProjects/DataCamp/output/stamped_aapl.pdf')
    

    The pythonpdf library might not work with pathlib objects; if so, convert the path to a string with str() before passing it in:

    print(type(stamp_path))
    >>> pathlib.WindowsPath
    
    print(type(str(stamp_path)))
    >>> str
    
    create_pdf_stamp(str(stamp_path), add_frame=add_frame)
    

    iterating through .glob:

    p = Path('e:/PythonProjects')
    files = p.glob('**/*.pdf')
    
    for file in files:
        print(file)
        ...
        # do other stuff