Tags: python, zip, python-zipfile

renaming the extracted file from zipfile


I have lots of zipped files on a Linux server and each file includes multiple text files.

What I want is to extract some of those text files, which have the same names across the zipped files, and save them in a folder. At the moment I am creating one folder for each zipped file and extracting the text files into it. What I need is to append the parent zip file's name to the end of each extracted file's name and save all of the text files in one directory. For example, if the zip file was March132017.zip and I extracted holding.txt, my filename would be holding_march132017.txt.

My problem is that I am not able to change the extracted file's name. I would appreciate it if you could advise.

import os 
import sys 
import zipfile
# Directory that holds the zip archives.
pwkwd = "/feeds/lipper/emaxx"
os.chdir(pwkwd)  # kept from the original script; the paths below no longer rely on it

# Members to pull out of every archive (a tuple, since it is never modified).
filelist = ("ISSUERS.TXT", "SECMAST.TXT", "FUND.TXT", "HOLDING.TXT")

for item in os.listdir(pwkwd):  # loop through items in dir
    if not item.endswith(".zip"):  # only ".zip" archives are processed
        continue

    # Build the archive path explicitly instead of relying on the current
    # working directory via os.path.abspath().
    file_name = os.path.join(pwkwd, item)

    # One output folder per archive, named after the archive without ".zip".
    # It is the same for every member, so compute it once per archive.
    outpath = "/SCRATCH/emaxx" + "/" + os.path.splitext(item)[0]

    # The context manager closes the archive even if an extraction fails
    # with something other than KeyError.
    with zipfile.ZipFile(file_name) as zip_ref:
        for name in filelist:
            try:
                # extract() writes the member under its own name inside
                # outpath; it offers no way to choose a different file name.
                zip_ref.extract(name, outpath)
            except KeyError:
                # This archive has no member with that name; try the next one.
                pass

Solution

  • Why not just read the file in question and save it yourself instead of extracting? Something like:

    import os
    import zipfile
    
    source_dir = "/feeds/lipper/emaxx"  # folder with zip files
    target_dir = "/SCRATCH/emaxx"  # folder to save the extracted files

    # Are you sure your files names are capitalized in your zip files?
    filelist = ['ISSUERS.TXT', 'SECMAST.TXT', 'FUND.TXT', 'HOLDING.TXT']

    # All renamed files land in one flat directory; make sure it exists.
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    for item in os.listdir(source_dir):  # loop through items in dir
        if not item.endswith(".zip"):  # check for ".zip" extension
            continue
        file_path = os.path.join(source_dir, item)  # get zip file path
        # Use the bare archive name (no directory, no ".zip") for the suffix.
        # Deriving it from file_path would embed the whole source directory
        # in the output name and point into a non-existent subfolder.
        zip_stem = os.path.splitext(item)[0]
        with zipfile.ZipFile(file_path) as zf:  # open the zip file
            members = set(zf.namelist())  # build the member lookup once per archive
            for target_file in filelist:  # loop through the list of files to extract
                if target_file not in members:  # skip files missing from this archive
                    continue
                # generate the desired output name, e.g. HOLDING_March132017.txt:
                target_name = os.path.splitext(target_file)[0] + "_" + zip_stem + ".txt"
                target_path = os.path.join(target_dir, target_name)  # output path
                # zf.read() returns bytes, so the output must be opened in
                # binary mode; text mode raises TypeError on Python 3.
                with open(target_path, "wb") as f:
                    f.write(zf.read(target_file))  # save the contents of the file in it
                # next file from the list...
        # next zip file...