python pyinstaller rdkit

PyInstaller's resulting executable goes blank and crashes


I'm very new to all of this, but I have a script and, to make things simpler, I wanted to turn it into an executable. So I installed PyInstaller and ran it with the "--onefile" option. The resulting executable only opens a blank console window, and it closes before I can even read what appears in it.

I will post the whole script since it is not that big. It contains a lot of explanatory text because the primary goal is to give it to others so they can use it without much background knowledge, which is one of the reasons why making it an executable is so important:

from rdkit import Chem
from mordred import Calculator, descriptors
import pandas as pd
import csv
import datetime
import time
import os


# Remember when the run began so the closing summary can report it.
P = datetime.datetime.now()

# Opening banner followed by the usage notes, one entry per console line.
_intro_lines = (
    "----------------------------------------------------------------",
    "                                                                ",
    "                             START                              ",
    "                                                                ",
    "----------------------------------------------------------------",
    "                                     WARNING!!!                                   ",
    " ",
    "- ONLY PUT THE NAMES OF THE FILES, EXTENSIONS ARE NOT NESESARY.",
    " ",
    "- MAKE SURE THAT THE FILE NEEDED IS IN THE SAME DIRECTORY AS THE SCRIPT.",
    " ",
    "- THE PROGRAM DIFERENTIATES BETWEEN .CSV AND .XLSX FILES, THE INPUT IS IN .XLSX AND THE",
    "  OUTPUT IS IN .CSV",
    "  YOU MUST BE CAREFULL SINCE IT IS POSIBLE TO END WITH TWO FILES WITH THE SAME NAME",
    "  BUT DIFERENT EXTENSIONS (YOUR_FILE.XLXS AND YOUR_FILE.CSV)",
    " ",
)
for _intro_line in _intro_lines:
    print(_intro_line)

# Ask for the workbook name, the column holding the sequences and the name
# of the CSV to produce (all without file extensions).
print(" ")
inputfile = input("Enter filename (can only be an excel file): ")
column = input("Name of column where aminoacids sequences are: ")
outputfile = input("Name of resulting .csv file: ")
print(" ")

# Full on-disk name of the CSV this run intends to write.
test = "{}.csv".format(outputfile)

def file_exists(test):
    """Report whether the path ``test`` points at an existing regular file."""
    is_regular_file = os.path.isfile(test)
    return is_regular_file

# If the chosen output CSV already exists, let the user pick a different
# output name, abort, or knowingly overwrite it.
if file_exists(test):
    print("Warning: File '{}' already exists in the directory.".format(outputfile))
    while True:
        # strip() tolerates stray spaces typed around the answer.
        choice = input("Do you want to rename (1), stop the program (2), or continue with the same name (3)? (1/2/3): ").strip()
        if choice == '1':
            new_filename = input("Enter the new filename: ").strip()
            test2 = f"{new_filename}.csv"
            if not new_filename:
                # An empty name would silently produce a file called ".csv".
                print("Error: The filename cannot be empty.")
            elif file_exists(test2):
                print("Error: File '{}' already exists in the directory.".format(new_filename))
            else:
                # Only the name used for this run's output changes. The file
                # already on disk is deliberately NOT renamed: moving it onto
                # the new name would make this run overwrite it there and
                # lose the earlier results.
                print("Output will be saved as '{}'; the existing '{}' is left untouched.".format(test2, test))
                outputfile = new_filename
                break
        elif choice == '2':
            print("Program stopped.")
            # quit() is injected by the site module and may be undefined in a
            # frozen (PyInstaller) build; SystemExit is always a builtin.
            raise SystemExit
        elif choice == '3':
            print("Continuing with the same filename '{}'...".format(outputfile))
            break
        else:
            print("Invalid choice. Please enter rename, stop or continue; '1', '2', or '3'.")

# Define a mapping between amino acids and their SMILES representations.
# Keys are one-letter residue codes; values are the SMILES fragment that
# sequence_to_smiles() appends for that residue.
# NOTE(review): several entries look suspicious and should be checked against
# a reference table before the results are trusted:
#   - 'D' and 'E' map to the identical string.
#   - 'I' and 'L' map to the identical string.
#   - 'F' and 'Y' differ only in the stereo marker ([C@@H] vs [C@H]).
#   - 'R' is far longer than every other entry (it repeats the same unit
#     three times) -- presumably a copy/paste slip; TODO confirm.
amino_acid_smiles = {
    'A': 'CC(C)(C)C(=O)N',
    'R': 'N[C@@H](CCCNC(N)=N)C(=O)N[C@@H](CCCNC(N)=N)C(=O)N[C@@H](CCCNC(N)=N)C(=O)NCC',
    'N': 'NC(CO)C(=O)O',
    'D': 'NC(CC(=O)O)C(=O)O',
    'C': 'C(C(=O)O)N',
    'Q': 'NC(CCC(N)=O)C(=O)O',
    'E': 'NC(CC(=O)O)C(=O)O',
    'G': 'NCC(=O)O',
    'H': 'NC1=NC=CC=C1CCC(=O)N',
    'I': 'CC(C)CC(C(=O)O)N',
    'L': 'CC(C)CC(C(=O)O)N',
    'K': 'NCCCC[C@H](N)C(=O)O',
    'M': 'CC(C)C[C@H](N)C(=O)O',
    'F': 'c1ccc(cc1)C[C@@H](C(=O)O)N',
    'P': 'C1CC(NC1)C(=O)O',
    'S': 'C(CO)N',
    'T': 'CC(C(O)C)N',
    'W': 'C1=C(NC=N1)C[C@@H](C(=O)O)N',
    'Y': 'c1ccc(cc1)C[C@H](C(=O)O)N',
    'V': 'CC(C)C(C(=O)O)N'
}

# Function to convert an amino acid sequence to a SMILES sequence
def sequence_to_smiles(sequence, mapping=None):
    """Translate a one-letter amino acid sequence into concatenated SMILES.

    Args:
        sequence: Iterable of one-letter residue codes, normally a string.
            Non-iterable values (for example ``None`` or the float NaN that
            pandas yields for an empty spreadsheet cell) are treated as an
            empty sequence instead of raising ``TypeError``.
        mapping: Optional dict from residue code to SMILES fragment.
            Defaults to the module-level ``amino_acid_smiles`` table.

    Returns:
        The fragments of every recognised residue joined in sequence order.
        Characters missing from the mapping are skipped.
    """
    if mapping is None:
        mapping = amino_acid_smiles
    try:
        residues = iter(sequence)
    except TypeError:
        # Not a sequence at all (e.g. NaN from a blank cell): nothing to map.
        return ''
    # join() builds the result in one pass instead of repeated string +=.
    return ''.join(mapping[aa] for aa in residues if aa in mapping)

# Initialise the descriptor calculator.
# Built once at module level from mordred's `descriptors` collection and
# reused for every molecule. ignore_3D=True presumably leaves out descriptors
# that need 3D coordinates -- confirm against the mordred documentation.
calc = Calculator(descriptors, ignore_3D=True)

def calculate_descriptors(smiles_list):
    """Apply the shared calculator ``calc`` to each SMILES string.

    Returns a list aligned with ``smiles_list``: the calculator output for
    every string that ``Chem.MolFromSmiles`` turns into a truthy molecule,
    and ``None`` for the others.
    """
    # Lazy generator: each string is parsed just before it is evaluated.
    molecules = (Chem.MolFromSmiles(entry) for entry in smiles_list)
    return [calc(molecule) if molecule else None for molecule in molecules]

if __name__ == "__main__":
    # Script entry point: read the sequences from the Excel workbook, convert
    # them to SMILES, compute the descriptors and write everything to a CSV.
    # Any failure is printed in full and the console is kept open, so a
    # double-clicked or frozen (PyInstaller) run does not vanish before the
    # user can read what went wrong.

    # Imported here because only the script entry point needs it.
    import traceback

    exit_code = 0
    try:
        # READ SEQUENCES FROM EXCEL FILE
        file_path = f'{inputfile}.xlsx'
        column_name = f'{column}'  # column where the sequences are located
        df = pd.read_excel(file_path)
        if column_name not in df.columns:
            # Fail with the list of real column names instead of a bare key.
            raise KeyError(
                f"column '{column_name}' not found in '{file_path}'; "
                f"available columns: {list(df.columns)}"
            )
        sequence_list = df[column_name].tolist()

        # CONVERT AA TO SMILES
        smiles_list = [sequence_to_smiles(sequence) for sequence in sequence_list]

        # CALCULATE DESCRIPTORS
        descriptor_results = calculate_descriptors(smiles_list)

        # SAVE TO CSV
        output_file = f'{outputfile}.csv'
        with open(output_file, 'w', newline='') as csvfile:
            csvwriter = csv.writer(csvfile)

            # Header: the two input columns followed by one column per descriptor.
            header = ['Sequence', 'SMILES'] + list(calc.descriptors)
            csvwriter.writerow(header)

            # One row per input sequence, in the original order.
            for sequence, smiles, result in zip(sequence_list, smiles_list, descriptor_results):
                if result is not None:
                    csvwriter.writerow([sequence, smiles] + list(result))
                else:
                    # No molecule could be built for this row.
                    csvwriter.writerow([sequence, smiles] + ['Error: Invalid SMILES'])

        # record finishing time
        Q = datetime.datetime.now()

        # finishing statements
        print("----------------------------------------------------------------")
        print(" ")
        print("                             FINISH                             ")
        print(" ")
        print("started at", P, "finished at", Q)
        print(f"Descriptor results saved to {output_file}")
        print(" ")
        print("----------------------------------------------------------------")
    except Exception:
        # Top-level boundary: show the complete error, then exit non-zero.
        traceback.print_exc()
        exit_code = 1

    # Keep the console window open until the user has read the output.
    try:
        input("Press Enter to close this window...")
    except EOFError:
        # No interactive stdin (e.g. piped input): nothing to wait for.
        pass

    if exit_code:
        raise SystemExit(exit_code)

Solution

  • Does this solve your query?

    If that doesn't solve it, I think the issue lies with the `if __name__ == "__main__":` check. It exists to tell whether the file is being run directly as a script or imported as a module, but since we're executing a binary, it could be leading to the errors.

    Another way to check for potential error messages is to add a time.sleep(10) line so that the console stays open long enough for you to see what the error is (don't forget to import time).