SQL DEV version Oracle 22.2.1.234
Summary Given that I have a table (simplified for question purpose) define as :
Doc ID (PK) | Doc text (XML/text/whatever) |
---|---|
DOC 1 | Doc-1-text |
Doc 2 | Doc-2-text |
Doc 3 | Doc-3-text |
Doc ... | Doc-...-text |
Doc 100 | Doc-100-text |
I use the run command to view the result of a SELECT on it
What I want to do
Export the result grid to excel so I can manipulate data externaly
Issue
When the doc text column content is less than say 40kb, it gets exported just fine
When the doc text column content is over than 40kb, it gets exported as blank (with a doc ID but blank text)
When I copy the problematic data manually, row by row from the result grid, I see no issue
Questions
Is there an export size limitation I would not know ?
Is there a way not to have to copy all the records one by one by hand ?
Is there an export configuration that would allow me to export results as file DOC ID containing the corresponding text ?
Tried to use SPOOL instruction: Not working
Tried to increase page/buffer size: Not working
I decided to play around with python to meet my needs on second thought... pure PL/SQL was not an option.
I thought I'd share my solution with you in case it could help :)
Notes:
Used pip to install pyzipper to enable zip file protection
Wanted to have some fun and managed to play sound on script start/end/error
Script input file format is in SQL Dev result grid export XML format
"""
CHANGELOG:
- Initial version: Extracts and saves XML elements from an input file.
- Added support for creating a ZIP archive.
- Implemented password protection for ZIP files.
- Fixed ZIP password issue (ensured encryption using pyzipper).
- Added an option to specify which XML tag should be used as the file name (default: "id").
- Display an error log in Notepad if errors occur; delete it if no errors.
- Added a sound effect at the beginning and end of execution using winsound (Windows only).
- Implemented a proper command-line argument validation mechanism.
- Display progress percentage after each file is processed, inside the ID processing message.
- Optimized file handling and logging efficiency.
"""
import os
import sys
import time
import pyzipper
import subprocess
import winsound
from datetime import datetime
from xml.etree import ElementTree as ET
log_file = "log.txt"
error_log_file = "error_log.txt"
log_entries = []
error_entries = []
def play_sound(s):
"""Plays a sound file if it exists (Windows only)."""
if os.path.exists(s):
winsound.PlaySound(s, winsound.SND_FILENAME)
def extract_and_save_foo_elements(input_file, column_name="RICH_TEXT_NCLOB", create_zip=False, zip_password=None, file_id_tag="id"):
# Create output directory if it does not exist
output_dir = "xmls"
os.makedirs(output_dir, exist_ok=True)
start_time = time.time()
start_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
file_count = 0
log_entries.append(f"Process started at: {start_timestamp}\n")
print(log_entries[-1])
try:
# Parse the XML file
tree = ET.parse(input_file)
root = tree.getroot()
if root.tag != "RESULTS":
raise ValueError("Root element is not <RESULTS>")
rows = root.findall("ROW")
total_rows = len(rows)
processed_rows = 0
for row in rows:
rich_text = row.find(f"COLUMN[@NAME='{column_name}']")
if rich_text is not None and rich_text.text:
foo_content = rich_text.text.strip()
try:
foo_element = ET.fromstring(foo_content) # Parse the foo element
id_element = foo_element.find(file_id_tag)
if id_element is not None and id_element.text:
id_text = id_element.text.strip()
file_name = f"{id_text}.xml"
file_path = os.path.join(output_dir, file_name)
# Update progress percentage
processed_rows += 1
progress_percentage = (processed_rows / total_rows) * 100
log_entries.append(f"Processing {file_id_tag}: {id_text} ({progress_percentage:.2f}%)\n")
print(log_entries[-1])
# Save into an XML file
with open(file_path, "w", encoding="utf-8") as output_file:
output_file.write(f"<?xml version='1.0' encoding='utf-8'?>\n")
output_file.write(foo_content)
file_count += 1
log_entries.append(f"File created: {file_path}\n")
print(log_entries[-1])
else:
log_entries.append(f"No {file_id_tag} found, element ignored.\n")
print(log_entries[-1])
except ET.ParseError as e:
error_entries.append(f"XML parsing error for {file_id_tag}: {id_text if 'id_text' in locals() else 'Unknown'} - {e}\n")
print(error_entries[-1])
except Exception as e:
error_entries.append(f"Error processing file: {e}\n")
print(error_entries[-1])
if create_zip:
# Compress folder with password protection using pyzipper
zip_filename = "xmls.zip" if not zip_password else "xmls_protected.zip"
log_entries.append(f"Creating zip archive: {zip_filename}\n")
print(log_entries[-1])
try:
with pyzipper.AESZipFile(zip_filename, 'w', encryption=pyzipper.WZ_AES) as zipf:
if zip_password:
zipf.setpassword(zip_password.encode()) # Set password for the archive
for root_dir, _, files in os.walk(output_dir):
for file in files:
file_path = os.path.join(root_dir, file)
arcname = os.path.relpath(file_path, output_dir)
zipf.write(file_path, arcname)
log_entries.append("Compression completed.\n")
print(log_entries[-1])
except Exception as e:
print(f"Error compressing files: {e}")
error_entries.append(f"Error compressing files: {e}\n")
print(error_entries[-1])
end_time = time.time()
end_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
duration = end_time - start_time
log_entries.append(f"Process completed at: {end_timestamp}\n")
print(log_entries[-1])
log_entries.append(f"Total execution time: {duration:.2f} seconds\n")
print(log_entries[-1])
log_entries.append(f"Total files created: {file_count}\n")
print(log_entries[-1])
# Pause and wait for the user to press Enter
input("Press Enter to exit...")
# Write logs to file
with open("log.txt", "w", encoding="utf-8") as log_file:
log_file.write("\n".join(log_entries) + "\n")
if error_entries:
with open("error_log.txt", "w", encoding="utf-8") as error_file:
error_file.write("\n".join(error_entries) + "\n")
print("Errors occurred during the process. Opening error log...")
subprocess.run(["notepad", "error_log.txt"])
else:
print("Process completed successfully. No errors occurred.")
if os.path.exists("error_log.txt"):
os.remove("error_log.txt")
def validate_arguments():
"""Validates command-line arguments and identifies invalid options."""
valid_args = ["-h", "--column", "-z", "--id-tag"]
invalid_args = [arg for arg in sys.argv[1:] if arg.startswith("-") and arg not in valid_args]
# If "-h" is provided or there are invalid arguments, show usage info
if "-h" in sys.argv or invalid_args or len(sys.argv)==1:
print(f"Invalid options supplied! {', '.join(invalid_args) if invalid_args else 'None'}")
play_sound("homer_error.wav") # Play error sound
print(usage_table)
sys.exit(0)
input_file = sys.argv[1]
if not os.path.exists(input_file):
print(f"Error: The input file {input_file} does not exist.")
play_sound("homer_error.wav")
sys.exit(1)
column_name = "RICH_TEXT_NCLOB"
create_zip = "-z" in sys.argv
zip_password = None
file_id_tag = "id" # Default to 'id'
# Check for --column argument
if "--column" in sys.argv:
column_index = sys.argv.index("--column")
if column_index + 1 < len(sys.argv):
column_name = sys.argv[column_index + 1]
else:
print("Error: --column option must be followed by a column name.")
play_sound("homer_error.wav")
sys.exit(1)
# Check for --file-id-tag argument
if "--file-id-tag" in sys.argv:
file_id_tag_index = sys.argv.index("--file-id-tag")
if file_id_tag_index + 1 < len(sys.argv):
file_id_tag = sys.argv[file_id_tag_index + 1]
else:
print("Error: --file-id-tag option must be followed by a tag name.")
play_sound("homer_error.wav")
sys.exit(1)
# Check for zip password
if create_zip:
zip_index = sys.argv.index("-z")
if zip_index + 1 < len(sys.argv):
zip_password = sys.argv[zip_index + 1]
if len(zip_password) < 5: # Password length changed to 5
print("Error: Password for ZIP archive must be at least 5 characters long.")
play_sound("homer_error.wav")
sys.exit(1)
return input_file, column_name, create_zip, zip_password, file_id_tag
usage_table = """
Usage: python script.py <input_file> [--column column_name] [--file-id-tag tag_name] [-z] [zip_password]
+------------------+----------------------------------------------------+
| Option/Arg | Description |
+------------------+----------------------------------------------------+
| input_file | SQLDeveloper export file. |
| --column | (Optional) Column name (default: RICH_TEXT_NCLOB). |
| --file-id-tag | (Optional) Tag name to be used for file name (default: 'id').|
| -z | (Optional) Create a zip archive. |
| zip_password | (Optional) Protect the zip with a password. |
+------------------+----------------------------------------------------+
"""
if __name__ == "__main__":
play_sound("homer_start.wav")
# Validate arguments
input_file, column_name, create_zip, zip_password, file_id_tag = validate_arguments()
# Execute the main function
extract_and_save_foo_elements(input_file, column_name, create_zip, zip_password, file_id_tag)
play_sound("homer_done.wav")