pythonexcelpandasopenpyxl

how to read excel merged cell properties value using python?


I need to read data from an Excel file. The first cell contains the property name, and the second cell contains the property value. However, some of the property names in the first column are merged across two or more columns, and the corresponding values are in the next cell. For example, the property name "Ref" is in columns A and B, and its value is in column C.

I want to retrieve the value of the "Ref" property from column C in my Excel file.

Here is my excel image:

enter image description here

I am using python. Here is the output:

Approval Memo of : SHILPI AKTER
Name of the Applicant : SHILPI AKTER
Name of Territory : Comilla
Total Family Expenses : 30000
Ref : N/A
Amount : N/A
Total Amount : 3000

Ref and Amount Properties value not found. Here is my code:

     import os
    import openpyxl
    from openpyxl.utils import column_index_from_string

    file_path = r"D:\file\input\example.xlsx"  
    if os.path.exists(file_path):
        print("File exists!")
    else:
        print("File not found! Check the path.")
        exit()

    target_sheet = "Output Approval Templete"

    # Define the properties to extract
    properties = [ 
        "Approval Memo of",
        "Name of the Applicant",
        "Name of Territory",
        "Total Family Expenses",
        "Ref",
        "Amount",
        "Total Amount"
    ]

    # Function to get the actual value from a merged cell
    def get_merged_cell_value(sheet, row, col):
        for merged_range in sheet.merged_cells.ranges:
            min_row, min_col, max_row, max_col = merged_range.bounds  # Extract merged cell bounds
            if min_row <= row <= max_row and min_col <= col <= max_col:
                return sheet.cell(min_row, min_col).value  # Return the first cell's value of the merged range
        return sheet.cell(row, col).value  

    # Function to format numeric values properly
    def format_value(value):
        if isinstance(value, float) and value > 1e10:  # Large numbers like NID
            return str(int(value))  # Convert to integer and string to avoid scientific notation
        elif isinstance(value, (int, float)):  # General number formatting
            return str(value)
        elif value is None:
            return "N/A"  # Handle missing values
        return str(value).strip()

    try:
        # Load the workbook
        wb = openpyxl.load_workbook(file_path, data_only=True)

        if target_sheet not in wb.sheetnames:
            print(f"Sheet '{target_sheet}' not found in the file.")
        else:
            ws = wb[target_sheet]
            extracted_data = {}

            # Iterate over rows to extract data
            for row in ws.iter_rows():
                for cell in row:
                    # Check if the cell value is a property we are looking for
                    if cell.value and isinstance(cell.value, str) and cell.value.strip() in properties:
                        prop_name = cell.value.strip()
                        col_idx = cell.column  # Get column index (1-based)
                        next_col_idx = col_idx + 1  # Next column index

                        # Ensure next column exists within sheet bounds
                        if next_col_idx <= ws.max_column:
                            # Check if the cell is merged, and get its value
                            next_value = get_merged_cell_value(ws, cell.row, next_col_idx)
                            
                            # Store the formatted value for the property
                            extracted_data[prop_name] = format_value(next_value)  # Store extracted value

            # Print extracted values
            for key, value in extracted_data.items():
                print(f"{key} : {value}")

    except Exception as e:
        print(f"Error loading workbook: {e}")

Please help me to find out merge cell properties value.


Solution

  • Just get the last cell in the range column number and add 1 as you have with the other fields.

    This code assumes the merge cells are row only.
    Also assumes the key name is cells is exactly the same as the name in the properties List

    import os
    import openpyxl
    
    
    def get_next_col(lc):   # lc = Left cell in the merge range
        for merge in ws.merged_cells:
            if lc in merge.coord:
                print(f"Merge Range: {merge.coord}")
                return merge.top[-1][1]+1  # Return 2nd value of last tuple incremented by 1
    
    
    def format_value(value):
        if isinstance(value, float) and value > 1e10:  # Large numbers like NID
            return str(int(value))  # Convert to integer and string to avoid scientific notation
        elif isinstance(value, (int, float)):  # General number formatting
            return str(value)
        elif value is None:
            return "N/A"  # Handle missing values
        return str(value).strip()
    
    
    # Define the properties to extract
    properties = [
        "Approval Memo of",
        "Name of the Applicant",
        "Name of Territory",
        "Total Family Expenses",
        "Ref",
        "Amount",
        "Total Amount"
    ]
    
    # Init Dictionary 
    extracted_data = {}  
    
    # Set working sheet name
    target_sheet = "Output Approval Templete"
    
    
    # Load the workbook
    file_path = r"D:\file\input\example.xlsx"
    
    if os.path.exists(file_path):
        print("File exists!\n")
    else:
        print("File not found! Check the path.")
        exit()
    
    wb = openpyxl.load_workbook(file_path, data_only=True)
    ws = wb.active
    
    # Check working sheet exists
    if target_sheet not in wb.sheetnames:
        print(f"Sheet '{target_sheet}' not found in the file.")
    else:
        ws = wb[target_sheet]
    
    # Process rows 
    for row in ws.iter_rows():
        for cell in row:
            cv = cell.value
            if isinstance(cv, str):   # Strip if the cell value is a string
                cv = cv.strip()
            if cv in properties:  # Process only cells with value in the 'properties' List
                co = cell.coordinate
                print(f"Processing '{cv}' in 'Properties' List at cell {co}")
    
                if co in ws.merged_cells:  # Check if the current cell is in a merge
                    print('This is also a merged cell:')
                    col = get_next_col(co)   # If merged get the next col number after the merge range
                else:
                    col = cell.col_idx + 1   # If not merged get the next col number after the cell
    
                next_value = ws.cell(cell.row, col).value  # Get next cell value as determined by value of 'col'
                print(f"Inserting Key: '{cv}' with Value: {next_value}")
                extracted_data[cv] = format_value(next_value)  # Add key and value to the dictionary
                print("-----------\n")
    
    for key, val in extracted_data.items():
        print(f"{key} : {val}")
    

    Output
    Extracted data from example Sheet.

    Approval Memo of : SHILPI AKTER
    Name of the Applicant : SHILPI AKTER
    Name of Territory : Comilla
    Total Family Expenses : 30000
    Ref : 22000
    Amount : 5000
    Total Amount : 3000