pandasdatetimedatetime-format

Getting Datetime format column to export into excel without H:M:S


I am comparing two excel sheets for employees. Its the same data, just one week apart. It then highlights the changes of pay columns, if any exist. The code functions but the excel sheet that it exports still isn't in a format that I want. The excel sheet provides all dates in a format of 2023-06-18 00:00:00.

What I want is to produce a date in the excel file of m-d-yyyy. I have tried to convert the format after converting the columns to datetime and I have tried at the bottom during the export process but I can't seem to get it to change. It keeps exporting the excel with 00:00:00. This is easy to change in excel after I open it, but I want it so I dont have to alter the final excel product.

I want to ensure that it remains a date function when it exports to excel. I have seen a lot of string conversion functions, but I'm pretty sure that will negatively affect my excel columns once exported.

Also, I have no doubt that someone can easily fix this issue, but can you let me know if you see an issue that isn't allowing me to change the format after I convert the columns to datetime? I feel like I should be able to change it after pretty easily.

import pandas as pd  
import datetime as dt
import xlwings


#new_WFRL
new_WFRL = pd.read_excel("H:/DIR/Human Resources/HR Audits/Raw Files/WFRL 6.24.24.xlsx", usecols=["Empl ID", "Name", "Grade", "Step", "Grade Entry Date", "Step Date", "WGI Due Dt"], parse_dates=["Grade Entry Date", "Step Date", "WGI Due Dt"],  date_format="%m/%d/%y", index_col=False).sort_values("Name")   
new_WFRL = new_WFRL.add_prefix("New ")  
new_WFRL[['New Grade Entry Date', 'New Step Date', 'New WGI Due Dt']] = new_WFRL[['New Grade Entry Date', 'New Step Date', 'New WGI Due Dt']].apply(pd.to_datetime, format='mixed')


#old_WFRL  
old_WFRL = pd.read_excel("H:/DIR/Human Resources/HR Audits/Raw Files/WFRL 6.14.24.xlsx", usecols=["Empl ID", "Name", "Grade", "Step", "Grade Entry Date", "Step Date", "WGI Due Dt"], parse_dates=["Grade Entry Date", "Step Date", "WGI Due Dt"], date_format="%m/%d/%y", index_col=False).sort_values("Name")   
old_WFRL = old_WFRL.add_prefix("Old ") 

#this converts the specific columns to datetime data types - can confirm with old_WFRL.info()
old_WFRL[['Old Grade Entry Date', 'Old Step Date', 'Old WGI Due Dt']] = old_WFRL[['Old Grade Entry Date', 'Old Step Date', 'Old WGI Due Dt']].apply(pd.to_datetime, format = '%Y-%m-%d %H:%M:%S')

#old_WFRL[['Old Grade Entry Date', 'Old Step Date', 'Old WGI Due Dt']] = old_WFRL[['Old Grade Entry Date', 'Old Step Date', 'Old WGI Due Dt']].apply(lambda x: dt.datetime.strftime(x, '%m-%d-%Y'))
  
merged_WFRL = pd.merge(old_WFRL, new_WFRL, how = "outer", left_on = "Old Empl ID", right_on = "New Empl ID")  
merged_WFRL 
#this is the function that will show a change  

def compare_WFRL(df):  
    if df["Old Grade"] == df["New Grade"] and df["Old Step"]== df["New Step"]:  
        return 1  
    else:  
        return 0  

#applies the above function  
merged_WFRL["changes"] = merged_WFRL.apply(compare_WFRL, axis =1)  

#filters all the rows so changes are the only ones left  
merged_changes = merged_WFRL[merged_WFRL["changes"] == 0]
merged_changes.pop('changes')  

# Export DataFrame to Excel
export_file = "H:/DIR/Human Resources/HR Audits/WFRL Comparison Reports/6.24.24v7.xlsx"
merged_changes.to_excel(export_file, index=False)

######################################################################################################
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
from openpyxl.worksheet.dimensions import ColumnDimension, DimensionHolder
from openpyxl.utils import get_column_letter
from openpyxl.styles import NamedStyle


'''
tried to us xlwings but still having error in coding - taken from SO as a guide. Not all info
has been updated to relate to my info. 
with pd.ExcelWriter(export_file, engine='xlwings', date_format="mm dd yyyy", datetime_format="mm dd yyyy") as writer:
    merged_changes.to_excel(writer, sheet_name='Sheet1', index=False)
    workbook  = writer.book
    worksheet = writer.sheets['Sheet1']
    formatdict = {'num_format':'mm/dd/yyyy'}
    fmt = workbook.add_format(formatdict)
    #worksheet.set_column('A:B', 20, fmt)
'''

# Load the workbook and select the active worksheet
wb = load_workbook(export_file)
ws = wb.active

# Define cell styles
yellow_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
red_fill = PatternFill(start_color="FF0000", end_color="FF0000", fill_type="solid")

# Apply conditional formatting
for row in range(2, len(merged_changes) + 2):
    old_grade_cell = ws.cell(row=row, column=2)  # Old Grade
    new_grade_cell = ws.cell(row=row, column=10)  # New Grade
    old_step_cell = ws.cell(row=row, column=3)  # Old Step
    new_step_cell = ws.cell(row=row, column=11)  # New Step
    
    # Check for New Grade not equal to Old Grade
    if new_grade_cell.value != old_grade_cell.value:
        new_grade_cell.fill = yellow_fill
    
    # Check for New Step not equal to Old Step
    if new_step_cell.value != old_step_cell.value:
        new_step_cell.fill = red_fill

# Applys formating to column width
dims = {}
for row in ws.rows:
    for cell in row:
        if cell.value:
            dims[cell.column_letter] = max((dims.get(cell.column_letter, 0), len(str(cell.value))))
for col, value in dims.items():
    ws.column_dimensions[col].width = value *1.25   #*1.25 added additional width to the column. The formatting didn't give it enough space, this expanded it further. 


# Save the workbook
wb.save(export_file)

Solution

  • you can remove the time from datetime objects using the .dt accessor:

    df = pd.DataFrame({'A': pd.date_range("2018-01-01", periods=3, freq="h"),
                       'B': pd.date_range("2018-01-01", periods=3, freq="h")})
    print(df)
    
                        A                   B
    0 2018-01-01 00:00:00 2018-01-01 00:00:00
    1 2018-01-01 01:00:00 2018-01-01 01:00:00
    2 2018-01-01 02:00:00 2018-01-01 02:00:00
    
    cols = ['A', 'B']
    df[cols] = df[cols].apply(lambda x: pd.to_datetime(x).dt.date)
    print(df)
    
                A           B
    0  2018-01-01  2018-01-01
    1  2018-01-01  2018-01-01
    2  2018-01-01  2018-01-01
    

    so for your instance:

    new_cols = ['New Grade Entry Date', 'New Step Date', 'New WGI Due Dt']
    new_WFRL[new_cols] = new_WFRL[new_cols].apply(lambda x: pd.to_datetime(x).dt.date)
    
    old_cols = ['Old Grade Entry Date', 'Old Step Date', 'Old WGI Due Dt']
    old_WFRL[old_cols] = old_WFRL[old_cols].apply(lambda x: pd.to_datetime(x).dt.date)