I want to compare 2 excels files [Having only 1 sheet in both] having 10-15 columns and rows will be more than 30K. We got one excel macro file which complete the comparison within 5-10Mins. Limitation of this macro is that it can compare only 2-3 columns at a time. So every time we need to run this macro multiple times which is time consuming process. So I created one utility file [.vbs file] which perform this task in one go but it takes around 1-3Hrs. Is there any other way to perform this comparison in short time in one go?
startTime=Timer()
Set objExcel=Createobject("Excel.application")
objExcel.Visible=True
Set objWorkbook=objExcel.Workbooks.Open("E:\QTP trial version\Data.xls")
'Set deleteAnalysis_CopySheet=objWorkbook.sheets("Analysis_Copy")
'deleteAnalysis_CopySheet.delete
'Set deleteSummarySheet=objWorkbook.sheets("Summary")
'deleteSummarySheet.delete
Set objAnalysis_Copy=objWorkbook.sheets.add
objAnalysis_Copy.name="Analysis_Copy"
Set objSummary=objWorkbook.sheets.add
objSummary.name="Summary"
objSummary.Cells(1,1)="Analysis Row Count"
objSummary.Cells(2,1)="Reporting Row Count"
objSummary.Cells(3,1)="Analysis Column Count"
objSummary.Cells(4,1)="Reporting Column Count"
objSummary.Cells(5,1)="Difference of Row Count"
objSummary.Cells(6,1)="Difference of Column Count"
objSummary.Cells(7,1)="False Count"
' ------------------------1st Check - Verify the position of ''Metrics' in Analysis and Reporting tab. It must be same---------------------
'Get the control of Analysis tab
Set objAnalysis=objExcel.Worksheets.Item("Analysis")
intAnalysisRowCount=objAnalysis.Usedrange.rows.count
objSummary.Cells(1,2)=intAnalysisRowCount
intAnalysisColCount=objAnalysis.Usedrange.Columns.count
objSummary.Cells(3,2)=intAnalysisColCount
'Get Column number of 'Metric' Column from Analysis tab
For intMetricAnalysis=1 to intAnalysisColCount
If(Trim(Lcase(objAnalysis.Cells(1,intMetricAnalysis)))=Trim(Lcase("Metrics"))) Then
Exit for
End If
Next
'Get all Analysis columns in 1 string
strAnalysisColumnOrder=""
For intAnalysisColumnOrder=1 to intAnalysisColCount
strAnalysisColumnOrder=strAnalysisColumnOrder&"*"&objAnalysis.Cells(1,intAnalysisColumnOrder)
If(intAnalysisColumnOrder=1) then
strAnalysisColumnOrder=Replace(strAnalysisColumnOrder,"*","")
End If
Next
Set objReporting=objExcel.Worksheets.Item("Reporting")
intReportingRowCount=objReporting.Usedrange.rows.count
objSummary.Cells(2,2)=intReportingRowCount
intReportingColCount=objReporting.Usedrange.Columns.count
objSummary.Cells(4,2)=intReportingColCount
''Get Column number of 'Metric' Column from Reporting tab
For intMetricReporting=1 to intReportingColCount
If(Trim(Lcase(objReporting.Cells(1,intMetricReporting)))=Trim(Lcase("Metrics"))) Then
Exit for
End If
Next
'Get all Reporting columns in 1 string
strReportingColumnOrder=""
For intReportingColumnOrder=1 to intAnalysisColCount
strReportingColumnOrder=strReportingColumnOrder&"*"&objReporting.Cells(1,intReportingColumnOrder)
If(intReportingColumnOrder=1) then
strReportingColumnOrder=Replace(strReportingColumnOrder,"*","")
End If
Next
''Metric' column number must be same
If(intMetricAnalysis<>intMetricReporting) then
msgbox "Merics column is at "&intMetricAnalysis&" position in 'Analysis' Tab And at "&intMetricReporting&" position in 'Reporting' tab. 'Metrics' column should be at same position in both tab."
strMetricsFlag=False
Else
strMetricsFlag=True
End IF
'-----------2nd Check, Verify count of columns in 'Analysis' And 'Reporting' tab . It Must be same
If intAnalysisColCount<>intReportingColCount Then
msgbox "Column count of 'Reporting' Tab is not same as of 'Analysis tab'."
strAnalysisColCount=False
Else
strAnalysisColCount=True
End If
''---------------3rd Check , Verify Order of columns in 'Analysis' And 'Reporting' tab . It Must be same
If Trim(Lcase(strAnalysisColumnOrder))<>Trim(Lcase(strReportingColumnOrder)) then
msgbox "Column order of 'Reporting' Tab is not same as of 'Analysis tab'. Reporting column order should be "&strAnalysisColumnOrder
strAnalysisColumnOrderFlag=False
Else
strAnalysisColumnOrderFlag=True
End IF
'Creare 'Analysis_Copy' tab and add headers
Set objAnalysisCopy=objExcel.Worksheets.Item("Analysis_Copy")
strFirstCoulmn_AggKeys=""
For intHeaderAggkey=1 to intMetricAnalysis-1
strFirstCoulmn_AggKeys=strFirstCoulmn_AggKeys&"*"&objAnalysis.Cells(1,intHeaderAggkey)
If(intHeaderAggkey=1) then
strFirstCoulmn_AggKeys=Replace(strFirstCoulmn_AggKeys,"*","")
End If
Next
objAnalysisCopy.Cells(1,1)=strFirstCoulmn_AggKeys
strSecondCoulmn_AnalysisMetrics=""
For intHeaderAnalysisMetrics=intMetricAnalysis+1 to intAnalysisColCount
strSecondCoulmn_AnalysisMetrics=strSecondCoulmn_AnalysisMetrics&"*"&objAnalysis.Cells(1,intHeaderAnalysisMetrics)
If(intHeaderAnalysisMetrics=intMetricAnalysis+1 ) then
strSecondCoulmn_AnalysisMetrics=Replace(strSecondCoulmn_AnalysisMetrics,"*","")
End If
Next
objAnalysisCopy.Cells(1,2)="Analysis_"&strSecondCoulmn_AnalysisMetrics
objAnalysisCopy.Cells(1,3)="Reporting_"&strSecondCoulmn_AnalysisMetrics
objAnalysisCopy.Cells(1,4)="Status"
objWorkbook.Save
'$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
If strAnalysisColumnOrderFlag=False OR strMetricsFlag=False OR strAnalysisColCount=False Then
msgbox "So Data Comparision can not be done"
objWorkbook.Save
objWorkbook.Close
objExcel.Quit
Else
intFalseCount=0
For intAnalysisRow=2 to intAnalysisRowCount
' ------ Get the control of ''Analysis' tab and the string of Aggrecate Keys [strAnalysisAggrData] and respective metrics [strAnalysisMetricsData]
Set objAnalysis=objExcel.Worksheets.Item("Analysis")
' Append all data of each row which is before 'Metrics' column
strAnalysisAggrData=""
For intAnalysisColumn=1 to intMetricAnalysis-1
strAnalysisAggrData=strAnalysisAggrData&"*"&objAnalysis.Cells(intAnalysisRow,intAnalysisColumn)
If(intAnalysisColumn=1) then
strAnalysisAggrData=Replace(strAnalysisAggrData,"*","")
End If
Next
' ' Append all data of each row which is after 'Metrics' column
strAnalysisMetricsData=""
For intFromMetric=intMetricAnalysis+1 to intAnalysisColCount
strAnalysisMetricsData=strAnalysisMetricsData&"*"&objAnalysis.Cells(intAnalysisRow,intFromMetric)
If(intFromMetric=intMetricAnalysis+1 ) then
strAnalysisMetricsData=Replace(strAnalysisMetricsData,"*","")
End If
Next
' ------ Get the control of ''Reporting' tab and the string of Aggrecate Keys [strAnalysisAggrData] and respective metrics [strAnalysisMetricsData]
Set objReporting=objExcel.Worksheets.Item("Reporting")
For intReportingRow=1 to intReportingRowCount
' Append all data of each row which is before 'Metrics' column
strReportingAggrData=""
For intBeforeMetricReporting=1 to intMetricReporting-1
strReportingAggrData=strReportingAggrData&"*"&objReporting.Cells(intReportingRow,intBeforeMetricReporting)
If(intBeforeMetricReporting=1) then
strReportingAggrData=Replace(strReportingAggrData,"*","")
End If
Next
' Append all data of each row which is after 'Metrics' column
strReportingMetricsData=""
For intFromReportingMetric=intMetricReporting+1 to intReportingColCount
strReportingMetricsData=strReportingMetricsData&"*"&objReporting.Cells(intReportingRow,intFromReportingMetric)
If(intFromReportingMetric=intMetricReporting+1 ) then
strReportingMetricsData=Replace(strReportingMetricsData,"*","")
End If
Next
'------------------------------------------------------------ Actual Comparision will be from here ------------------------------------------
If Trim(LCase(strAnalysisAggrData))=Trim(LCase(strReportingAggrData)) Then
objAnalysisCopy.Cells(intAnalysisRow,1)=strAnalysisAggrData
objAnalysisCopy.Cells(intAnalysisRow,2)=strAnalysisMetricsData
objAnalysisCopy.Cells(intAnalysisRow,3)=strReportingMetricsData
'Compare Metrics Data
If Trim(LCase(strAnalysisMetricsData))=Trim(LCase(strReportingMetricsData)) Then
objAnalysisCopy.Cells(intAnalysisRow,4)="PASS"
objAnalysisCopy.Cells(intAnalysisRow,4).font.color=vbGreen
Else
objAnalysisCopy.Cells(intAnalysisRow,4)="FAIL"
intFalseCount=intFalseCount+1
objAnalysisCopy.Cells(intAnalysisRow,4).font.color=vbRed
End If
Exit For
End If
Next
Next
objSummary.Cells(5,2)=intAnalysisRowCount-intReportingRowCount
objSummary.Cells(6,2)=intAnalysisColCount-intReportingColCount
objSummary.Cells(7,2)=intFalseCount
objSummary.Cells(7,2).font.color=vbRed
objWorkbook.Save
objWorkbook.Close
objExcel.Quit
EndTime=Timer()
TotalTime=EndTime-startTime
msgbox "Data Comparision is Completed. Comparision time is "&TotalTime&"Secs"
End If
Use a dictionary and you avoid the nested loops and only scan each sheet once. For example as a VBA macro (untested)
Sub compare()
Dim wb As Workbook
Dim ws(2) As Worksheet, wsSum As Worksheet, wsCopy As Worksheet
Dim rowCount(2) As Long, colCount(2) As Integer, colMetric(2) As Integer
Dim colsMetric(2) As String, colsAll(2) As String, colsKeys(2) As String
Dim bMetricsFlag As Boolean, bColCountFlag As Boolean, bColOrderFlag As Boolean
Dim i As Long, ar, msg As String, intFalseCount As Long
Dim t0 as Single
t0 = Timer
Set wb = ThisWorkbook
Set ws(1) = wb.Sheets("Analysis")
Set ws(2) = wb.Sheets("Reporting")
Set wsSum = wb.Sheets("Summary")
wsSum.Cells.Clear
wsSum.Range("A1:A7") = WorksheetFunction.Transpose(Array("Analysis Row Count", _
"Reporting Row Count", "Analysis Column Count", "Reporting Column Count", _
"Difference of Row Count", "Difference of Column Count", "False Count"))
Set wsCopy = wb.Sheets("Analysis_Copy")
wsCopy.Cells.Clear
' get stats for each sheet 1-Analyis 2=Reporting
For i = 1 To 2
ar = Stats(ws(i))
rowCount(i) = ar(0)
colCount(i) = ar(1)
colMetric(i) = ar(2)
colsAll(i) = ar(3)
colsMetric(i) = ar(4)
colsKeys(i) = ar(5)
Next
' summary
With wsSum
.Cells(1, 2) = rowCount(1)
.Cells(2, 2) = rowCount(2)
.Cells(3, 2) = colCount(1)
.Cells(4, 2) = colCount(2)
End With
' check stats
'Metric' column number must be same
If colMetric(1) = 0 Or colMetric(2) = 0 Or colMetric(1) <> colMetric(2) Then
msg = "Metrics columns not the same or missing : " & vbCr & _
"Analysis : " & colMetric(1) & vbCr & _
"Reporting : " & colMetric(2)
MsgBox msg, vbCritical
bMetricsFlag = False
Else
bMetricsFlag = True
End If
' Verify count of columns
If colCount(1) <> colCount(2) Then
msg = "Column counts not the same : " & vbCr & _
"Analysis : " & colCount(1) & vbCr & _
"Reporting : " & colCount(2)
MsgBox msg, vbCritical
bColCountFlag = False
Else
bColCountFlag = True
End If
'Verify Order of columns
If colsAll(1) <> colsAll(2) Then
msg = "Column order not the same : " & vbCr & _
"Analysis : " & colsAll(1) & vbCr & _
"Reporting : " & colsAll(2)
MsgBox msg, vbCritical
bColOrderFlag = False
Else
bColOrderFlag = True
End If
With wsCopy
.Cells(1, 1) = colsKeys(1)
.Cells(1, 2) = "Analysis_" & colsMetric(1)
.Cells(1, 3) = "Reporting_" & colsMetric(2)
.Cells(1, 4) = "Status"
End With
' checks OK ?
If bColOrderFlag And bMetricsFlag And bColCountFlag Then
' ok
Else
MsgBox "So Data Comparision can not be done", vbCritical
Exit Sub
End If
' start comparison
Dim dict As Object, m As Long, c As Long, s As String
Dim sKey As String, sMetric As String
Set dict = CreateObject("Scripting.Dictionary")
' scan Reporting sheet to build dictionary
m = colMetric(2)
For i = 1 To rowCount(2)
'join cols up to and after metric col
sMetric = "": sKey = ""
For c = 1 To colCount(2)
s = Trim(ws(2).Cells(i, c))
If c < m Then
If sMetric <> "" Then sMetric = sMetric & "*"
sMetric = sMetric & s
ElseIf c > m Then
If sKey <> "" Then sKey = sKey & "*"
sKey = sKey & s
End If
Next
dict(sKey) = sMetric
Next
' scan Analysis sheet to compare dictionary
m = colMetric(1)
For i = 2 To rowCount(1)
'join cols up to and after metric col
sMetric = "": sKey = ""
For c = 1 To colCount(1)
s = Trim(ws(1).Cells(i, c))
If c < m Then
If sMetric <> "" Then sMetric = sMetric & "*"
sMetric = sMetric & s
ElseIf c > m Then
If sKey <> "" Then sKey = sKey & "*"
sKey = sKey & s
End If
Next
' result
wsCopy.Cells(i, 1) = sKey
wsCopy.Cells(i, 2) = sMetric
wsCopy.Cells(i, 3) = dict(sKey)
' pass or fail
If sMetric = dict(sKey) Then
wsCopy.Cells(i, 4) = "PASS"
wsCopy.Cells(i, 4).Font.Color = vbGreen
Else
wsCopy.Cells(i, 4) = "FAIL"
wsCopy.Cells(i, 4).Font.Color = vbRed
intFalseCount = intFalseCount + 1
End If
Next
With wsSum
.Cells(5, 2) = rowCount(1) - rowCount(2)
.Cells(6, 2) = colCount(1) - colCount(2)
.Cells(7, 2) = intFalseCount
.Cells(7, 2).Font.Color = vbRed
End With
MsgBox i - 2 & " rows scanned " & vbCrLf & _
intFalseCount & " FAILED", vbInformation, Int(Timer - t0) & "seconds"
End Sub
Function Stats(ws As Worksheet) As Variant
Dim c As Integer, ar(5) As Variant, s As String
ar(0) = ws.UsedRange.Rows.Count
ar(1) = ws.UsedRange.Columns.Count
ar(2) = 0 'metric column
ar(3) = "" ' col aggregated
ar(4) = "" ' cols upto not including metric
ar(5) = "" ' cols after metric
For c = 1 To ar(1)
s = LCase(Trim(ws.Cells(1, c)))
If s = "metric" Then
ar(2) = c
End If
' aggregate headers before/after metric
If ar(2) = 0 Then
If ar(4) <> "" Then ar(4) = ar(4) & "*"
ar(4) = ar(4) & s
ElseIf c > ar(2) Then
If ar(5) <> "" Then ar(5) = ar(5) & "*"
ar(5) = ar(5) & s
End If
' aggregate all
If ar(3) <> "" Then ar(3) = ar(3) & "*"
ar(3) = ar(3) & s
Next
Stats = ar
End Function
Test data generator
Sub testdata()
Dim ws As Worksheet, n, r, c, ar
ar = Array("", "Analysis", "Reporting")
For n = 1 To 2
Set ws = Sheets(ar(n))
For r = 1 To 30000
For c = 1 To 15
ws.Cells(r, c) = Chr(64 + c) & r & "_abcdefghijklmnopqrstuvwxyz_"
Next
Next
ws.Cells(1, 10) = "metric" ' col J
Next
MsgBox "test data created"
End Sub