excelxmlvbaautomationfilesystemobject

To count tags in XML from folders and subfolders


The below mention code can successfully count the required tags in an XML files and also provides name of file and tag count in excel sheet. I have just one query that currently it only reads the folder individually. However if there are 300 folders in a parent folder i need to select each folder every time. Is there anyway if anyone can amend the code so that if there are 300 folders in a parent folder in read each and every file (XML) in all subfolders. This will be very helpful for me.

I have tried to do it my own but this is beyond my capacity.

Option Explicit

Sub process_folder()

    Dim iRow As Long, wb As Workbook, ws As Worksheet, ar
    Set wb = ThisWorkbook
    Set ws = wb.Sheets(1)
    ws.UsedRange.Clear
    ws.Range("A1:C1") = Array("Source", "<Headline> Tag Count")
    iRow = 1
    
    ' create FSO and regular expression pattern
    Dim FSO As Object, ts As Object, regEx As Object, txt As String
    Set FSO = CreateObject("Scripting.FileSystemObject")
    Set regEx = CreateObject("VBScript.RegExp")
    With regEx
        .Global = True
        .MultiLine = True
        .IgnoreCase = True
        .pattern = "<Headline>(.*)</Headline>"
   
        
    End With

    'Opens the folder picker dialog to allow user selection
    Dim myfolder As String, myfile As String, n As Long
    With Application.FileDialog(msoFileDialogFolderPicker)
        .Title = "Please select a folder"
        .Show
        .AllowMultiSelect = False
        If .SelectedItems.Count = 0 Then 'If no folder is selected, abort
            MsgBox "You did not select a folder"
            Exit Sub
        End If
        myfolder = .SelectedItems(1) & "\" 'Assign selected folder to MyFolder
    End With
    
    'Loop through all files in a folder until DIR cannot find anymore
    Application.ScreenUpdating = False
    myfile = Dir(myfolder & "*.xml")
    Do While myfile <> ""
    
        iRow = iRow + 1
        ws.Cells(iRow, 1) = myfile
    
        ' open file and read all lines
        Set ts = FSO.OpenTextFile(myfolder & myfile)
        txt = ts.ReadAll
        ts.Close
                                   
        ' count pattern matches
        Dim m As Object
        If regEx.test(txt) Then
            Set m = regEx.Execute(txt)
            ws.Cells(iRow, 2) = m(0).SubMatches(0) ' get date from first match
            ws.Cells(iRow, 3) = m.Count
            
        Else
            ws.Cells(iRow, 2) = "No tags"
            ws.Cells(iRow, 3) = 0
        End If

        myfile = Dir 'DIR gets the next file in the folder
    Loop
    
    ' results
    ws.UsedRange.Columns.AutoFit
    Application.ScreenUpdating = True
    

End Sub

Solution

  • Use Subfolders property of the parent folder object.

    Option Explicit
    
    Sub process_folder()
    
        Dim iRow As Long, wb As Workbook, ws As Worksheet, ar
        Set wb = ThisWorkbook
        Set ws = wb.Sheets(1)
        ws.UsedRange.Clear
        ws.Range("A1:B1") = Array("Source", "<Headline> Tag Count")
        iRow = 1
        
        ' create FSO and regular expression pattern
        Dim fso As Object, ts As Object, regEx As Object, txt As String
        Set fso = CreateObject("Scripting.FileSystemObject")
        Set regEx = CreateObject("VBScript.RegExp")
        With regEx
            .Global = True
            .MultiLine = True
            .IgnoreCase = True
            .Pattern = "<Headline>(.*)</Headline>"
        End With
    
        'Opens the folder picker dialog to allow user selection
        Dim myfolder, myfile As String, n As Long
        Dim parentfolder As String, oParent
       
        With Application.FileDialog(msoFileDialogFolderPicker)
            .Title = "Please select a folder"
            .Show
            .AllowMultiSelect = False
            If .SelectedItems.Count = 0 Then 'If no folder is selected, abort
                MsgBox "You did not select a folder"
                Exit Sub
            End If
            parentfolder = .SelectedItems(1) 'Assign selected folder to ParentFolder
        End With
        Set oParent = fso.getFolder(parentfolder)
        
        ' build collection or files
        Dim colFiles As Collection
        Set colFiles = New Collection
        Call GetFiles(oParent, "xml", colFiles)
        
        'Loop through all files in collection
        Application.ScreenUpdating = False
        For n = 1 To colFiles.Count
            myfile = colFiles(n)
            
            iRow = iRow + 1
            ws.Cells(iRow, 1) = myfile
        
            ' open file and read all lines
            Set ts = fso.OpenTextFile(myfile)
            txt = ts.ReadAll
            ts.Close
                                       
            ' count pattern matches
            Dim m As Object
            If regEx.test(txt) Then
                Set m = regEx.Execute(txt)
                ws.Cells(iRow, 2) = m(0).SubMatches(0) ' get date from first match
                ws.Cells(iRow, 3) = m.Count
            Else
                ws.Cells(iRow, 2) = "No tags"
                ws.Cells(iRow, 3) = 0
            End If
    
            ' results
            ws.UsedRange.Columns.AutoFit
        Next
        Application.ScreenUpdating = True
        MsgBox colFiles.Count & " Files process", vbInformation
    
    End Sub
    
    Sub GetFiles(oFolder, ext, ByRef colFiles)
    
        Dim f As Object
        For Each f In oFolder.Files
            If f.Name Like "*." & ext Then
                colFiles.Add oFolder.Path & "\" & f.Name
            End If
        Next
        
         ' call recursively
        For Each f In oFolder.subfolders
            Call GetFiles(f, ext, colFiles)
        Next
         
    End Sub