I want to know if a line of code in a certain format exists in a workbook's modules, in this case a MsgBox with a string literal, by VBA regex.
pattern1
is found, but pattern2
is not found even when it exists in the module.
Pattern1
matches MsgBox with string literals, with opening and closing parentheses( ) e.g.:
testString = MsgBox("Do you want to open our product page?", vbInformation + vbYesNoCancel, "myMessageBox")
testString = MsgBox("Do you want to open our product page?", vbYesNoCancel, "myMessageBox")
testString = MsgBox("Do you want to open our product page?", vbYesNoCancel)
testString = MsgBox("Do you want to open our product page?")
Pattern2
matches MsgBox with string literals, without opening and closing parentheses( ), e.g.:
MsgBox "Do you want to open our product page?", vbInformation + vbYesNoCancel, "myMessageBox"
MsgBox "Do you want to open our product page?", vbInformation + vbYesNoCancel
MsgBox "Do you want to open our product page?"
' Matches strings in all VBA modules of the specified workbook
' Add a reference to "Microsoft VBScript Regular Expressions 5.5" for early binding
Public Sub FindStringsInModules(ByVal objWorkbookToObfuscate As Workbook)
Dim vbComp As Object
Dim lineNum As Long
Dim lineText As String
Dim newLineText As String
' Iterate over all VBA modules in the provided workbook
For Each vbComp In objWorkbookToObfuscate.VBProject.VBComponents
' Process each line in the module
Dim lineCount As Long
lineCount = vbComp.codeModule.CountOfLines
For lineNum = 1 To lineCount
lineText = vbComp.codeModule.Lines(lineNum, 1)
newLineText = MatchLine(lineText)
vbComp.codeModule.ReplaceLine lineNum, newLineText
Next lineNum
Next vbComp
End Sub
Function MatchLine(ByVal lineText As String) As String
Dim regex As RegExp
Set regex = New RegExp
Dim pattern1 As String
Dim pattern2 As String
Dim pattern3 As String
' Pattern 1: Matches MsgBox with string literals, with opening and closing parenthesis ( )
pattern1 = "(?:[a-zA-Z0-9]+\s*=\s*)?MsgBox\s*\(\s*(""[^""]*"")(?:\s*,\s*[^,)]+(?:\s*\+\s*[^,)]+)*)?(?:\s*,\s*""[^""]*"")?\s*\)"
' Pattern 2: Matches MsgBox without parentheses, including additional parameters and concatenations
pattern2 = "MsgBox\s+""""[^""""]*""""(?:\s*&\s*""""[^""""]*""""|[^""""])*(?:\s*,\s*[^,""""]+)*"
pattern3 = "MsgBox\(""[^""]*""\)\s*"
' Combine patterns using alternation operator
regex.Pattern = pattern1 & "|" & pattern2 & "|" & pattern3
regex.Global = True
regex.MultiLine = True
Dim matches As MatchCollection
Set matches = regex.Execute(lineText)
' Process each match for potential transformation
If matches.Count > 0 Then
Dim match As match
For Each match In matches
Dim literal As String
literal = match.SubMatches(0)
If literal = "" Then
Debug.Print "lineText not catched:" & lineText
'Stop
End If
Next match
End If
MatchLine = lineText
End Function
match.SubMatches(0)
returns that literal = ""
even though matches.Count > 0
.
P.S. When testing against a string literal in VBA, the MsgBox line
needs to be enclosed in double quotes, with inner double quotes doubled, for example, testString = "MsgBox ""Do you want to open our product page?"""
.
When testing against a line of VBA code (not as a string literal), the MsgBox line would appear as directly written in the code, for example, MsgBox "Do you want to open our product page?"
.
Regex.Test(Text)
returns true if there is a match. Here is how I would refactor the code:
Sub ExampleUsage()
Const Pattern1 As String = "(?:[a-zA-Z0-9]+\s*=\s*)?MsgBox\s*\(\s*(""[^""]*"")(?:\s*,\s*[^,)]+(?:\s*\+\s*[^,)]+)*)?(?:\s*,\s*""[^""]*"")?\s*\)"
Const Pattern2 As String = "MsgBox\s+""""[^""""]*""""(?:\s*&\s*""""[^""""]*""""|[^""""])*(?:\s*,\s*[^,""""]+)*"
Const Pattern3 As String = "MsgBox\(""[^""]*""\)\s*"
Dim MasterPattern As String
MasterPattern = Join(Pattern1, Pattern2, Pattern3)
Const NewText As String = "MsgBox(""Sample Text"")"
Dim Patterns() As Variant
Patterns = Array(Pattern1, Pattern2, Pattern3)
ReplaceMatchesInModule ThisWorkbook, NewText, MasterPattern
End Sub
Public Sub ReplaceMatchesInModule(ByVal objWorkbookToObfuscate As Workbook, NewText As String, Pattern As String)
Rem Requires the reference to "Microsoft Visual Basic for Applications Extensibility #.#" library
Dim Component As VBComponent
Dim CodeModule As CodeModule
Dim lineNum As Long
Dim lineText As String
' Iterate over all VBA modules in the provided workbook
For Each vbComp In objWorkbookToObfuscate.VBProject.VBComponents
' Process each line in the module
Dim lineCount As Long
lineCount = Component.CodeModule.CountOfLines
For lineNum = 1 To lineCount
Set CodeModule = Component.CodeModule
lineText = CodeModule.Lines(lineNum, 1)
If RegExMatch(Pattern) Then
CodeModule.ReplaceLine lineNum, NewText
End If
Next lineNum
Next vbComp
End Sub
Public Function RegExMatch(Text As String, GlobalMatch As Boolean, Multiline As Boolean, ByRef Patterns() As Variant)
Rem Requires the reference to "Microsoft VBScript Regular Expressions 5.5" library
Dim Regex As New RegExp
Regex.Global = GlobalMatch
Regex.Multiline = Multiline
Dim Pattern As Variant
For Each Pattern In Patterns
If Regex.Test(Text) Then
RegExMatch = True
Exit Function
End If
Next
End Function
Testing Regex is tedious. regex101.com is a popular website for testing.
Expression:
(?:[a-zA-Z0-9]+\s*=\s*)?MsgBox\s*(\s*("[^"]")(?:\s,\s*[^,)]+(?:\s*+\s*[^,)]+))?(?:\s,\s*"[^"]")?\s)|MsgBox\s+""[^""]""(?:\s&\s*""[^""]""|[^""])(?:\s*,\s*[^,""]+)|MsgBox("[^"]")\s*