hyperlink libreoffice-basic openoffice-basic libreoffice-writer

How do I check for broken internal links in Star Basic?


I am creating a Basic macro for LibreOffice Writer to check for broken internal links. In a nutshell:

My code has a few unsolved issues:

  1. (within fnBuildAnchorList) How do we get the numbering for each heading? For example, if the first level 1 heading text is “Introduction”, the correct anchor is #1.Introduction|outline and we are recording Introduction|outline
  2. (within subInspectLink) How do we properly test a hyperlink to a heading? I notice that when I manually follow a link to a heading, it will succeed when the numbering is the same, but also when the text is the same.
    e.g. if there’s an internal link #1.My first heading|outline, it can be reached with the hyperlink #1.Previous header name|outline but also with the hyperlink #2.3.5.My first heading|outline
  3. (within subInspectLink) How do we open a specific hyperlink for editing? Do we pass parameters to .uno:EditHyperlink? Do we move the cursor? (All moves I found were relative, e.g. .uno:GoRight) Do we use the text portion’s .Start and .End properties?
REM  *****  BASIC  *****
Option Explicit


' PrintArray displays a MsgBox with the whole array
' for DEBUG purposes only
Sub subPrintArray(sTitle as String, theArray() as String)
    ' Debug helper: shows the title followed by the array elements,
    ' one per line, in an information-style message box.
    Dim sLines As String
    sLines = Join(theArray, Chr(13))
    MsgBox(sTitle & ":" & Chr(13) & sLines, 64, "***DEBUG")
End Sub

' auxiliary sub for BuildAnchorList
Sub subAddItemToAnchorList (oAnchors() as String, sTheAnchor as String, sType as String)
    ' Appends one anchor to oAnchors, decorated with the link suffix that
    ' belongs to its target type (bookmarks carry no suffix).
    '
    ' oAnchors   - anchor list, grown by one element on every call
    ' sTheAnchor - name of the target (heading text, table name, ...)
    ' sType      - "Heading", "Table", "Text Frame", "Image", "Object",
    '              "Section" or "Bookmark"; any other value appends an
    '              empty entry
    Dim sSuffix As String, bKnownType As Boolean, iNew As Long

    bKnownType = True
    Select Case sType
        Case "Heading"
            sSuffix = "|outline"
        Case "Table"
            sSuffix = "|table"
        Case "Text Frame"
            sSuffix = "|frame"
        Case "Image"
            sSuffix = "|graphic"
        Case "Object"
            sSuffix = "|ole"
        Case "Section"
            sSuffix = "|region"
        Case "Bookmark"
            sSuffix = ""
        Case Else
            bKnownType = False
    End Select

    ' grow the list by one slot; the new slot starts out as an empty string
    iNew = UBound(oAnchors) + 1
    ReDim Preserve oAnchors(iNew) as String
    If bKnownType Then oAnchors(iNew) = sTheAnchor & sSuffix
End Sub

' auxiliary sub for BuildAnchorList
Sub subAddArrayToAnchorList (oAnchors() as String, oNewAnchors() as String, sType as String)
    ' Appends every name in oNewAnchors to oAnchors, all tagged with the
    ' same target type. An empty oNewAnchors leaves oAnchors untouched.
    Dim iPos As Long
    iPos = LBound(oNewAnchors)
    Do While iPos <= UBound(oNewAnchors)
        subAddItemToAnchorList (oAnchors, oNewAnchors(iPos), sType)
        iPos = iPos + 1
    Loop
End Sub

Function fnBuildAnchorList()
    ' Builds the list of link targets available in the current document:
    ' headings first, then text tables, text frames, images, embedded
    ' objects, bookmarks and text sections.
    ' Returns a String array of anchors in the form used after the "#" of
    ' an internal hyperlink.
    Dim oAnchors() as String
    Dim oDoc as Object, oParaEnum as Object, oElement as Object
    oDoc = ThisComponent

    ' headings: walk the body text and keep paragraphs with an outline level
    oParaEnum = oDoc.getText().createEnumeration()
    While oParaEnum.hasMoreElements
        oElement = oParaEnum.nextElement
        ' only paragraphs are examined; the two tests stay nested so that
        ' OutlineLevel is never read from a non-paragraph element
        If oElement.ImplementationName = "SwXParagraph" Then
            If oElement.OutlineLevel > 0 Then
                ' ***
                ' *** TO DO: prefix the outline numbering of the heading.
                ' For a first level 1 heading "Introduction" the real anchor
                ' is `#1.Introduction|outline`, whereas this records
                ' `Introduction|outline`
                ' ***
                subAddItemToAnchorList (oAnchors, oElement.String, "Heading")
            End If
        End If
    Wend

    ' named objects: each collection contributes its element names
    subAddArrayToAnchorList(oAnchors, oDoc.getTextTables().getElementNames(), "Table")
    subAddArrayToAnchorList(oAnchors, oDoc.getTextFrames().getElementNames(), "Text Frame")
    subAddArrayToAnchorList(oAnchors, oDoc.getGraphicObjects().getElementNames(), "Image")
    subAddArrayToAnchorList(oAnchors, oDoc.getEmbeddedObjects().getElementNames(), "Object")
    subAddArrayToAnchorList(oAnchors, oDoc.getBookmarks().getElementNames(), "Bookmark")
    subAddArrayToAnchorList(oAnchors, oDoc.getTextSections().getElementNames(), "Section")

    fnBuildAnchorList = oAnchors
End Function

Function fnIsInArray( theString as String, theArray() as String )
    ' Returns True when theString is equal (= operator) to at least one
    ' element of theArray, False otherwise, including for an empty array.
    Dim iPos As Integer, bFound As Boolean
    bFound = False
    iPos = LBound(theArray)
    ' stop at the first hit or when the array is exhausted
    Do While iPos <= UBound(theArray) And Not bFound
        bFound = (theArray(iPos) = theString)
        iPos = iPos + 1
    Loop
    fnIsInArray = bFound
End Function

Function fnIsOutlineInArray ( theString as String, theArray() as String )
    ' Tests a heading link (text after "#", ending in "|outline") against
    ' the anchor list when the list holds heading anchors WITHOUT their
    ' outline numbering.
    '
    ' theString - link target, e.g. "2.3.5.My first heading|outline"
    ' theArray  - anchor list, e.g. containing "My first heading|outline"
    ' Returns True when some "|outline" anchor is the tail of theString and
    ' whatever precedes it in theString is a numbering prefix (or nothing).
    '
    ' A plain "ends with" test is not enough: an empty heading is recorded
    ' as "|outline", which is the tail of every heading link, and an empty
    ' anchor is the tail of any string - both would hide all broken links.
    Dim i As Integer, iPrefixLen As Integer, sAnchor As String
    fnIsOutlineInArray = False
    For i = LBound(theArray) To UBound(theArray)
        sAnchor = theArray(i)
        iPrefixLen = Len(theString) - Len(sAnchor)
        ' tests are nested on purpose: each one is only meaningful when
        ' the previous one passed
        If iPrefixLen >= 0 Then
            If Right(sAnchor, 8) = "|outline" Then
                If Right(theString, Len(sAnchor)) = sAnchor Then
                    If fnIsNumberingPrefix(Left(theString, iPrefixLen)) Then
                        fnIsOutlineInArray = True
                        Exit Function
                    End If
                End If
            End If
        End If
    Next
End Function

' True when sPrefix is empty or consists only of digit groups that are each
' closed by a dot, e.g. "1." or "2.3.5.".
' Assumes heading numbering in links always has this shape, as in the
' examples `#1.Introduction|outline` and `#2.3.5.My first heading|outline`.
Private Function fnIsNumberingPrefix(sPrefix As String) As Boolean
    Dim k As Integer, sChar As String, bInDigits As Boolean
    fnIsNumberingPrefix = False
    bInDigits = False
    For k = 1 To Len(sPrefix)
        sChar = Mid(sPrefix, k, 1)
        If InStr("0123456789", sChar) > 0 Then
            bInDigits = True
        ElseIf sChar = "." And bInDigits Then
            bInDigits = False   ' a dot closes the current digit group
        Else
            Exit Function       ' anything else is heading text, not numbering
        End If
    Next
    ' a valid prefix ends right after a dot (or is empty)
    fnIsNumberingPrefix = Not bInDigits
End Function

' auxiliary function to FindBrokenInternalLinks
' inspects any links inside the current document fragment
' used to have an enumeration inside an enumeration, per OOo examples,
' but tables don't have .createEnumeration
Sub subInspectLinks( oAnchors() as String, oFragment as Object, iFragments as Integer, iLinks as Integer )
    ' Checks every internal hyperlink (URL starting with "#") found in one
    ' document fragment against the anchor list and prompts the user for
    ' each link whose target is not in the list.
    '
    ' oAnchors   - anchor list as built by fnBuildAnchorList; declared as a
    '              String array to match the helpers it is handed on to
    ' oFragment  - a paragraph ("SwXParagraph") or a text table
    '              ("SwXTextTable"); anything else is reported to the user
    ' iFragments - running counter of inspected fragments/portions; the
    '              caller reads the updated value
    ' iLinks     - running counter of internal links; the caller reads the
    '              updated value
    '
    ' Pressing Cancel in any prompt terminates the whole macro (End).
    Dim sMsg, sImplementation, thisPortion
    Dim oParaPortions, sLink, notFound
    Dim i, eCells, thisCell, oCellPortions
    Dim iChoice as Integer

    sImplementation = oFragment.implementationName
    Select Case sImplementation

        Case "SwXParagraph"
            ' paragraphs can be enumerated: go through all their text portions
            oParaPortions = oFragment.createEnumeration
            While oParaPortions.hasMoreElements
                thisPortion = oParaPortions.nextElement
                iFragments = iFragments + 1
                If Left(thisPortion.HyperLinkURL, 1) = "#" then
                    ' internal link found: keep everything after the "#"
                    iLinks = iLinks + 1
                    sLink = right(thisPortion.HyperLinkURL, Len(thisPortion.HyperLinkURL)-1)
                    If Left(sLink,14) = "__RefHeading__" then
                        ' link inside a table of contents, no need to check
                        notFound = False
                    Elseif Right(sLink,8) = "|outline" then
                        ' heading link: the anchor list has no outline
                        ' numbering, so only the right-most part is matched
                        notFound = not fnIsOutlineInArray(sLink, oAnchors)
                    Else
                        notFound = not fnIsInArray(sLink, oAnchors)
                    End if
                    If notFound then
                        ' anchor not found: show the link and its paragraph
                        sMsg = "Fragment #" & iFragments & ", internal link #" & iLinks & Chr(13) _
                            & "Bad link: [" & thisPortion.String & "] -> [" _
                            & thisPortion.HyperLinkURL & "] " & Chr(13) _
                            & "Paragraph:" & Chr(13) & oFragment.String & Chr(13) _
                            & "OK to continue, Cancel to stop"
                        iChoice = MsgBox (sMsg, 48+1, "Find broken internal link")
                        If iChoice = 2 Then End
                        ' ***
                        ' *** TO DO: open this specific hyperlink for editing
                        ' (`.uno:EditHyperlink` with parameters? move the
                        ' cursor using the portion's `.Start` / `.End`?)
                        ' ***
                    End If
                End if
            Wend

        Case "SwXTextTable"
            ' text tables are inspected cell by cell
            eCells = oFragment.getCellNames()
            For i = LBound(eCells) to UBound(eCells)
                thisCell = oFragment.getCellByName(eCells(i))
                oCellPortions = thisCell.createEnumeration
                While oCellPortions.hasMoreElements
                    thisPortion = oCellPortions.nextElement
                    iFragments = iFragments + 1
                    ' a table cell may contain a paragraph or another table,
                    ' so call recursively
                    subInspectLinks (oAnchors, thisPortion, iFragments, iLinks)
                Wend
            Next

        Case Else
            ' line break added so the instruction does not run into the
            ' preceding sentence
            sMsg = "Implementation method '" & sImplementation & "' not covered by regular code." _
                & Chr(13) & "OK to continue, Cancel to stop"
            If 2 = MsgBox(sMsg, 48+1) then End
            ' uses xray for element inspection; if not available, comment the two following lines
            BasicLibraries.loadLibrary("XrayTool")
            xray oFragment

    End Select
End sub

Sub FindBrokenInternalLinks
    ' Entry point: checks every internal link of the current document
    ' against the list of existing anchors.
    '
    ' Steps:
    '   1. build the anchor list (headings are recorded without their
    '      outline numbering - still TO DO)
    '   2. walk all body fragments; subInspectLinks prompts on every link
    '      whose target is missing and ends the macro on Cancel
    '   3. show a summary
    '
    ' NOTE: the summary does not know whether subInspectLinks reported any
    ' broken links; it only looks at the number of links seen.
    Dim oAnchors() as String ' every anchor in the document
    Dim oBodyEnum as Object, oElement as Object
    Dim iFragments as Integer, iLinks as Integer
    Dim sSummary as String

    oAnchors = fnBuildAnchorList()
    ' subPrintArray("Anchor list", oAnchors) ' *** DEBUG ***
    ' MsgBox( UBound(oAnchors)-LBound(oAnchors)+1 & " anchors found – stand by for checking")

    iFragments = 0 ' fragment counter
    iLinks = 0     ' internal link counter
    oBodyEnum = ThisComponent.Text.createEnumeration ' paragraphs and tables of the body
    Do While oBodyEnum.hasMoreElements
        oElement = oBodyEnum.nextElement
        iFragments = iFragments + 1
        subInspectLinks (oAnchors, oElement, iFragments, iLinks)
    Loop

    If iLinks = 0 Then
        sSummary = "This document has no internal links"
    Else
        sSummary = iLinks & " internal links found, all good"
    End If
    MsgBox (sSummary, 64, "Find broken internal link")

End Sub

' *** END FindBrokenInternalLinks ***

You can check the first issue using any document with a heading: uncomment the `subPrintArray` call in `FindBrokenInternalLinks`, and a MsgBox will pop up with all the anchors, where you’ll see the missing outline numbering.

The second issue needs a document with a bad internal link.


Solution

  • Check out cOOol. You could either use this instead of creating a macro, or else borrow some concepts from the code.

    Testing the links (possibly with .uno:JumpToMark) does not seem like it would be helpful, because internal links always go somewhere even if the target does not exist. Instead, construct a list of valid targets as you suggested.

    To hold the list of valid targets, the cOOol code uses a Python set. If you want to use Basic, then data structures are more limited. However, it can be done either by declaring a new Collection object or by using Basic arrays, perhaps with ReDim.

    Also have a look at how the cOOol code defines the valid target strings. For example:

    internal_targets.add('0.' * heading_level + data + '|outline')            
    

    To open the hyperlink dialog, select the hyperlinked text and then call:

    dispatcher.executeDispatch(document, ".uno:EditHyperlink", "", 0, Array())
    

    EDIT:

    Ok, I worked on this for several hours and came up with the following code:

    REM  *****  BASIC  *****
    Option Explicit
    
    
    ' PrintArray displays a MsgBox with the whole array
    ' for DEBUG purposes only
    Sub subPrintArray(sTitle as String, theArray() as String)
        ' Debug helper: shows the title followed by the array elements,
        ' one per line, in an information-style message box.
        Dim sLines As String
        sLines = Join(theArray, Chr(13))
        MsgBox(sTitle & ":" & Chr(13) & sLines, 64, "***DEBUG")
    End Sub
    
    ' auxiliary sub for BuildAnchorList
    Sub subAddItemToAnchorList (oAnchors() as String, sTheAnchor as String, sType as String)
        ' Appends one anchor to oAnchors, decorated with the link suffix that
        ' belongs to its target type (bookmarks carry no suffix).
        '
        ' oAnchors   - anchor list, grown by one element on every call
        ' sTheAnchor - name of the target (numbered heading text, table name, ...)
        ' sType      - "Heading", "Table", "Text Frame", "Image", "Object",
        '              "Section" or "Bookmark"; any other value appends an
        '              empty entry
        Dim sSuffix As String, bKnownType As Boolean, iNew As Long

        bKnownType = True
        Select Case sType
            Case "Heading"
                sSuffix = "|outline"
            Case "Table"
                sSuffix = "|table"
            Case "Text Frame"
                sSuffix = "|frame"
            Case "Image"
                sSuffix = "|graphic"
            Case "Object"
                sSuffix = "|ole"
            Case "Section"
                sSuffix = "|region"
            Case "Bookmark"
                sSuffix = ""
            Case Else
                bKnownType = False
        End Select

        ' grow the list by one slot; the new slot starts out as an empty string
        iNew = UBound(oAnchors) + 1
        ReDim Preserve oAnchors(iNew) as String
        If bKnownType Then oAnchors(iNew) = sTheAnchor & sSuffix
    End Sub
    
    ' auxiliary sub for BuildAnchorList
    Sub subAddArrayToAnchorList (oAnchors() as String, oNewAnchors() as String, sType as String)
        ' Appends every name in oNewAnchors to oAnchors, all tagged with the
        ' same target type. An empty oNewAnchors leaves oAnchors untouched.
        Dim iPos As Long
        iPos = LBound(oNewAnchors)
        Do While iPos <= UBound(oNewAnchors)
            subAddItemToAnchorList (oAnchors, oNewAnchors(iPos), sType)
            iPos = iPos + 1
        Loop
    End Sub
    
    ' Updates outlineLevels for the current level.
    ' Returns a string like "1.2.3"
    Function fnGetOutlinePrefix(outlineLevel as Integer, outlineLevels() as Integer)
        ' Registers one more heading at the given level and returns its
        ' numbering prefix.
        '
        ' outlineLevel  - zero-based level of the heading (0 = top level)
        ' outlineLevels - per-level counters, indices 0..9, updated in place:
        '                 the counter of this level is incremented and all
        '                 deeper counters restart at 0
        ' Returns the counters of levels 0..outlineLevel, each followed by a
        ' dot, e.g. "1.2.3."
        Dim iDepth As Integer, sNumbering As String

        ' deeper levels start over below the new heading
        For iDepth = outlineLevel + 1 To 9
            outlineLevels(iDepth) = 0
        Next
        outlineLevels(outlineLevel) = outlineLevels(outlineLevel) + 1

        sNumbering = ""
        iDepth = 0
        Do While iDepth <= outlineLevel
            sNumbering = sNumbering & outlineLevels(iDepth) & "."
            iDepth = iDepth + 1
        Loop
        fnGetOutlinePrefix = sNumbering
    End Function
    
    Function fnBuildAnchorList()
        ' Builds the list of link targets available in the current document:
        ' headings (prefixed with their outline numbering) first, then text
        ' tables, text frames, images, embedded objects, bookmarks and text
        ' sections.
        ' Returns a String array of anchors in the form used after the "#" of
        ' an internal hyperlink.
        Dim oAnchors() as String, sHeadingAnchor as String
        Dim aCounters(10) as Integer ' heading counters per level, indices 0..9 used
        Dim iLevel as Integer
        Dim oDoc as Object, oParaEnum as Object, oElement as Object

        For iLevel = 9 To 0 Step -1
            aCounters(iLevel) = 0
        Next
        oDoc = ThisComponent

        ' headings: walk the body text and keep paragraphs with an outline level
        oParaEnum = oDoc.getText().createEnumeration()
        While oParaEnum.hasMoreElements
            oElement = oParaEnum.nextElement
            ' only paragraphs are examined; the two tests stay nested so that
            ' OutlineLevel is never read from a non-paragraph element
            If oElement.ImplementationName = "SwXParagraph" Then
                If oElement.OutlineLevel > 0 Then
                    ' counters are zero-based, OutlineLevel starts at 1
                    iLevel = oElement.OutlineLevel - 1
                    sHeadingAnchor = fnGetOutlinePrefix(iLevel, aCounters) & oElement.String
                    subAddItemToAnchorList (oAnchors, sHeadingAnchor, "Heading")
                End If
            End If
        Wend

        ' named objects: each collection contributes its element names
        subAddArrayToAnchorList(oAnchors, oDoc.getTextTables().getElementNames(), "Table")
        subAddArrayToAnchorList(oAnchors, oDoc.getTextFrames().getElementNames(), "Text Frame")
        subAddArrayToAnchorList(oAnchors, oDoc.getGraphicObjects().getElementNames(), "Image")
        subAddArrayToAnchorList(oAnchors, oDoc.getEmbeddedObjects().getElementNames(), "Object")
        subAddArrayToAnchorList(oAnchors, oDoc.getBookmarks().getElementNames(), "Bookmark")
        subAddArrayToAnchorList(oAnchors, oDoc.getTextSections().getElementNames(), "Section")

        fnBuildAnchorList = oAnchors
    End Function
    
    Function fnIsInArray( theString as String, theArray() as String )
        ' Returns True when theString is equal (= operator) to at least one
        ' element of theArray, False otherwise, including for an empty array.
        Dim iPos as Integer
        fnIsInArray = False
        iPos = LBound(theArray())
        Do While iPos <= UBound(theArray())
            If theArray(iPos) = theString Then
                fnIsInArray = True
                Exit Function
            End If
            iPos = iPos + 1
        Loop
    End Function
    
    ' Open a _specific_ hyperlink for editing.
    Sub subEditHyperlink(textRange as Object)
        ' Opens the hyperlink dialog for the link at textRange: the view cursor
        ' is moved (without selecting) to the start of the range and
        ' .uno:EditHyperlink is dispatched on the document's frame.
        Dim oController As Object, oViewCursor As Object, oHelper As Object

        oController = ThisComponent.getCurrentController()
        oViewCursor = oController.getViewCursor()
        oViewCursor.gotoRange(textRange.getStart(), False)

        oHelper = createUnoService("com.sun.star.frame.DispatchHelper")
        oHelper.executeDispatch(oController.getFrame(), ".uno:EditHyperlink", "", 0, Array())
    End Sub
    
    ' auxiliary function to FindBrokenInternalLinks
    ' inspects any links inside the current document fragment
    ' used to have an enumeration inside an enumeration, per OOo examples,
    ' but tables don't have .createEnumeration
    Sub subInspectLinks(oAnchors() as String, oFragment as Object, iFragments as Integer, iLinks as Integer, iBadLinks as Integer)
        ' Checks every internal hyperlink (URL starting with "#") found in one
        ' document fragment against the anchor list. For each link whose
        ' target is missing the user may edit the link, continue, or stop.
        '
        ' oAnchors   - anchor list as built by fnBuildAnchorList
        ' oFragment  - a paragraph ("SwXParagraph") or a text table
        '              ("SwXTextTable"); anything else is reported to the user
        ' iFragments - running counter of inspected fragments/portions
        ' iLinks     - running counter of internal links
        ' iBadLinks  - running counter of links with a missing target
        ' The caller reads the updated values of all three counters.
        '
        ' Pressing Cancel in any prompt terminates the whole macro (End).
        Dim sMsg, sImplementation, thisPortion
        Dim oParaPortions, sLink, notFound
        Dim i, eCells, thisCell, oCellPortions
        Dim iChoice as Integer

        sImplementation = oFragment.implementationName
        Select Case sImplementation

            Case "SwXParagraph"
                ' paragraphs can be enumerated: go through all their text portions
                oParaPortions = oFragment.createEnumeration
                While oParaPortions.hasMoreElements
                    thisPortion = oParaPortions.nextElement
                    iFragments = iFragments + 1
                    If Left(thisPortion.HyperLinkURL, 1) = "#" then
                        ' internal link found: keep everything after the "#"
                        iLinks = iLinks + 1
                        sLink = right(thisPortion.HyperLinkURL, Len(thisPortion.HyperLinkURL)-1)
                        If Left(sLink,14) = "__RefHeading__" then
                            ' link inside a table of contents, no need to check
                            notFound = False
                        Else
                            notFound = not fnIsInArray(sLink, oAnchors)
                        End if
                        If notFound then
                            ' anchor not found: count it and ask the user what to do
                            iBadLinks = iBadLinks + 1
                            sMsg = "Fragment #" & iFragments & ", internal link #" & iLinks & Chr(13) _
                                & "Bad link: [" & thisPortion.String & "] -> [" _
                                & thisPortion.HyperLinkURL & "] " & Chr(13) _
                                & "Paragraph:" & Chr(13) & oFragment.String & Chr(13) _
                                & "Yes to edit link, No to continue, Cancel to stop"
                            iChoice = MsgBox (sMsg, MB_YESNOCANCEL + MB_ICONEXCLAMATION, _
                                "Find broken internal link")
                            If iChoice = IDCANCEL Then
                                End
                            ElseIf iChoice = IDYES Then
                                subEditHyperlink(thisPortion)
                            End If
                        End If
                    End if
                Wend

            Case "SwXTextTable"
                ' text tables are inspected cell by cell
                eCells = oFragment.getCellNames()
                For i = LBound(eCells) to UBound(eCells)
                    thisCell = oFragment.getCellByName(eCells(i))
                    oCellPortions = thisCell.createEnumeration
                    While oCellPortions.hasMoreElements
                        thisPortion = oCellPortions.nextElement
                        iFragments = iFragments + 1
                        ' a table cell may contain a paragraph or another table,
                        ' so call recursively; iBadLinks must be handed down as
                        ' well, otherwise bad links inside tables cannot be counted
                        subInspectLinks (oAnchors, thisPortion, iFragments, iLinks, iBadLinks)
                    Wend
                Next

            Case Else
                ' line break added so the instruction does not run into the
                ' preceding sentence
                sMsg = "Implementation method '" & sImplementation & "' not covered by regular code." _
                    & Chr(13) & "OK to continue, Cancel to stop"
                If 2 = MsgBox(sMsg, 48+1) then End
                ' uses xray for element inspection; if not available, comment the two following lines
                BasicLibraries.loadLibrary("XrayTool")
                xray oFragment

        End Select
    End sub
    
    Sub FindBrokenInternalLinks
        ' Entry point: checks every internal link of the current document
        ' against the list of existing anchors.
        '
        ' Steps:
        '   1. build the anchor list (headings include their outline numbering)
        '   2. walk all body fragments; for every link whose target is missing
        '      subInspectLinks lets the user edit the link, continue, or stop
        '      the macro
        '   3. show a summary of bad and good links
        Dim oAnchors() as String ' every anchor in the document
        Dim oBodyEnum as Object, oElement as Object
        Dim iFragments as Integer, iLinks as Integer, iBadLinks as Integer
        Dim iGoodLinks as Integer, sSummary as String

        oAnchors = fnBuildAnchorList()
        ' subPrintArray("Anchor list", oAnchors) ' *** DEBUG ***
        ' MsgBox( UBound(oAnchors)-LBound(oAnchors)+1 & " anchors found – stand by for checking")

        iFragments = 0 ' fragment counter
        iLinks = 0     ' internal link counter
        iBadLinks = 0  ' broken link counter
        oBodyEnum = ThisComponent.Text.createEnumeration ' paragraphs and tables of the body
        Do While oBodyEnum.hasMoreElements
            oElement = oBodyEnum.nextElement
            iFragments = iFragments + 1
            subInspectLinks (oAnchors, oElement, iFragments, iLinks, iBadLinks)
        Loop

        If iBadLinks > 0 Then
            iGoodLinks = iLinks - iBadLinks
            sSummary = iBadLinks & " bad link(s), " & iGoodLinks & " good link(s)"
        ElseIf iLinks <> 0 Then
            sSummary = iLinks & " internal link(s) found, all good"
        Else
            sSummary = "This document has no internal links"
        End If
        MsgBox (sSummary, 64, "Find broken internal link")

    End Sub
    
    ' *** END FindBrokenInternalLinks ***
    

    It now checks for outline numbering. Maybe it's too strict -- perhaps it would be good to have an option to turn off outline number checking.

    As for issue 3, this code now opens the proper links for editing (as long as "Yes" is clicked in the message box).