groovyxmlslurper

Using xpath on a GPathResult obtained from XmlSlurper.parse()


So, I have this HTML, read programmatically from an email message:

<div style="width: 100% !important; line-height: 1.6em; font-size: 14px; background-color: rgb(246, 246, 246); padding-top: 20px" class="container">
    <table style="background-color: rgb(246, 246, 246); width: 600px; margin: 0 auto !important">
        <tbody>
            <tr>
                <td>
                    <br />
                </td>
                <td class="templateColumns" style="display: block !important; width: 600px !important; margin: 0 auto !important; clear: both !important">
                    <div style="margin: 0 auto; display: block">
                        <table cellspacing="0" cellpadding="0" width="100%" style="background-color: rgb(255, 255, 255)">
                            <tbody>
                                <tr>
                                    <td style="font-size: 16px; font-weight: 500; padding: 20px; line-height: 18px; background-color: rgb(255, 255, 255)">
                                        <img src="cid:zs_branding.jpg" id="ztb-logo-rebrand" style="max-height: 50px" height="50"></img>
                                        <br />
                                    </td>
                                </tr>
                                <tr>
                                    <td>
                                        <table style="background-color: rgb(81, 210, 182)" cellspacing="0" cellpadding="10" align="center" width="100%">
                                            <tbody>
                                                <tr>
                                                    <td class="header-row" style="color: rgb(255, 255, 255); font-size: 16px; font-family: Helvetica, Arial, Sans Serif; border: none; background-color: rgb(81, 210, 182); padding: 20px; height: 28px">
                                                        <div class="sign-mail-header" style="text-align: left; float: left; line-height: normal; padding: 0px 0 0 10px; display: inline-block; font-size: 24px; width: 100%">
                                                            <span class="font" style="font-family: arial, helvetica, sans-serif, sans-serif">
                                                                <b>Digital Signature Request</b>
                                                            </span>
                                                            <br />
                                                        </div>
                                                    </td>
                                                </tr>
                                            </tbody>
                                        </table>
                                    </td>
                                </tr>
                                <tr>
                                    <td style="padding: 25px 40px 0px 40px">
                                        <br />
                                        <table style="padding-bottom: 20px" cellspacing="0" cellpadding="0" width="100%">
                                            <tbody style="font-size: 14px; color: rgb(68, 68, 68); line-height: 20px">
                                                <tr>
                                                    <td class="message-row" style="padding: 0px 0px 20px; font-size: 14px; width: 154px">
                                                        <div style="word-wrap: break-word; width: 100%; float: left" class="sign-mail-message">
                                                            <span>
                                                                <span class="font" style="font-family: arial, helvetica, sans-serif, sans-serif">
                                                                    <span class="size" style="font-size: 16px">SMD has requested you to review and sign the Member agreement.</span>
                                                                </span>
                                                            </span>
                                                            <br />
                                                        </div>
                                                        <div style="word-wrap: break-word; width: 100%; float: left" class="sign-mail-message">
                                                            <br />
                                                        </div>
                                                        <div style="word-wrap: break-word; width: 100%; float: left" class="sign-mail-message">
                                                            <span>
                                                                <span class="font" style="font-family: arial, helvetica, sans-serif, sans-serif">
                                                                    <span class="size" style="font-size: 16px">
                                                                        <b>Organization Name</b>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; SMD
                                                                    </span>
                                                                </span>
                                                            </span>
                                                            <br />
                                                        </div>
                                                        <div style="word-wrap: break-word; width: 100%; float: left" class="sign-mail-message">
                                                            <br />
                                                        </div>
                                                        <div style="word-wrap: break-word; width: 100%; float: left" class="sign-mail-message">
                                                            <span>
                                                                <span class="font" style="font-family: arial, helvetica, sans-serif, sans-serif">
                                                                    <span class="size" style="font-size: 16px">
                                                                        <b>Expires on</b>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;Sep 12, 2022
                                                                    </span>
                                                                </span>
                                                            </span>
                                                            <br />
                                                        </div>
                                                        <div style="word-wrap: break-word; width: 100%; float: left" class="sign-mail-message">
                                                            <br />
                                                        </div>
                                                        <div style="word-wrap: break-word; width: 100%; float: left" class="sign-mail-message">
                                                            <span>
                                                                <span class="font" style="font-family: arial, helvetica, sans-serif, sans-serif">
                                                                    <span class="size" style="font-size: 16px">
                                                                        <b>Message to all&nbsp; </b>&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;We have sent you the contract for your review and signature. Please sign the same to proceed further
                                                                        <br />Thank You 
                                                                        <br /> SMD Team.
                                                                    </span>
                                                                </span>
                                                            </span>
                                                            <br />
                                                        </div>
                                                    </td>
                                                </tr>
                                            </tbody>
                                        </table>
                                    </td>
                                </tr>
                                <tr>
                                    <td style="padding: 0 0 20px">
                                        <table width="100%">
                                            <tbody>
                                                <tr>
                                                    <td style="padding-top: 15px" align="center">
                                                        <div>
                                                            <table>
                                                                <tbody>
                                                                    <tr>
                                                                        <td class="button-row" style="font-size: 15px; color: rgb(255, 255, 255); background-color: rgb(232, 78, 88); text-align: center; text-decoration: none; border-radius: 2px; display: inline-block; min-height: 38px" align="center">
                                                                            <a target="_blank" rel="noopener noreferrer" style="font-size: 18px; color: rgb(255, 255, 255); text-align: center; text-decoration: none; border-radius: 3px; display: inline-block; padding: 0px 30px; float: left" href="https://sign-up-link.example.com?id=[blah]" class="sign-mail-btn-link">
                                                                                <div class="sign-mail-btn-text" style="line-height: 38px; font-size: 18px">Start Signing
                                                                                    <br />
                                                                                </div>
                                                                            </a>
                                                                        </td>
                                                                    </tr>
                                                                </tbody>
                                                            </table>
                                                        </div>
                                                    </td>
                                                </tr>
                                            </tbody>
                                        </table>
                                    </td>
                                </tr>
                            </tbody>
                        </table>
                    </div>
                </td>
                <td>
                    <br />
                </td>
            </tr>
        </tbody>
    </table>
    <div class="disclaimer-container" style="background-color: rgb(246, 246, 246); width: 600px; padding: 10px 0px 20px 0px; margin: 0 auto">This is an automated email from Zoho Sign. For any queries regarding this email, please contact the sender helpdesk&#x40;SMD.com directly. If you think this email is inappropriate or spam, you may file a complaint with Zoho Sign 
        <a style="margin: 0;padding: 0;" href="https://www.zoho.com/report-abuse/" target="_blank">here</a>.
    </div>
</div>
<div>
    <br />
</div>

The third-party plugin I was using would only let me access it as a plain string.

So, I have converted it to a GPathResult via new XmlSlurper().parseText(this.GetNewMessage(folderName)). Now what?

I know that I could select the sign-up link I need with the XPath selector //a[.//div[@class = 'sign-mail-btn-text']] if this HTML were loaded in my actual browser....

....but how do I go about using that xpath on my GPathResult?


Solution

  • @albciff answered my original question, but I decided to go a different direction. This may interest those of you that want to use full-blown XPath to get your results.

    Also, I realized that I could go one step further with the XPath and get the actual URL. Here's what I came up with:

    import java.nio.charset.StandardCharsets
    
    import javax.xml.parsers.DocumentBuilder
    import javax.xml.parsers.DocumentBuilderFactory
    import javax.xml.xpath.XPathFactory
    
    import org.w3c.dom.Element
    import org.xml.sax.EntityResolver
    import org.xml.sax.InputSource
    
    import com.kms.katalon.core.model.FailureHandling
    import com.testwithhari.katalon.plugins.Gmail
    
    
    public final class SMDEmailUtils {
        // ... email util methods and static flags here...
    
        // ... more email util methods here...
    
        /**
         * **NOTE**: forked from https://stackoverflow.com/a/2269464/2027839 , and then refactored
         * 
         * Evaluates an XPath expression against a piece of HTML and returns the result as a string.
         * 
         * The HTML is first normalised by {@link #ToProperHTML(String)} so that a namespace-unaware
         * XML parser can read it, then parsed into a DOM, and the expression is evaluated with the
         * document element as the context node.
         * 
         * @param html the HTML to query; either a fragment, or a full document that already
         *        contains a {@code <!DOCTYPE html} declaration. It must be well-formed XML once
         *        stray ampersands are escaped (closed tags, quoted attributes).
         * @param xpath the XPath 1.0 expression to evaluate
         * @return the string value of the XPath result; an expression that selects nothing
         *         yields the empty string
         */
        public static String ProcessHTML(String html, String xpath) {
    
            final String properHTML = ToProperHTML(html);
    
            final DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    
            // The DOCTYPE points at an external DTD. Left alone, the parser downloads it on every
            // call (slow, and fails offline), and a DOCTYPE supplied by the e-mail itself could pull
            // in arbitrary external resources (XXE). Nothing from the DTD is needed, because named
            // entities are escaped up front, so every external entity resolves to empty content.
            builder.setEntityResolver({ String publicId, String systemId ->
                new InputSource(new StringReader(''))
            } as EntityResolver);
    
            // Encode explicitly as UTF-8: with no XML declaration the parser assumes UTF-8, so the
            // platform default charset would corrupt non-ASCII text on non-UTF-8 systems.
            final Element document = builder
                    .parse(new ByteArrayInputStream( properHTML.getBytes(StandardCharsets.UTF_8) ))
                    .documentElement;
            return XPathFactory.newInstance()
                    .newXPath()
                    .evaluate( xpath, document );
        }
    
        /**
         * Makes an HTML string digestible for an XML parser.
         * 
         * Every ampersand that does not start one of the five predefined XML entities
         * ({@code &amp;amp; &amp;lt; &amp;gt; &amp;quot; &amp;apos;}) or a numeric character
         * reference is escaped to {@code &amp;amp;}. Consequently HTML-only named entities such as
         * {@code &amp;nbsp;} survive as literal text instead of breaking the parse, while
         * {@code &amp;lt;} or {@code &amp;#x40;} still decode to the character they stand for.
         * 
         * A string that already contains {@code <!DOCTYPE html} is returned after escaping only;
         * anything else is treated as a fragment and wrapped in an XHTML document skeleton.
         * 
         * @param html the raw HTML
         * @return the escaped HTML, wrapped in a document skeleton when it had no DOCTYPE
         */
        private static String ToProperHTML(String html) {
            // SOURCE: https://stackoverflow.com/a/19125599/2027839
            // (extended so that valid XML references are not double-escaped)
            String properHTML = html.replaceAll( '&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);)', '&amp;' );
    
            if (properHTML.contains('<!DOCTYPE html')) {
                return properHTML;
            }
    
    
            return """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
            "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
    <html>
        <head></head>
        <body>
            ${properHTML}
        </body>
    </html>
    """;
        }
    }
    
    

    Then, for my use case, another method on the class passes that HTML in like this:

    /**
     * Pulls the sign-up URL out of the newest message.
     * 
     * Looks up the folder of the new message, reads that message as HTML and returns the
     * {@code href} of the anchor wrapping the {@code sign-mail-btn-text} div.
     * 
     * @return the value produced by {@code ProcessHTML} for that XPath
     */
    public static String ExtractSignUpLink() {
        // Selects the href of the <a> that contains the "Start Signing" button text.
        final String signUpHrefXPath = "//a[.//div[@class = 'sign-mail-btn-text']]/@href";

        // NOTE(review): 30 is presumably a timeout; confirm against GetNewMessageFolderName.
        final String folderName = this.GetNewMessageFolderName(30, FailureHandling.STOP_ON_FAILURE);
        final String messageHtml = this.GetNewMessage(folderName);

        return this.ProcessHTML(messageHtml, signUpHrefXPath);
    }
    

    and it works!