python python-3.x web-scraping beautifulsoup python-requests

Unable to produce the result containing the address I wish to obtain


I'm trying to create a script, using the requests module and the BeautifulSoup library, that will do the following on this website:

Select the Strata plan number button, input 11 in the input box, and then hit the search button. Finally, scrape the address from the result.

After running the script, when I check the result, I don't see the address in it.

import re
import requests
from bs4 import BeautifulSoup

# Page that is fetched first; the search iframe's address is read from its HTML.
link = 'https://www.nsw.gov.au/housing-and-construction/strata/strata-search'

# Endpoint the search form is posted to (split per query parameter for readability).
url = (
    'https://www.stratahub.nsw.gov.au/prweb/PRAuth/app/ssr_4380/6nxCgYjOTS_fVOVfeekVPA*/!SchemeSearch'
    '?pzTransactionId=cc5ddc1ecec1c095231675db14450f87'
    '&pzFromFrame='
    '&pzPrimaryPageName=pyDisplayHarness'
    '&AJAXTrackID=22'
)

# Headers applied to every request made through the session.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/124.0.0.0 Safari/537.36'
    ),
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate, br, zstd',
    'accept-language': 'en-US,en;q=0.9',
    'X-Requested-With': 'XMLHttpRequest',
    'origin': 'https://www.stratahub.nsw.gov.au',
}

# Form fields for the search POST. Key order matches the original literal;
# the fields that are sent empty are generated with dict.fromkeys.
# NOTE(review): the boolean values are Python bools, which are form-encoded as
# "True"/"False" - confirm the server accepts that casing.
payload = {
    # The actual search criteria.
    "$PSchemeSearch$pSearchBy": "Strata Plan Number",
    "$PSchemeSearch$pSchemePlanNumber": 11,
    # "pzuiactionzzz" starts empty and is filled in just before the POST.
    **dict.fromkeys(
        (
            "pzuiactionzzz",
            "PreActivitiesList",
            "sectionParam",
        ),
        "",
    ),
    "ActivityParams": "=",
    # "$O..." fields, all submitted with an empty value.
    **dict.fromkeys(
        (
            "$ODesktopWrapperInclude",
            "$ODeterminePortalTop",
            "$ODynamicLayout",
            "$ODynamicLayoutCell",
            "$OEvalDOMScripts_Include",
            "$OForm",
            "$OHarness",
            "$OHarnessStaticJSEnd",
            "$OHarnessStaticJSStart",
            "$OHarnessStaticScriptsClientValidation",
            "$OPMCHarnessStaticScripts",
            "$OSessionUser",
            "$OSurveyStaticScripts",
            "$OWorkformStyles",
            "$OpxAutoComplete",
            "$OpxButton",
            "$OpxDisplayText",
            "$OpxHarnessContent",
            "$OpxLayoutContainer",
            "$OpxNonTemplate",
            "$OpxRadioButtons",
            "$OpxSection",
            "$OpxVisible",
            "$OpxWorkArea",
            "$OpxWorkAreaContent",
            "$OpyDirtyCheckConfirm",
            "$OpyWorkFormStandardEnd",
            "$OpyWorkFormStandardStart",
            "$OpzAutoCompleteAGIncludes",
            "$OpzHarnessInlineScriptsEnd",
            "$OpzHarnessInlineScriptsStart",
            "$OpzPortalFavIcon",
            "$OpzPortalIcon",
            "$Opzpega_ui_harnesscontext",
            "$Opzpega_web_mashup",
            "$OpxTextInput",
            "$OpzDecimalInclude",
        ),
        "",
    ),
    # Harness / form state fields.
    "pyEncodedParameters": True,
    "pzKeepPageMessages": False,
    "strPHarnessClass": "Data-Portal",
    "strPHarnessPurpose": "SearchStrataScheme",
    "UITemplatingStatus": "Y",
    "StreamName": "SchemeSearch",
    "BaseReference": "SchemeSearch",
    "bClientValidation": True,
    "FormError": "NONE",
    "pyCustomError": "DisplayErrors",
    "UsingPage": True,
    "HeaderButtonSectionName": "-1",
    "PagesToRemove": "",
    "pzHarnessID": "HID387D2E2FCEE4EC200B5BAEA8C6A5D859",
    "inStandardsMode": True,
}



# Seconds to wait on each HTTP call. requests applies no timeout by default,
# so without this a stalled server would block the script forever.
REQUEST_TIMEOUT = 30

with requests.Session() as s:
    s.headers.update(headers)

    # Step 1: fetch the landing page and find the embedded search iframe.
    res = s.get(link, timeout=REQUEST_TIMEOUT)
    # Fail here on an HTTP error rather than parsing an error page below.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "lxml")

    # select_one returns None when nothing matches; check explicitly so the
    # failure is reported clearly instead of as "'NoneType' is not subscriptable".
    iframe = soup.select_one("iframe[title='Strata Search Production']")
    code_url = iframe.get("data-src") if iframe is not None else None
    if not code_url:
        raise RuntimeError(
            "Could not find the 'Strata Search Production' iframe "
            "(or its data-src attribute) on " + link
        )

    # Step 2: post the search form, presenting the iframe address as the
    # referer and its query string as the "pzuiactionzzz" field.
    s.headers['referer'] = code_url
    payload['pzuiactionzzz'] = code_url.split("?")[-1]
    # NOTE(review): `url` carries a fixed pzTransactionId and `payload` a fixed
    # pzHarnessID; these look session-specific - confirm whether they have to
    # be obtained from the live iframe page for the search to return results.
    r = s.post(url, data=payload, timeout=REQUEST_TIMEOUT)
    print(r.status_code)
    print(r.text)

How can I generate the result containing the address I'm after?


Solution

  • I think an easier method would be to change the strategy for obtaining the data:

    When you see the result and click on "View Map", the page makes a request that is a lot simpler, and the address is there.

    E.g.:

    import requests
    
    # Query endpoint that the page calls when "View Map" is clicked.
    api_url = (
        "https://portal.spatial.nsw.gov.au/server/rest/services/StrataHub/MapServer/0/query"
    )
    
    # Filter template: selects the record whose planlabel is "SP<number>".
    plan = "planlabel='SP{}'"
    plan_number = 11  # <-- change to the number you want
    
    params = {
        "f": "json",
        "where": plan.format(plan_number),
        "returnGeometry": "true",
        "spatialRel": "esriSpatialRelIntersects",
        "maxAllowableOffset": "0.00001",
        "outFields": "*",
        "outSR": "102100",
    }
    
    # requests applies no timeout by default; set one so a stalled server
    # cannot hang the script.
    response = requests.get(api_url, params=params, timeout=30)
    # Surface HTTP errors here instead of failing later inside .json().
    response.raise_for_status()
    # NOTE(review): the service may report a failed query inside a successful
    # HTTP response (an "error" key in the JSON) - confirm and handle if needed.
    data = response.json()
    print(data)
    

    Prints:

    {
        "displayFieldName": "plannumber",
        "fieldAliases": {
            "objectid": "objectid",
            "plannumber": "plannumber",
            "registrationdate": "registrationdate",
            "shape_length": "shape_length",
            "shape_area": "shape_area",
            "address": "address",
            "suburb": "suburb",
            "lga": "lga",
            "lottotal": "lottotal",
            "postcode": "postcode",
            "planlabel": "planlabel",
            "st_area(shape)": "st_area(shape)",
            "st_perimeter(shape)": "st_perimeter(shape)",
        },
        "geometryType": "esriGeometryPolygon",
        "spatialReference": {"wkid": 102100, "latestWkid": 3857},
        "fields": [
            {"name": "objectid", "type": "esriFieldTypeOID", "alias": "objectid"},
            {"name": "plannumber", "type": "esriFieldTypeInteger", "alias": "plannumber"},
            {
                "name": "registrationdate",
                "type": "esriFieldTypeDate",
                "alias": "registrationdate",
                "length": 8,
            },
            {
                "name": "shape_length",
                "type": "esriFieldTypeDouble",
                "alias": "shape_length",
            },
            {"name": "shape_area", "type": "esriFieldTypeDouble", "alias": "shape_area"},
            {
                "name": "address",
                "type": "esriFieldTypeString",
                "alias": "address",
                "length": 255,
            },
            {
                "name": "suburb",
                "type": "esriFieldTypeString",
                "alias": "suburb",
                "length": 255,
            },
            {"name": "lga", "type": "esriFieldTypeString", "alias": "lga", "length": 255},
            {"name": "lottotal", "type": "esriFieldTypeSmallInteger", "alias": "lottotal"},
            {"name": "postcode", "type": "esriFieldTypeInteger", "alias": "postcode"},
            {
                "name": "planlabel",
                "type": "esriFieldTypeString",
                "alias": "planlabel",
                "length": 255,
            },
            {
                "name": "st_area(shape)",
                "type": "esriFieldTypeDouble",
                "alias": "st_area(shape)",
            },
            {
                "name": "st_perimeter(shape)",
                "type": "esriFieldTypeDouble",
                "alias": "st_perimeter(shape)",
            },
        ],
        "features": [
            {
                "attributes": {
                    "objectid": 9,
                    "plannumber": 11,
                    "registrationdate": -259545600000,
                    "shape_length": 0.00128814089728482,
                    "shape_area": 9.29112433528216e-08,
                    "address": "6 BURRANEER BAY ROAD CRONULLA",
                    "suburb": "CRONULLA",
                    "lga": "SUTHERLAND SHIRE",
                    "lottotal": 14,
                    "postcode": 2230,
                    "planlabel": "SP11",
                    "st_area(shape)": 9.291124335282166e-08,
                    "st_perimeter(shape)": 0.001288140897284823,
                },
                "geometry": {
                    "rings": [
                        [
                            [16825771.206571, -4035936.439062],
                            [16825749.918205, -4035952.800724],
                            [16825718.247838, -4035911.771859],
                            [16825739.304771, -4035895.376397],
                            [16825771.206571, -4035936.439062],
                        ]
                    ]
                },
            }
        ],
    }
    

    The address is under the "features" key, in each feature's "attributes" (the "address" field).