On this public website, depending on the selected federal state, 30 properties are listed on the right-hand side. If this federal state offers more than 30 properties, then another 30 are listed on the next page.
But when I try to parse the links to the properties with this code, I only get the 30 Objects that are listed on page 1, even if the URL is specified as Page=3:
Sub GetDataFromPublicSite()
Dim html As New HTMLDocument
Dim GetHref As String
Dim CountItems As Object
Dim x As Long, y As Long
With CreateObject("MSXML2.XMLHTTP.6.0")
.Open "GET", "https://zvg.com/brandenburg?page=3", False
.send
html.body.innerHTML = .responseText
End With
Set CountItems = html.querySelectorAll("a")
For y = 0 To CountItems.length - 1
If InStr(html.querySelectorAll("a").Item(y), "brandenburg") Then
GetHref = html.querySelectorAll("a").Item(y).href
End If
Next
End Sub
Any ideas? I would like to know how I can access the real estate links that are on pages 2,3,4...etc. Thanks!
Power Query is a great tool for scraping any kind of data. This code will get you started.
Sub ZVGQueryNavigate(Url As String)
Const QueryName As String = "ZVGQuery"
Dim Query As WorkbookQuery
' Check if the query exists
On Error Resume Next
Set Query = ThisWorkbook.Queries(QueryName)
On Error GoTo 0
If Query Is Nothing Then
' If query doesn't exist, add it
AddZVGQuery Url
Else
' Update the query formula
Query.Formula = GetFVGQueryFormula(Url)
Query.Refresh
ThisWorkbook.RefreshAll
End If
End Sub
Sub AddZVGQuery(Optional Url As String = "https://zvgscout.com/brandenburg?page=3")
Const QueryName As String = "ZVGQuery"
Const TableName As String = "ZVGTable"
Dim Formula As String
Formula = GetFVGQueryFormula(Url)
Dim Destination As Range
' Ensure the query does not already exist
On Error Resume Next
ThisWorkbook.Queries(QueryName).Delete
On Error GoTo 0
' Set destination to the first cell of the active sheet
Set Destination = ActiveSheet.Range("A1")
' Add the query to the workbook
ActiveWorkbook.Queries.Add Name:=QueryName, Formula:=Formula
' Add a worksheet to display the query output
Dim NewSheet As Worksheet
Set NewSheet = ActiveWorkbook.Worksheets.Add
' Add the query to the table on the new sheet
With NewSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=" & Chr(34) & QueryName & Chr(34) & ";Extended Properties=""""" _
, Destination:=NewSheet.Range("A1")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [" & QueryName & "]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = True
.ListObject.DisplayName = TableName
.Refresh BackgroundQuery:=False
End With
End Sub
Function GetFVGQueryFormula(Url As String) As String
' Define the query formula as lines
Dim Lines(0 To 27) As String
Lines(0) = "let"
Lines(1) = " Source = Web.BrowserContents(" & Chr(34) & Url & Chr(34) & "),"
Lines(2) = " #""Extracted Table From Html"" = Html.Table(Source, "
Lines(3) = " {"
Lines(4) = " {""Case Number"", "".lg\:text-2xl""}, "
Lines(5) = " {""Price"", "".text-blue-600""}, "
Lines(6) = " {""Property Type"", "".leading-6""}, "
Lines(7) = " {""Address"", "".leading-6 + *""}, "
Lines(8) = " {""Date & Time"", "".text-1xl:nth-child(5)""}, "
Lines(9) = " {""Court"", "".text-1xl *""}, "
Lines(10) = " {""Status"", "".top-3""}"
Lines(11) = " }, "
Lines(12) = " [RowSelector="".group""]"
Lines(13) = " ),"
Lines(14) = " #""Changed Type"" = Table.TransformColumnTypes("
Lines(15) = " #""Extracted Table From Html"","
Lines(16) = " {"
Lines(17) = " {""Case Number"", type text}, "
Lines(18) = " {""Price"", type text}, "
Lines(19) = " {""Property Type"", type text}, "
Lines(20) = " {""Address"", type text}, "
Lines(21) = " {""Date & Time"", type text}, "
Lines(22) = " {""Court"", type text}, "
Lines(23) = " {""Status"", type text}"
Lines(24) = " }"
Lines(25) = " )"
Lines(26) = "in"
Lines(27) = " #""Changed Type"""
GetFVGQueryFormula = Join(Lines, vbNewLine)
End Function
Sub Test_GetZVGScoutLinks()
Dim Data As Variant
Data = GetZVGScoutLinks("https://zvgscout.com/brandenburg?page=3")
With Worksheets.Add
.Range("A1").Resize(UBound(Data)).Value = Data
End With
End Sub
Function GetZVGScoutLinks(Optional URL As String = "https://zvgscout.com/brandenburg?page=3", Optional Prefix As String = "https://zvgscout.com/")
Dim Text As String
Dim Matches As Object
Dim RegEx As Object
Dim Match As Object
Dim Href As String
With CreateObject("MSXML2.XMLHTTP.6.0")
.Open "GET", URL, False
.send
Text = .responseText
End With
Set RegEx = CreateObject("VBScript.RegExp")
With RegEx
.Pattern = "<a[^>]*\bclass\s*=\s*""[^""]*(group|hover)[^""]*""[^>]*href\s*=\s*""([^""]+)"""
.Global = True
.IgnoreCase = True
End With
Dim Map As New Collection
' Find matches
If RegEx.Test(Text) Then
Set Matches = RegEx.Execute(Text)
For Each Match In Matches
Href = Match.SubMatches(1)
Map.Add Prefix & Href
Next
Else
MsgBox "No matching links found on the page!", vbExclamation
End If
Set RegEx = Nothing
Set Matches = Nothing
Dim Result As Variant
ReDim Result(1 To Map.Count, 1 To 1)
Dim n As Long
For n = 1 To Map.Count
Result(n, 1) = Map(n)
Next
GetZVGScoutLinks = Result
End Function
Sub ZVGQueryNavigate(URL As String)
Const QueryName As String = "ZVGQuery"
Dim Query As WorkbookQuery
' Check if the query exists
On Error Resume Next
Set Query = ThisWorkbook.Queries(QueryName)
On Error GoTo 0
If Query Is Nothing Then
' If query doesn't exist, add it
AddZVGQuery URL
Else
' Update the query formula
Query.Formula = GetFVGQueryFormula(URL)
Query.Refresh
ThisWorkbook.RefreshAll
End If
End Sub
Sub AddZVGQuery(Optional URL As String = "https://zvgscout.com/brandenburg?page=3")
Const QueryName As String = "ZVGQuery"
Const TableName As String = "ZVGTable"
Dim Formula As String
Formula = GetFVGQueryFormula(URL)
Dim Destination As Range
' Ensure the query does not already exist
On Error Resume Next
ThisWorkbook.Queries(QueryName).Delete
On Error GoTo 0
' Set destination to the first cell of the active sheet
Set Destination = ActiveSheet.Range("A1")
' Add the query to the workbook
ActiveWorkbook.Queries.Add Name:=QueryName, Formula:=Formula
' Add a worksheet to display the query output
Dim NewSheet As Worksheet
Set NewSheet = ActiveWorkbook.Worksheets.Add
' Add the query to the table on the new sheet
With NewSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=" & Chr(34) & QueryName & Chr(34) & ";Extended Properties=""""" _
, Destination:=NewSheet.Range("A1")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [" & QueryName & "]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = True
.ListObject.DisplayName = TableName
.Refresh BackgroundQuery:=False
End With
End Sub
Function GetFVGQueryFormula(URL As String) As String
' Define the query formula as lines
Dim Lines(0 To 9) As String
Lines(0) = "let"
Lines(1) = " Source = Web.BrowserContents(" & Chr(34) & URL & Chr(34) & "),"
Lines(2) = " #""Extracted Links From Html"" = Html.Table("
Lines(3) = " Source, "
Lines(4) = " {{""Links"", ""a.group"", each [Attributes][href]}}, "
Lines(5) = " [RowSelector="".group""]"
Lines(6) = " ),"
Lines(7) = " #""Removed Nulls"" = Table.SelectRows(#""Extracted Links From Html"", each [Links] <> null)"
Lines(8) = "in"
Lines(9) = " #""Removed Nulls"""
GetFVGQueryFormula = Join(Lines, vbNewLine)
End Function