I'm trying to import the information in JSON format from the following Url by WinHttpRequest: https://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_allodds.aspx&matchid=default
Sub test()
Dim xmlhttp As Object
Dim strUrl As String: strUrl = "https://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_allodds.aspx&matchid=default"
Dim objRequest As Object
Set objRequest = CreateObject("WinHttp.WinHttpRequest.5.1")
With objRequest
.Open "GET", strUrl, False
.send
End With
Debug.Print objRequest.responseText
End Sub
However, it just shows nothing similar to the Url but a lot of garbled messages.
I would like to know how to address this problem. The code works fine if I use other Url.
I believe the page has bot prevention measures in place whereby, if it suspects you are a bot a challenge is raised which requires javascript to run. If that runs successfully an XHR request is issued with info from the challenge in the headers and that, were you to be using a browser, would lead to your content being correctly updated to show expected values.
The first time I ran GET request I got the expected json response and after that I got the following:
<HTML>
<head>
<script>
Challenge=649275;
ChallengeId=473313563;
GenericErrorMessageCookies="Cookies must be enabled in order to view this page.";
</script>
<script>
function test(var1)
{
var var_str=""+Challenge;
var var_arr=var_str.split("");
var LastDig=var_arr.reverse()[0];
var minDig=var_arr.sort()[0];
var subvar1 = (2 * (var_arr[2]))+(var_arr[1]*1);
var subvar2 = (2 * var_arr[2])+var_arr[1];
var my_pow=Math.pow(((var_arr[0]*1)+2),var_arr[1]);
var x=(var1*3+subvar1)*1;
var y=Math.cos(Math.PI*subvar2);
var answer=x*y;
answer-=my_pow*1;
answer+=(minDig*1)-(LastDig*1);
answer=answer+subvar2;
return answer;
}
</script>
<script>
client = null;
if (window.XMLHttpRequest)
{
var client=new XMLHttpRequest();
}
else
{
if (window.ActiveXObject)
{
client = new ActiveXObject('MSXML2.XMLHTTP.3.0');
};
}
if (!((!!client)&&(!!Math.pow)&&(!!Math.cos)&&(!![].sort)&&(!![].reverse)))
{
document.write("Not all needed JavaScript methods are supported.<BR>");
}
else
{
client.onreadystatechange = function()
{
if(client.readyState == 4)
{
var MyCookie=client.getResponseHeader("X-AA-Cookie-Value");
if ((MyCookie == null) || (MyCookie==""))
{
document.write(client.responseText);
return;
}
var cookieName = MyCookie.split('=')[0];
if (document.cookie.indexOf(cookieName)==-1)
{
document.write(GenericErrorMessageCookies);
return;
}
window.location.reload(true);
}
};
y=test(Challenge);
client.open("POST",window.location,true);
client.setRequestHeader('X-AA-Challenge-ID', ChallengeId);
client.setRequestHeader('X-AA-Challenge-Result',y);
client.setRequestHeader('X-AA-Challenge',Challenge);
client.setRequestHeader('Content-Type' , 'text/plain');
client.send();
}
</script>
</head>
<body>
Whether you mimic what the javascript is doing and pass as a new XHR I am unsure (haven' looked closely).
You could also try browser automation e.g. IE via Microsoft Internet Controls or Chrome/FF etc via Selenium Basic, to see if letting javascript run on the page gets around this problem.
I started looking at an attempt to handle this. Currently, I keep getting the json response so haven't fully tested the bottom part. I would expect some minute *do we care? margin for error if only because Math.PI
gives 3.141592653589793
, whereas Application.PI
gives 3.14159265358979
Option Explicit
Public Sub GetInfo()
Dim json As Object, s As String, re As Object, ws As Worksheet
Dim pattern1 As String, pattern2 As String, challenge As Long, challengeId As Long
Const URL As String = "https://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_allodds.aspx&matchid=default"
pattern1 = "Challenge=(\d+);"
pattern2 = "ChallengeId=(\d+);"
Set re = CreateObject("vbscript.regexp")
Set ws = ThisWorkbook.Worksheets("Sheet1")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responseText
On Error Resume Next
Set json = JsonConverter.ParseJson(s)
On Error GoTo 0
If Not json Is Nothing Then
Debug.Print "No challenge issued"
Debug.Print .responseText
Else
On Error GoTo errhand
challenge = GetId(re, s, pattern1)
If challenge = 999 Then Exit Sub 'should really use more unlikely value.
challengeId = GetId(re, s, pattern2)
.Open "POST", URL, False
.setRequestHeader "X-AA-Challenge-ID", challengeId
.setRequestHeader "X-AA-Challenge-Result", CLng(GetAnswer(challenge))
.setRequestHeader "X-AA-Challenge", challenge
.setRequestHeader "Content-Type", "text/plain"
.send ""
Debug.Print .Status, .responseText
If .Status = 200 Then
.Open "GET", URL, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responseText
Debug.Print s
End If
End If
End With
Exit Sub
errhand:
Debug.Print Err.Number, Err.Description
End Sub
Public Function GetId(ByVal re As Object, ByVal s As String, ByVal pattern As String) As Long
With re
.Global = True
.MultiLine = True
.IgnoreCase = False
.pattern = pattern
If .TEST(s) Then
GetId = .Execute(s)(0).SubMatches(0)
Else
GetId = 999 '<probably should use a more unlikely number here!
End If
End With
End Function
Public Function GetAnswer(ByVal challenge As Long) As String 'var1 'challenge
Dim var_str As String, var_arr() As Long, LastDig As Long, minDig As Long
Dim i As Long
var_str = Chr$(34) & challenge & Chr$(34)
ReDim var_arr(0 To Len(var_str) - 3)
For i = 2 To Len(var_str) - 1
var_arr(i - 2) = CLng(Mid$(var_str, i, 1))
Next i
LastDig = var_arr(UBound(var_arr))
minDig = Application.Min(var_arr)
Dim my_pow As Long, x As Long, y As Long, answer As Variant
Dim subvar1 As Long, subvar2 As String
subvar1 = 2 * Application.Small(var_arr, 3) + Application.Small(var_arr, 2)
subvar2 = CStr(2 * Application.Small(var_arr, 3)) & CStr(Application.Small(var_arr, 2))
my_pow = (minDig + 2) ^ Application.Small(var_arr, 2)
x = challenge * 3 + (subvar1 * 1)
y = Evaluate("=COS(PI()* " & CLng(subvar2) & ")")
answer = x * y
answer = answer - my_pow
answer = answer + minDig - LastDig
answer = CStr(answer) & subvar2
GetAnswer = answer
End Function
Standard IE automation with Microsoft Internet Controls lead to SaveAs/Open Dialog prompt.
Using selenium you can avoid this prompt and grab the data from the pre element. Using selenium allows you to benefit from an implicit wait which allows the page to complete any challenge issued. You can increase the wait using explicit wait conditions.
Option Explicit
'download selenium https://github.com/florentbr/SeleniumBasic/releases/tag/v2.0.9.0
'Ensure latest applicable driver e.g. ChromeDriver.exe in Selenium folder
'VBE > Tools > References > Add reference to selenium type library
Public Sub DownloadFile()
Dim d As WebDriver, jsonText As String
Set d = New ChromeDriver
Const URL = "https://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_allodds.aspx&matchid=default"
With d
.Start "Chrome"
.get URL
jsonText = .FindElementByCss("pre").Text
Debug.Print jsonText
Stop
.Quit
End With
End Sub
Note I am using a json parser. After adding the .bas from that link you need to go VBE > Tools > References > Add a reference to Microsoft Scripting Runtime.