I've created a script that sends an HTTP POST request with the appropriate parameters to fetch the town, continent, country, and inner_link of each item on this webpage. The script only retrieves 69 containers, but there are 162 items in total. How can I fetch the rest?
import requests
# Search endpoint the script POSTs to, and the URL template that turns a
# town's slug into the link of its detail page.
link = "https://wenomad.so/elasticsearch/search"
inner_link = "https://wenomad.so/city/{}"

# Fixed request body: three opaque base64 blobs sent under the keys z, y, x.
# NOTE(review): presumably copied from one browser request, so it can only
# replay that single query - confirm before reusing.
payload = dict(
    z="nZ9g0AdFBj7cLRX5v2wSWjjGf2Q5KPpss9DS4wZGh9pvfC4xcJvnTebBg+npAqWaQvdVUFxVD1NZ88siTRUfPo8gB70CGoJG/2MPv9Gu9kC+48KwvV4COpsB3HmER0Mgx0bz2G9pSpw6veTnEUnNR78xonQmhuvL3eztB+ikZaI3OTeuVfRVNetmdX4iDgOkKrM6kLt/2SuRKKwT2aAZHJbdhlTV1I65zj1jD7VBwrm+lJDNh7pZug0/gKCWUDQz4CgmrAdQdnxyJDde2ewzudcsGDimhnWB56bcejoli4LLvevtMB4RUMhmM6FIYn0Tl4sclUD7YLQ8gZQOMmBndDkGctxeq74bpDAwBMOG74qu9gb4WLUFxgB/lWCQ9OnJsfkT0J/kUShhQPoRVr72qUx8f8ldkliIGINoBy9i+lm1RYM3L/NfOJ0kBZ+fbKndVJk2owAZ1kLMupja4iPmpxszQlFGTstpAlF5pTckhL+QYIc6vYbslWqXVs8XrzKs955DHPe1WpWmI714MsJfHhd3XHDsuMy9lfY6mE+cfc0434amFJC5gCgoEhGIQsFQD/kGRaWvqCcMfPYiW/o++nQ017bAKzlg7qb0EfPpy/EMG+u4i7QEU/vvC9mUnVCN0ZzFpxP8HWiTTCF0djuB+UnfUaHKtXciPwwZUTV4o8PtI6v6QdrC4PvtAKSJ9CpIccW+A3SSvOgCgEwOtniCdLxezWaP1Dq3fv9G56HCOvsOGRlQ0RgzNgq/+pCwkvyqFYcs/VtX9NPuaCAAXLi+SFM0xRuI4Sq6nHQr7qs6R2C4gAVHm9bZHfByKZ5x03KJp74IGlGSd1GL9/z9CySVZw==",
    y="oht3SrBVqLvR2lXJSwtwWw==",
    x="dmpOxF/FB13c+GGFmDW4Y4SPz6jEItrcjegm/WNbqFk=",
)

# Request headers that present the call as an XMLHttpRequest issued by a
# desktop Chrome browser from the site's own origin.
headers = {
    "accept": "application/json, text/javascript, */*; q=0.01",
    "accept-language": "en-US,en;q=0.9",
    "origin": "https://wenomad.so",
    "referer": "https://wenomad.so/",
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/133.0.0.0 Safari/537.36"
    ),
    "x-requested-with": "XMLHttpRequest",
}
# Send the fixed payload to the search endpoint. The timeout keeps the script
# from hanging indefinitely on a stalled connection.
res = requests.post(link, json=payload, headers=headers, timeout=30)
print(res.status_code)
# Stop with a clear HTTP error instead of failing later while decoding or
# indexing an error body.
res.raise_for_status()

# Every hit keeps its fields under '_source'; print one tuple per town:
# (town, continent, country, detail-page link).
for item in res.json()['hits']['hits']:
    source = item['_source']
    print((
        source['town_text'],
        source['continent__text__text'],
        source['country__text__text'],
        inner_link.format(source['Slug']),
    ))
You need to replicate the requests to the `/elasticsearch/search` endpoint, which requires three parameters: `x`, `y`, and `z`.
These parameters are generated by an encryption routine, the `encode3` function in `run.js`.
First install PyCryptodome:
pip install pycryptodome
Then you can use this script to get all (162) results:
from Crypto.Cipher import AES
from Crypto.Protocol.KDF import PBKDF2
from Crypto.Hash import MD5
from Crypto.Util.Padding import pad
import base64
import json
import random
import time
import requests
def encode(key: str, iv: str, text: str, appname: str) -> str:
    """Encrypt *text* with AES-CBC and return the ciphertext base64-encoded.

    Neither *key* nor *iv* is used directly. Each one is stretched with
    PBKDF2 (HMAC-MD5, 7 iterations), salted with the bytes of *appname*,
    into a 32-byte AES key and a 16-byte CBC IV respectively.

    Args:
        key: Material the 32-byte AES key is derived from.
        iv: Material the 16-byte CBC initialization vector is derived from.
        text: Plaintext to encrypt; it is encoded to bytes and padded to the
            AES block size before encryption.
        appname: Application name, used as the PBKDF2 salt.

    Returns:
        The ciphertext as a base64 string.
    """
    # NOTE(review): the hash, iteration count and lengths presumably mirror
    # the site's own client-side routine; changing them would produce
    # payloads the server cannot read - confirm against run.js.
    salt = appname.encode()
    derived_key = PBKDF2(key, salt, dkLen=32, count=7, hmac_hash_module=MD5)
    derived_iv = PBKDF2(iv, salt, dkLen=16, count=7, hmac_hash_module=MD5)

    cipher = AES.new(derived_key, AES.MODE_CBC, iv=derived_iv)
    padded = pad(text.encode(), AES.block_size)
    return base64.b64encode(cipher.encrypt(padded)).decode()
def generate_payload(data):
    """Build the encrypted ``x``/``y``/``z`` request body for a search query.

    Args:
        data: JSON-serializable search query.

    Returns:
        A dict of three base64 strings:

        * ``z`` - *data* as compact JSON, encrypted with a key made of the
          app name plus the current millisecond timestamp and a random IV
          seed.
        * ``y`` - the timestamp joined to the version as ``timestamp_version``,
          encrypted with fixed key material.
        * ``x`` - the random IV seed, encrypted with fixed key material.
    """
    version = '1'
    appname = 'fie'

    # The timestamp is part of the key for `z`, so every call yields a
    # different payload even for identical `data`.
    cur_timestamp = str(int(time.time() * 1000))
    timestamp_version = f'{cur_timestamp}_{version}'
    key = appname + cur_timestamp

    # Per-request IV seed, sent along (encrypted) as `x`.
    # NOTE(review): presumably the server decrypts `y` and `x` to rebuild the
    # key and IV needed for `z` - not verifiable from this file.
    iv = str(random.random())

    # Compact separators: no whitespace in the serialized query.
    text = json.dumps(data, separators=(',', ':'))

    return {
        'z': encode(key, iv, text, appname),
        'y': encode(appname, 'po9', timestamp_version, appname),
        'x': encode(appname, 'fl1', iv, appname),
    }
def fetch_all_search_results(data):
    """Page through the search endpoint and collect every hit.

    The query is re-encrypted and re-sent with an advancing ``'from'`` offset
    until the response reports ``at_end`` or a page comes back empty.

    Args:
        data: Search query dict. Its ``'from'`` entry (0 when absent) is the
            starting offset. The dict itself is left unmodified.

    Returns:
        A list with the raw hit objects of all pages, in response order.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    headers = {
        'x-requested-with': 'XMLHttpRequest',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36'
    }

    # Work on a shallow copy so the caller's offset is not advanced as a side
    # effect and the same query dict can be reused for another run.
    query = dict(data)
    query['from'] = query.get('from', 0)

    results = []
    # One session for all pages so the connection can be reused.
    with requests.Session() as session:
        while True:
            # The payload embeds a timestamp, so it is rebuilt for every page.
            payload = generate_payload(query)
            response = session.post(
                'https://wenomad.so/elasticsearch/search',
                headers=headers,
                json=payload,
                timeout=30,
            )
            response.raise_for_status()
            res_json = response.json()

            hits = res_json.get('hits', {}).get('hits', [])
            results.extend(hits)
            query['from'] += len(hits)

            # An empty page also ends the loop: without it, a response that
            # carries neither hits nor `at_end` would repeat the same request
            # forever.
            if res_json.get('at_end') or not hits:
                break
    return results
# Search query that gets encrypted and sent to the endpoint: active towns,
# sorted by overall rating in descending order, starting at offset 0.
data = {
    'appname': 'fie',
    'app_version': 'live',
    'type': 'custom.town',
    'constraints': [
        dict(key='active_boolean', value=True, constraint_type='equals'),
    ],
    # The same sort entry is listed twice; both copies are kept as separate
    # dicts so the serialized query stays unchanged.
    'sorts_list': [
        dict(sort_field='overall_rating_number', descending=True)
        for _ in range(2)
    ],
    'from': 0,
    # NOTE(review): presumably the requested page size - confirm.
    'n': 9999,
    # The endpoint takes this descriptor as a JSON *string*, so the nested
    # structure is serialized here without any whitespace.
    'search_path': json.dumps(
        {
            'constructor_name': 'DataSource',
            'args': [
                {'type': 'json', 'value': '%p3.AAV.%el.cmQus.%el.cmSJO0.%p.%ds'},
                {
                    'type': 'node',
                    'value': {
                        'constructor_name': 'Element',
                        'args': [
                            {'type': 'json', 'value': '%p3.AAV.%el.cmQus.%el.cmSJO0'},
                        ],
                    },
                },
                {'type': 'raw', 'value': 'Search'},
            ],
        },
        separators=(',', ':'),
    ),
    'situation': 'unknown',
}
# Fetch every page of hits for the query, then report how many came back.
results = fetch_all_search_results(data)
print(f'len(results) = {len(results)!r}')