rdecodehttr

Decode output from GET request in R


I'm trying to pull some data using httr::GET but the problem I am facing now is that the response seems to be encoded and I am not sure how I can solve this.

Here is the url of historical data on football for argentina

I had managed to locate the corresponding GET request that pulls the data so I was doing the following:

# URL of the GET request that the page uses to pull the data.
ul1 <- "https://www.oddsportal.com/ajax-sport-country-tournament-archive_/1/vZTffojG/X218480680X24584X0X0X136839680X0X0X0X0X0X0X0X0X134283269X512X1048578X1048576X0X1024X18464X131072X256X0X0X0X0X131072X0X128/0/1/page/1//"

# Issue the request with browser-like headers and a JSON Accept header.
t <- httr::GET(
  ul1,
  httr::add_headers(
    "User-Agent" = "Mozilla/5.0 ",
    "sec-ch-ua-platform" = "Windows",
    "x-requested-with" = "XMLHttpRequest"
  ),
  httr::accept_json()
)

# Read the response body as one character string.
get2json <- httr::content(t, as = "text")

However the response looks like this (sample):

> get2json
"ZDRCVTcxaklMRU5DRHRyeXdPRzdUbnhsWm44MFpZTkVWaWI1am"

From what I've read this might be some base64 encoding so I tried converting it doing this:

# Decoding attempt: pass the response text through base64url_dec(), then
# through base64enc::base64decode(), and view the resulting bytes as a string.
# NOTE(review): base64url_dec() is not defined in this snippet; presumably it
# comes from an attached package -- confirm which one.
rawToChar(base64enc::base64decode(base64url_dec(get2json)))

It seems like it's doing some converting since I now have some / in the output but still it is a bunch of alphanumeric characters.

Any hints appreciated


Solution

  • Using this great answer so CREDIT to @Juiced - the guy is insane for figuring this one out.

    "GuTDQp+LlsfHwf3/a9DJnZn0ba3U+3i7V6PeguPzc72QKl8luFqUyJTx7FTdDCgZIlKDX9Tw9/fjI6tOXi0SB5e4tTR6746yNMfd4FJtZIYxFYg9QLfDD0fRHilIaYxqNh3BG6rRI4crRDZmdZ9rvPjod5X5ZKrBgDjvhH0XZNsbANKx2XpKn3l9KqI97gN4QuSIjxWvb+RL5jIGhYQb5Q==:c0e60d0675dd38a69c8f2e937a6497df"
    

    decodes to

    {'s': 1, 'd': {'nullResultText': 'Unfortunately, no matches can be displayed because there are no odds available from your selected bookmakers.'}, 'refresh': 20}
    

    In order to decode, you need the salt and password obtained from app.js, plus an initialization vector (IV). The IV is given in your decoded text after the colon (`:`). I tried to recreate this in R but could not find any function that replaces PBKDF2HMAC, so I used the Python-obtained aes_key.

    Code

    library(httr)
    library(jsonlite)
    library(base64enc)
    
    ul1 <- "https://www.oddsportal.com/ajax-sport-country-tournament-archive_/1/vZTffojG/X218480680X24584X0X0X136839680X0X0X0X0X0X134283269X512X1048578X1048576X0X1024X18464X131072X256X0X0X0X0X131072X0X128/0/1/page/1//"
    
    # Request the data with browser-like headers and a JSON Accept header.
    t <- httr::GET(ul1, add_headers(
      'User-Agent' = 'Mozilla/5.0 ',
      'sec-ch-ua-platform' = "Windows",
      'x-requested-with' = "XMLHttpRequest"
    ), accept_json())
    
    # Fail early on an HTTP error instead of trying to decrypt an error page.
    httr::stop_for_status(t)
    
    # The body is base64; once decoded it reads "<base64 ciphertext>:<hex IV>".
    decoded_base64 <- base64enc::base64decode(rawToChar(t$content))
    result <- rawToChar(decoded_base64)
    split_data <- unlist(strsplit(result, ":", fixed = TRUE))
    
    # Guard the payload shape so a changed response format gives a clear error
    # rather than a confusing failure inside the AES call.
    if (length(split_data) != 2L ||
        !grepl("^([0-9A-Fa-f]{2})+$", split_data[2])) {
      stop("unexpected payload: expected '<base64 ciphertext>:<hex IV>'",
           call. = FALSE)
    }
    encrypted <- split_data[1]
    key_hex <- split_data[2]
    
    encrypted_bytes <- base64enc::base64decode(encrypted)
    
    # Hex to raw: strtoi() is vectorized, so parse all two-character pairs at
    # once. Despite its name, key_bytes holds the IV, not the AES key.
    hex_starts <- seq(1, nchar(key_hex), by = 2)
    key_bytes <- as.raw(strtoi(substring(key_hex, hex_starts, hex_starts + 1), 16L))
    
    # aes_key obtained from python script
    aes_key <- as.raw(c(0x8f, 0x3b, 0x48, 0x9f, 0x3a, 0x7d, 0xa5, 0xc2, 0x1e, 0x51, 0xcc, 0x15, 0xe5, 0xbe, 0xa6, 0x6f,
                             0x67, 0xe6, 0x05, 0x49, 0x47, 0x37, 0xa7, 0xc2, 0x12, 0xe3, 0xc0, 0xea, 0x31, 0x95, 0xeb, 0xbd))
    
    # Decrypt using AES-CBC
    decrypted_bytes <- openssl::aes_cbc_decrypt(encrypted_bytes, key = aes_key, iv = key_bytes)
    
    # The plaintext is JSON; parse it into an R list.
    decoded <- fromJSON(rawToChar(decrypted_bytes))
    

    giving

    > decoded 
    
    $s
    [1] 1
    
    $d
    $d$nullResultText
    [1] "Unfortunately, no matches can be displayed because there are no odds available from your selected bookmakers."
    
    
    $refresh
    [1] 20
    

    Add

    So I found this implementation of PBKDF2HMAC and translated it to R. Using this I could obtain the aes_key.

    How to obtain Password and Salt

    Using the browser's developer tools, go to Sources -> www.oddsportal.com/res/public/js/build/app.js?v=250312132723 and search for `jt(r.data`: the value on the left is the password, the value on the right is the salt.

    out

    Full Code

    library(httr)
    library(jsonlite)
    library(base64enc)
    
    ul1 <- "https://www.oddsportal.com/ajax-sport-country-tournament-archive_/1/vZTffojG/X218480680X24584X0X0X136839680X0X0X0X0X0X134283269X512X1048578X1048576X0X1024X18464X131072X256X0X0X0X0X131072X0X128/0/1/page/1//"
    
    # Request the data with browser-like headers and a JSON Accept header.
    t <- httr::GET(ul1, add_headers(
      'User-Agent' = 'Mozilla/5.0 ',
      'sec-ch-ua-platform' = "Windows",
      'x-requested-with' = "XMLHttpRequest"
    ), accept_json())
    
    # Fail early on an HTTP error instead of trying to decrypt an error page.
    httr::stop_for_status(t)
    
    # The body is base64; once decoded it reads "<base64 ciphertext>:<hex IV>".
    decoded_base64 <- base64enc::base64decode(rawToChar(t$content))
    result <- rawToChar(decoded_base64)
    split_data <- unlist(strsplit(result, ":", fixed = TRUE))
    
    # Guard the payload shape so a changed response format gives a clear error
    # rather than a confusing failure inside the AES call.
    if (length(split_data) != 2L ||
        !grepl("^([0-9A-Fa-f]{2})+$", split_data[2])) {
      stop("unexpected payload: expected '<base64 ciphertext>:<hex IV>'",
           call. = FALSE)
    }
    encrypted <- split_data[1]
    key_hex <- split_data[2]
    
    # Decode the encrypted data
    encrypted_bytes <- base64enc::base64decode(encrypted)
    
    # Hex to raw: strtoi() is vectorized, so parse all two-character pairs at
    # once. Despite its name, key_bytes holds the IV, not the AES key.
    hex_starts <- seq(1, nchar(key_hex), by = 2)
    key_bytes <- as.raw(strtoi(substring(key_hex, hex_starts, hex_starts + 1), 16L))
    
    # Obtaining aes_key 
    
    # Helper function for HMAC
    # Keyed hash used as the default pseudorandom function of pbkdf2().
    # NOTE: despite the name, the digest is SHA-256 (algo = "sha256"), so the
    # result is 32 bytes long. The name is kept because pbkdf2() refers to it.
    #
    # key  - HMAC key, passed to digest::hmac() unchanged.
    # data - message to authenticate, passed to digest::hmac() unchanged.
    # Returns the MAC as a raw vector (raw = TRUE).
    hmac_sha1 <- function(key, data) {
      # Call through the namespace instead of require(): this neither attaches
      # digest to the search path nor downgrades a missing package to a warning.
      digest::hmac(key, data, algo = "sha256", raw = TRUE)
    }
    
    # Pack integer to big-endian 4-byte representation
    # Encode a positive number as four bytes, most significant byte first.
    #
    # i - number to encode; must be greater than zero.
    # Returns a raw vector of length 4.
    INT <- function(i) {
      stopifnot(i > 0)
      # Place value of each output byte, from most to least significant.
      place_values <- c(16777216, 65536, 256, 1)
      packed <- raw(4)
      for (k in seq_along(place_values)) {
        packed[k] <- as.raw((i %/% place_values[k]) %% 256)
      }
      packed
    }
    
    # XOR two raw vectors - fixed implementation
    # Bytewise XOR of two raw vectors of equal length.
    #
    # A, B - raw vectors; an error is raised when their lengths differ.
    # Returns a raw vector of the same length as the inputs; empty inputs give
    # raw(0).
    xor_raw <- function(A, B) {
      stopifnot(length(A) == length(B))
      # bitwXor() is vectorized, so no index loop is needed. This also avoids
      # the 1:length(A) pitfall, which produced a spurious byte for empty input.
      as.raw(bitwXor(as.integer(A), as.integer(B)))
    }
    
    # Main PBKDF2 function
    # Derive a key of dkLen bytes from a password and salt (PBKDF2, RFC 2898).
    #
    # P     - password as a raw vector.
    # S     - salt as a raw vector.
    # c     - iteration count; a whole number >= 1.
    # dkLen - length of the derived key in bytes; a whole number >= 1.
    # prf   - function(key, data) returning a raw vector of fixed length;
    #         defaults to hmac_sha1.
    # Returns the derived key as a raw vector of length dkLen.
    # Stops when c or dkLen is invalid, when prf returns no bytes, or when
    # dkLen exceeds (2^32 - 1) times the prf output length.
    pbkdf2 <- function(P, S, c, dkLen, prf = hmac_sha1) {
      stopifnot(
        length(c) == 1L, c >= 1, c == floor(c),
        length(dkLen) == 1L, dkLen >= 1, dkLen == floor(dkLen)
      )
      
      # Call the PRF once only to learn its output length in bytes.
      hLen <- length(prf(P, S))
      if (hLen == 0) {
        stop("prf returned no bytes")
      }
      if (dkLen > (2^32 - 1) * hLen) {
        stop("derived key too long")
      }
      
      # Number of hLen-sized blocks needed to cover dkLen bytes.
      n_blocks <- ceiling(dkLen / hLen)
      
      # Block i: U_1 = prf(P, S || INT(i)), U_j = prf(P, U_{j-1}); the block is
      # the XOR of U_1 .. U_c.
      derive_block <- function(i) {
        # base::c is spelled out because the parameter `c` is the iteration count.
        u <- prf(P, base::c(S, INT(i)))
        acc <- u
        for (j in seq_len(c - 1)) {
          u <- prf(P, u)
          acc <- xor_raw(acc, u)
        }
        acc
      }
      
      blocks <- lapply(seq_len(n_blocks), derive_block)
      
      # Concatenate the blocks and keep the first dkLen bytes.
      unlist(blocks)[seq_len(dkLen)]
    }
    
    # Salt and password taken from the site's app.js, as raw bytes for pbkdf2().
    salt <- charToRaw("orieC_jQQWRmhkPvR6u2kzXeTube6aYupiOddsPortal")
    password <- charToRaw("%RtR8AB&nWsh=AQC+v!=pgAe@dSQG3kQ")
    
    # Derive a 32-byte key using 1000 iterations.
    derived_key <- pbkdf2(P = password, S = salt, c = 1000, dkLen = 32)
    
    # AES-CBC decryption with the derived key; key_bytes supplies the IV.
    decrypted_bytes <- openssl::aes_cbc_decrypt(
      encrypted_bytes,
      key = derived_key,
      iv = key_bytes
    )
    
    # Parse the decrypted JSON text and display the result.
    decoded <- fromJSON(rawToChar(decrypted_bytes))
    decoded