I'm trying to retrieve data from the following link, but I've noticed that the data is rendered in an interactive chart (amCharts 1.1). I can't work out the right parameters for a GET or POST request, and I suspect the website uses some other mechanism to deliver the data.
Would anyone be able to offer some insight? Your assistance would be greatly appreciated.
library(httr)
library(rvest)
library(XML)
# Download and parse the product page's HTML.
url <- "https://live.euronext.com/en/product/equities/FR0004040608-XPAR"
page <- read_html(url)
class(page)
# "xml_document" "xml_node"

# Second child of the parsed document; here that is the <body> element.
xml_child(page, 2)
# {html_node}
# <body class="layout-no-sidebars path-product">
# [1] <a href="#main-content" class="visually-hidden focusable skip-link">\n ...
# [2] <noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-T ...
# ....
# [20] <script src="/sites/default/files/js/optimized/js_gcfptZrWxdKuaT414lrcjk ...

# The chart container yields no text in the downloaded HTML
# (presumably the chart is populated by JavaScript after the page loads).
page %>%
  html_nodes(xpath = '//*[@id="price-chart"]') %>%
  html_text()
# character(0)

# Text content of the <script> and <noscript> elements.
page %>%
  html_nodes("script") %>%
  html_text()
page %>%
  html_nodes("noscript") %>%
  html_text()
# [1] "form.antibot * :not(.antibot-message) { display: none !important; }"
# [2] ""
You have successfully identified the data source; it looks a bit cryptic because it actually is encrypted (or "encrypted" — at least, it is not just base64-encoded).
However, `ajax_secure_dataFilter()`, the function that preprocesses the chart data, is also callable in a browser session. So if we take the lazy route and add `chromote` to the mix, we can point it at the site so that it first loads all the JavaScript requirements for us; we can then create a small JS helper function that fetches the JSON within the Chrome session and calls `ajax_secure_dataFilter()` on the fetched chart data.
Finally, we can call that JS function from R with any chart data endpoint, get the decoded JSON back as a list of lists, and wrangle it into a flat data frame.
library(ggplot2)
library(dplyr)
library(chromote)
library(stringr)
# Product page to load in the browser session, and the chart-data endpoint
# built from the last path segment of that URL.
url_ <- "https://live.euronext.com/en/product/equities/FR0004040608-XPAR"
chart_max <- str_glue(
  "https://live.euronext.com/en/intraday_chart/getChartData/{basename(url_)}/max"
)

# Start a Chrome session and open the product page. Navigate to url_ rather
# than a second hard-coded copy of the address, so the page that is loaded and
# the endpoint derived above always refer to the same instrument.
b <- ChromoteSession$new()
# navigate() and loadEventFired() are kept inside one braced expression so
# they are evaluated together as a single top-level call.
{
  b$Page$navigate(url_)
  b$Page$loadEventFired()
}
#> $timestamp
#> [1] 522961.4
# Define a JavaScript helper inside the page: it fetches the given URL,
# parses the response as JSON and passes it through the site's
# ajax_secure_dataFilter().
fetch_helper_js <- "function fetch_chart_data(url) {return fetch(url).then(response => response.json()).then(json => ajax_secure_dataFilter(json, 'json', false))}"
b$Runtime$evaluate(fetch_helper_js)
#> $result
#> $result$type
#> [1] "undefined"
# Invoke the JS helper on the chart-data URL, wait for its promise to resolve
# and pull the resolved value back into R by value.
js_call <- str_glue('fetch_chart_data("{chart_max}")')
response <- b$Runtime$evaluate(
  js_call,
  awaitPromise = TRUE,
  returnByValue = TRUE
)
chart_data <- response$result$value

# Peek at the structure of the first three records.
str(head(chart_data, n = 3))
#> List of 3
#> $ :List of 3
#> ..$ time : chr "1999-09-01 02:00"
#> ..$ price : num 9.7
#> ..$ volume: int 25810
#> $ :List of 3
#> ..$ time : chr "1999-09-02 02:00"
#> ..$ price : num 9.75
#> ..$ volume: int 17430
#> $ :List of 3
#> ..$ time : chr "1999-09-03 02:00"
#> ..$ price : num 9.85
#> ..$ volume: int 28270
# List of records to a flat frame, then parse the date/time strings.
# bind_rows() is given chart_data exactly once: piping chart_data into
# bind_rows(chart_data) expands to bind_rows(chart_data, chart_data) and
# stacks every record twice, doubling the row count.
# NOTE(review): ymd_hm() parses into UTC by default; the time zone of the
# source `time` strings is not visible here - confirm.
chart_data_df <-
  bind_rows(chart_data) |>
  mutate(time = lubridate::ymd_hm(time))
Result:
chart_data_df
#> # A tibble: 6,240 × 3
#> time price volume
#> <dttm> <dbl> <dbl>
#> 1 1999-09-01 02:00:00 9.7 25810
#> 2 1999-09-02 02:00:00 9.75 17430
#> 3 1999-09-03 02:00:00 9.85 28270
#> 4 1999-09-06 02:00:00 10.1 57750
#> 5 1999-09-07 02:00:00 10.2 29830
#> 6 1999-09-08 02:00:00 10.0 13700
#> 7 1999-09-09 02:00:00 10.1 17560
#> 8 1999-09-10 02:00:00 10.2 7290
#> 9 1999-09-13 02:00:00 10.2 10040
#> 10 1999-09-14 02:00:00 10.2 12220
#> # ℹ 6,230 more rows
# Filled area chart of price against time, with an x-axis break every
# five years and the minimal theme.
chart_data_df |>
  ggplot(aes(time, price)) +
  geom_area(aes(fill = "price")) +
  scale_x_datetime(breaks = "5 years") +
  theme_minimal()
Created on 2024-05-28 with reprex v2.1.0