I need to read the response from an HTTP server like this:
boost::beast::http::async_read_header
to get the headers. To read the body I use boost::beast::http::async_read
. As the response body I need to use boost::beast::http::string_body
. In other words, the response parser should have the following type:
boost::beast::http::response_parser<boost::beast::http::string_body>
I need to access the response body before the full response is received. When the response contains the Transfer-Encoding: chunked
header, we can assign callback functions:
beast::http::response_parser::on_chunk_header
beast::http::response_parser::on_chunk_body
Then these functions will be called as chunks arrive, and we can access the response body data and process it before the entire body has been received.
However, if the response does not contain Transfer-Encoding: chunked
, but contains a Content-Length
that is very long, say 200 megabytes, then we will have to wait a very long time for the entire response. But I need to start processing the data even before I get the full response. And when I get the full response, I need to call the callback function.
How can I solve this problem?
UPDATE
Ideally, I would like to get rid of on_chunk_header
and on_chunk_body
and get raw body chunks in the same way, regardless of whether the response was chunked or not.
Just to outline what I had in mind in my comment:
#include <boost/beast.hpp>
#include <boost/lexical_cast.hpp>
#include <fmt/ranges.h>
#include <iostream>
#include <span>
namespace net = boost::asio;
namespace beast = boost::beast;
namespace http = beast::http;
using boost::system::error_code;
using net::ip::tcp;
// Connects to the httpbin.org host over plain HTTP, sends a GET request for
// "/stream-bytes/2000?seed=42" and hands back the connected socket so the
// caller can read the response from it. All calls are blocking and report
// failure by throwing.
tcp::socket send_get() {
    net::system_executor executor;

    // DNS is not available on COLIRU, hence the literal address. Where DNS
    // works, resolve("httpbin.org", "http") can be used instead.
    tcp::resolver resolver(executor);
    auto const endpoints = resolver.resolve("44.207.188.95", "80");

    tcp::socket sock(executor);
    net::connect(sock, endpoints);

    http::request<http::empty_body> request{http::verb::get, "/stream-bytes/2000?seed=42", 11};
    request.set(http::field::host, "httpbin.org");
    http::write(sock, request);

    return sock;
}
// Baseline variant: reads the complete response into a string_body message
// with a single blocking read, then prints the headers, a short hex preview
// of the body and an XOR checksum over all body bytes.
//
// Returns the fully parsed response. Errors from the connection or the read
// propagate as exceptions.
http::response<http::string_body> using_string_body() {
    tcp::socket conn = send_get();

    http::response<http::string_body> res;
    beast::flat_buffer buf;
    http::read(conn, buf, res); // returns only once the whole message has been parsed

    std::cerr << "response: " << res.base() << "\n";

    std::span body = res.body();
    size_t const n = body.size();

    // span::first/last require count <= size(); clamp the preview so a body
    // shorter than 10 bytes (or an empty one) is not undefined behaviour.
    size_t const preview = n < 10 ? n : 10;
    fmt::print("body, {} bytes: {::0x} ... {::0x}\n", n, body.first(preview), body.last(preview));

    // Accumulate in unsigned char, the same type using_buffer_body uses for
    // its checksum, so both functions print the same value regardless of
    // whether plain char is signed on this platform.
    auto checksum = std::reduce(body.begin(), body.end(), static_cast<unsigned char>(0), std::bit_xor<>{});
    fmt::print("{} body checksum: {:#0x}\n", __FUNCTION__, checksum);

    // return with string_body:
    return res;
}
// Streaming variant: parses the header first, then pulls the body through a
// buffer_body parser in blocks of at most 512 bytes. Each block is processed
// (checksummed) as soon as it is parsed and also appended to a string, so the
// function can still return a string_body response comparable with
// using_string_body().
//
// Returns a string_body response built from the parsed header and the
// accumulated body. Any error other than http::error::need_buffer is thrown
// as boost::system::system_error.
//
// NOTE(review): Beast documents a default body size limit for response
// parsers; a body as large as the one described in the question would
// presumably need p.body_limit(...) raised - confirm against the Beast
// version in use.
http::response<http::string_body> using_buffer_body() {
    tcp::socket conn = send_get();

    http::response_parser<http::buffer_body> p;
    auto& res      = p.get(); // convenience shorthands
    auto& body_val = res.body();

    beast::flat_buffer buf;
    error_code ec;
    read_header(conn, buf, p, ec);
    //read(conn, buf, p, ec);
    if (ec && ec != http::error::need_buffer) // expected
        throw boost::system::system_error(ec);
    assert(p.is_header_done());
    std::cerr << "\n---\nresponse headers: " << res.base() << std::endl;

    char unsigned checksum = 0;
    size_t n = 0;
    std::string full_body;

    while (!p.is_done()) {
        // Hand the parser a fresh block to fill on every iteration.
        std::array<char, 512> block;
        body_val.data = block.data();
        body_val.size = block.size();

        read(conn, buf, p, ec);
        // need_buffer means the block was used up before the message ended;
        // that is the normal way to get back here for the next block.
        if (ec && ec != http::error::need_buffer) // expected
            throw boost::system::system_error(ec);

        // body_val.size now holds the unused remainder of the block, so the
        // difference is the number of body bytes produced by this read.
        size_t curr = block.size() - body_val.size;
        n += curr;
        std::cerr << "[incrementally parsed " << curr << " body bytes, processing]\n";

        full_body.append(block.data(), curr);
        for (auto b : std::span(block).first(curr))
            checksum ^= b;
    }

    std::span body = full_body;
    // span::first/last require count <= size(); clamp the preview so a short
    // or empty body (e.g. when the loop above never runs) is not undefined
    // behaviour.
    size_t const preview = body.size() < 10 ? body.size() : 10;
    fmt::print("body, {} bytes: {::0x} ... {::0x}\n", n, body.first(preview), body.last(preview));
    fmt::print("body, {} bytes streaming decoded, chunked? {}\n", n, p.chunked());
    fmt::print("{} body checksum: {:#0x}\n", __FUNCTION__, checksum);

    // return with string_body:
    // base() returns an lvalue reference even when called on an rvalue
    // message, so the move has to be applied to its result; otherwise the
    // header is copied instead of moved. The parser is not used after this.
    return http::response<http::string_body>{std::move(res.base()), std::move(full_body)};
}
// Runs both variants against the same endpoint and reports whether they
// produced identical bodies and identical (serialized) headers.
int main() {
    // The string_body variant runs first, then the buffer_body variant.
    auto const via_string = using_string_body();
    auto const via_buffer = using_buffer_body();

    // Headers are compared through their textual serialization.
    auto const header_text = [](auto const& msg) {
        return boost::lexical_cast<std::string>(msg.base());
    };

    bool const same_body    = via_string.body() == via_buffer.body();
    bool const same_headers = header_text(via_string) == header_text(via_buffer);

    fmt::print("a == b: body {} headers {}\n", same_body, same_headers);
}
Note that sometimes the headers are different because they contain a timestamp. Often, the timestamps fall within "the same second" though: