Tags: c++, websocket, boost-asio, boost-beast, proxy-server

Debugging a WebSocket Connection Through a Proxy Using Boost Asio and Beast in C++


I have written a program using Boost Asio and Boost Beast to enable WebSocket communication through a proxy server. I managed to develop a working prototype by following the question "How to connect with boost::asio to a HTTPS server using a proxy?" and the official coro-ssl example. However, I'm unsure why certain changes in my code made the program work. Initially, my program failed to perform the SSL handshake after (apparently) successfully sending an HTTP CONNECT request to the proxy. Altering the way I handled the HTTP CONNECT response seemed to fix the issue. Could residual data left in the buffer by the original HTTP response handling be interfering with the SSL handshake? How does this change impact the overall WebSocket setup?

Here's my original (non-functioning) code:

#include <boost/beast/core.hpp>
#include <boost/beast/ssl.hpp>
#include <boost/beast/websocket.hpp>
#include <boost/beast/websocket/ssl.hpp>
#include <boost/asio/spawn.hpp>
#include <boost/certify/https_verification.hpp>
#include <iostream>
#include <string>
#include <cstdlib> // for std::getenv

namespace beast = boost::beast;
namespace http = beast::http;    
namespace websocket = beast::websocket;
namespace net = boost::asio;
namespace ssl = net::ssl;
using tcp = boost::asio::ip::tcp;

// Report a failed operation on stderr in the form "<what>: <error message>".
void fail(beast::error_code ec, char const* what)
{
    auto const description = ec.message();
    std::cerr << what << ": " << description << "\n";
}

// Session coroutine: connects to the proxy named by the `https_proxy`
// environment variable, sends an HTTP CONNECT for host:port, then performs the
// TLS handshake and the WebSocket handshake and prints every received message.
//
// host        - target host; ":" + port is appended to it before the WebSocket handshake
// port        - target port, also used to build the CONNECT request target
// stream_path - request target passed to the WebSocket handshake
// ioc         - io_context used to construct the resolver and the stream
// ctx         - SSL context used to construct the TLS layer of the stream
// yield       - coroutine completion token used for the asynchronous operations
//
// When a step fails a diagnostic is printed and the function returns; the
// result of the pong write at the end of the loop is not checked.
void do_session(
    std::string host,
    std::string const& port,
    std::string const& stream_path,
    net::io_context& ioc,
    ssl::context& ctx,
    net::yield_context yield)
{
    beast::error_code ec;
    tcp::resolver resolver(ioc);

    // WebSocket over TLS over TCP; the lowest layer is connected to the proxy below.
    websocket::stream<beast::ssl_stream<beast::tcp_stream>> ws(ioc, ctx);

    std::cout << "Starting session for host: " << host << ", port: " << port << ", stream_path: " << stream_path << std::endl;

    // Get proxy server address from the environment
    const char* proxy_env = std::getenv("https_proxy");
    if (!proxy_env) {
        std::cerr << "https_proxy environment variable is not set.\n";
        return;
    }
    std::string proxy_uri = proxy_env;
    std::string proxy_host, proxy_port;
    // Drop a leading "http://" scheme if present.
    if (proxy_uri.substr(0, 7) == "http://") {
        proxy_uri = proxy_uri.substr(7);
        std::cout << "Using proxy: " << proxy_uri << std::endl;
    }
    // Split what remains at the first ':' into host and port.
    auto pos = proxy_uri.find(':');
    if (pos != std::string::npos) {
        proxy_host = proxy_uri.substr(0, pos);
        proxy_port = proxy_uri.substr(pos + 1);
    } else {
        std::cerr << "Invalid https_proxy format. Expected http://host:port\n";
        return;
    }

    // Resolve the proxy server
    std::cout << "Resolving proxy host..." << std::endl;
    std::cout << "Proxy host: " << proxy_host << ", Proxy port: " << proxy_port << std::endl;
    auto const proxy_results = resolver.async_resolve(proxy_host, proxy_port, yield[ec]);
    if (ec) return fail(ec, "resolve_proxy");
    std::cout << "Proxy host resolved." << std::endl;

    // Connect to the proxy server (the loop only logs the candidate endpoints)
    std::cout << "Connecting to proxy endpoint..." << std::endl;
    for (auto const& result : proxy_results) {
        std::cout << "Trying proxy endpoint: " << result.endpoint() << std::endl;
    }
    // `ep` is not used after this point.
    auto ep = beast::get_lowest_layer(ws).async_connect(proxy_results, yield[ec]);
    if (ec) {
        std::cout << "Failed to connect to proxy endpoint: " << ec.message() << std::endl;
        return fail(ec, "connect_proxy");
    }
    std::cout << "Connected to proxy endpoint." << std::endl;

    // Set tcp::no_delay to reduce latency
    beast::get_lowest_layer(ws).socket().set_option(tcp::no_delay(true));

    // Send HTTP CONNECT request to proxy, asking for a tunnel to host:port
    std::cout << "Sending HTTP CONNECT request to proxy..." << std::endl;
    http::request<http::empty_body> req{http::verb::connect, host + ":" + port, 11};
    req.set(http::field::host, host + ":" + port);
    req.set(http::field::user_agent, BOOST_BEAST_VERSION_STRING);
    req.set(http::field::proxy_connection, "keep-alive");
    req.set(http::field::connection, "keep-alive");

    std::cout << "HTTP CONNECT request: " << req << std::endl;
    // Blocking write, unlike the yield-based operations used elsewhere in this function.
    http::write(beast::get_lowest_layer(ws), req, ec);
    if (ec) return fail(ec, "write_connect");

    // Read HTTP CONNECT response from proxy.
    // This is a blocking read of a whole response message into `res`. `buffer`
    // is not handed to the TLS handshake below, so any bytes left in it are
    // never seen by that handshake.
    std::cout << "Reading HTTP CONNECT response from proxy..." << std::endl;
    beast::flat_buffer buffer;
    http::response<http::empty_body> res;
    http::read(beast::get_lowest_layer(ws), buffer, res, ec);
    if (ec) return fail(ec, "read_connect");

    if (res.result() != http::status::ok) {
        std::cerr << "Proxy failed to CONNECT: " << res.result_int() << std::endl;
        return;
    }
    std::cout << "HTTP CONNECT response received." << std::endl;
    std::cout << "HTTP CONNECT response: " << res << std::endl;

    // Perform SSL handshake on the TLS layer, over the connection to the proxy
    std::cout << "Performing SSL handshake..." << std::endl;
    ws.next_layer().async_handshake(ssl::stream_base::client, yield[ec]);
    if (ec) {
        std::cerr << "SSL handshake failed: " << ec.message() << std::endl;
        return fail(ec, "ssl_handshake");
    }
    std::cout << "SSL handshake completed." << std::endl;

    // From here on `host` holds "host:port" and is used as such in the WebSocket handshake.
    host += ":" + port;

    // Perform WebSocket handshake
    std::cout << "Performing WebSocket handshake..." << std::endl;
    ws.async_handshake(host, stream_path, yield[ec]);
    if (ec) return fail(ec, "handshake");
    std::cout << "WebSocket handshake completed." << std::endl;

    // Set timeout settings for the websocket
    ws.set_option(websocket::stream_base::timeout::suggested(beast::role_type::client));

    std::cout << "Entering read loop..." << std::endl;

    // Loop to read messages; the only exit is a failed read.
    for (;;)
    {
        std::cout << "Reading message..." << std::endl;
        // Same flat_buffer as used for the CONNECT response; it is not cleared between reads.
        ws.async_read(buffer, yield[ec]);
        if (ec) return fail(ec, "read");

        std::cout << "Received message: " << beast::make_printable(buffer.data()) << std::endl;

        // Optionally send a pong frame to keep the connection alive (ec is not checked here)
        ws.async_pong({}, yield[ec]);
    }
}

// Entry point: expects <host> <port> <stream> on the command line, prepares a
// peer-verifying TLS client context and runs the session coroutine until the
// io_context has no more work.
int main(int argc, char** argv)
{
    if (argc != 4)
    {
        std::cerr <<
            "Usage: websocket-client-coro-ssl <host> <port> <stream>\n"
            "Example:\n"
            "    websocket-client-coro-ssl fstream.binance.com 443 /ws/bnbusdt@aggTrade\n"
            "    websocket-client-coro-ssl fstream.binance.com 443 /stream?streams=bnbusdt@aggTrade/btcusdt@markPrice\n";
        return EXIT_FAILURE;
    }

    std::string const target_host(argv[1]);
    std::string const target_port(argv[2]);
    std::string const target_path(argv[3]);

    std::cout << "Starting client for host: " << target_host
              << ", port: " << target_port
              << ", stream_path: " << target_path << std::endl;

    net::io_context ioc;

    // TLS client context; peers are verified against the CA bundle loaded below.
    ssl::context ctx{ssl::context::sslv23_client};
    ctx.set_verify_mode(ssl::verify_peer);
    ctx.load_verify_file("/usr/lib/ssl/certs/ca-certificates.crt"); // Change to the actual path of your CA cert

    // Alternative kept for reference:
    // boost::certify::enable_native_https_server_verification(ctx); // from lib https://github.com/djarek/certify
    ctx.set_options(ssl::context::default_workarounds |
                    ssl::context::no_sslv2 |
                    ssl::context::no_sslv3);

    // The bound functor owns copies of the three strings; ioc and ctx are passed by reference.
    boost::asio::spawn(ioc, std::bind(
        &do_session,
        target_host,
        target_port,
        target_path,
        std::ref(ioc),
        std::ref(ctx),
        std::placeholders::_1));

    std::cout << "Running IO context..." << std::endl;
    ioc.run();

    std::cout << "Client exited." << std::endl;
    return EXIT_SUCCESS;
}

This is the command line output:

./proxy_client fstream.binance.com 443 /ws/bnbusdt@trade
Starting client for host: fstream.binance.com, port: 443, stream_path: /ws/bnbusdt@trade
Running IO context...
Starting session for host: fstream.binance.com, port: 443, stream_path: /ws/bnbusdt@trade
Using proxy: 127.0.0.1:7890
Resolving proxy host...
Proxy host: 127.0.0.1, Proxy port: 7890
Proxy host resolved.
Connecting to proxy endpoint...
Trying proxy endpoint: 127.0.0.1:7890
Connected to proxy endpoint.
Sending HTTP CONNECT request to proxy...
HTTP CONNECT request: CONNECT fstream.binance.com:443 HTTP/1.1
Host: fstream.binance.com:443
User-Agent: Boost.Beast/300
Proxy-Connection: keep-alive
Connection: keep-alive

Reading HTTP CONNECT response from proxy...
HTTP CONNECT response received.
HTTP CONNECT response: HTTP/1.1 200 Connection established

Performing SSL handshake...
SSL handshake failed: stream truncated
ssl_handshake: stream truncated
Client exited.

When I changed the part of the code that reads the HTTP response to:

    {
        // Read HTTP CONNECT response from proxy.
        // Only the header is read here (read_header with a parser); `buffer`
        // and `p` are local to this brace scope.
        std::cout << "Reading HTTP CONNECT response from proxy..." << std::endl;
        beast::flat_buffer buffer;
        http::response_parser<http::empty_body> p;
        http::read_header(beast::get_lowest_layer(ws), buffer, p, ec);
        if (ec) return fail(ec, "read_connect");
        http::response<http::empty_body> proxy_response = std::move(p.get());
        
        // Checks (where assert is active) that the read left no bytes in the buffer.
        assert(buffer.size() == 0);
        
        if (proxy_response.result() != http::status::ok) {
            std::cerr << "Proxy failed to CONNECT: " << proxy_response.result_int() << std::endl;
            return;
        }
        std::cout << "HTTP CONNECT response received." << std::endl;
        // output the response and the body
        std::cout << "HTTP CONNECT response: " << proxy_response << std::endl;
    }

the program then ran successfully. I do not understand the reason behind this.


Solution

  • Could the residual data in the buffer from the original HTTP response handling be interfering with the SSL handshake?

    Yes. Of course.

    The second version correctly tells the parser not to read a body. (Otherwise the proxy would have to tell you that there is no body — see "Does HTTP differentiate between an empty body and no body?" — but that is a given for HTTP proxies.)

    The second version is also incorrect though, since in both versions buffer may contain any excess data that the server has sent.

    In the first version you almost correctly re-use buffer later on, but the second version seems to discard it because of the extra { } surrounding the buffer scope. Of course, we don't have your actual code, because with those extra braces, the rest of the code cannot compile.

    Fixing The Problem

    You're mixing synchronous and asynchronous operations. That seems unwise. I don't think beast::websocket::stream supports that. So e.g. use

    // Yield-based counterparts of the blocking http::write / http::read calls.
    http::async_write(beast::get_lowest_layer(ws), req, yield[ec]);
    // ...
    http::async_read(beast::get_lowest_layer(ws), buffer, res, yield[ec]);
    

    Next up pass the buffer from the previous read operation into the SSL handshake:

    // Hand the bytes already sitting in `buffer` to the TLS handshake; `used`
    // is the value this buffered handshake completes with.
    auto used = ws.next_layer().async_handshake(ssl::stream_base::client, buffer.data(), yield[ec]);
    if (ec) {
        std::cerr << "SSL handshake failed: " << ec.message() << std::endl;
        return fail(ec, "ssl_handshake");
    }
    // Drop that many bytes from the front of the buffer.
    buffer.consume(used);
    

    At this point you should be safe to assert that the buffer is empty.

    Live Demo

    #include <boost/asio.hpp>
    #include <boost/asio/spawn.hpp>
    #include <boost/beast/core.hpp>
    #include <boost/beast/ssl.hpp>
    #include <boost/beast/websocket.hpp>
    #include <boost/beast/websocket/ssl.hpp>
    //#include <boost/certify/https_verification.hpp>
    #include <cstdlib> // for std::getenv
    #include <iostream>
    #include <string>
    
    namespace beast     = boost::beast;
    namespace http      = beast::http;
    namespace websocket = beast::websocket;
    namespace net       = boost::asio;
    namespace ssl       = net::ssl;
    using tcp           = boost::asio::ip::tcp;
    
    // Print "<what>: <error message>" to stderr.
    void fail(beast::error_code ec, char const* what) {
        auto const reason = ec.message();
        std::cerr << what << ": " << reason << "\n";
    }
    
    // Session coroutine: connects to the proxy named by the `https_proxy`
    // environment variable, opens a tunnel to host:port with HTTP CONNECT,
    // performs the TLS and WebSocket handshakes through that tunnel, then prints
    // each incoming message.
    //
    // host        - target host; ":" + port is appended to it before the WebSocket handshake
    // port        - target port, also used to build the CONNECT request target
    // stream_path - request target passed to the WebSocket handshake
    // ioc         - io_context used to construct the resolver and the stream
    // ctx         - SSL context used to construct the TLS layer of the stream
    // yield       - coroutine completion token used for every asynchronous step
    //
    // When a step fails a diagnostic is printed and the function returns.
    //
    // NOTE(review): no SNI host name and no host-name verification callback are
    // set on the TLS stream in this function - confirm whether the target needs them.
    void do_session(std::string host, std::string const& port, std::string const& stream_path,
                    net::io_context& ioc, ssl::context& ctx, net::yield_context yield) {
        beast::error_code ec;
        tcp::resolver     resolver(ioc);
    
        websocket::stream<beast::ssl_stream<beast::tcp_stream>> ws(ioc, ctx);
    
        std::cout << "Starting session for host: " << host << ", port: " << port
                  << ", stream_path: " << stream_path << std::endl;
    
        // Get proxy server address
        char const* proxy_env = std::getenv("https_proxy");
        if (!proxy_env) {
            std::cerr << "https_proxy environment variable is not set.\n";
            return;
        }
        std::string proxy_uri = proxy_env;
        std::string proxy_host, proxy_port;
        // Drop a leading "http://" scheme if present.
        if (proxy_uri.substr(0, 7) == "http://") {
            proxy_uri = proxy_uri.substr(7);
            std::cout << "Using proxy: " << proxy_uri << std::endl;
        }
        // Split what remains at the first ':' into host and port.
        auto pos = proxy_uri.find(':');
        if (pos != std::string::npos) {
            proxy_host = proxy_uri.substr(0, pos);
            proxy_port = proxy_uri.substr(pos + 1);
        } else {
            std::cerr << "Invalid https_proxy format. Expected http://host:port\n";
            return;
        }
    
        // Resolve the proxy server
        std::cout << "Resolving proxy host..." << std::endl;
        std::cout << "Proxy host: " << proxy_host << ", Proxy port: " << proxy_port << std::endl;
        auto proxy_results = resolver.async_resolve(proxy_host, proxy_port, yield[ec]);
        if (ec)
            return fail(ec, "resolve_proxy");
        std::cout << "Proxy host resolved." << std::endl;
    
        // Connect to the proxy server (the loop only logs the candidate endpoints)
        std::cout << "Connecting to proxy endpoint..." << std::endl;
        for (auto const& result : proxy_results) {
            std::cout << "Trying proxy endpoint: " << result.endpoint() << std::endl;
        }
        beast::get_lowest_layer(ws).async_connect(proxy_results, yield[ec]);
        if (ec) {
            std::cout << "Failed to connect to proxy endpoint: " << ec.message() << std::endl;
            return fail(ec, "connect_proxy");
        }
        std::cout << "Connected to proxy endpoint." << std::endl;
    
        // Set tcp::no_delay to reduce latency
        beast::get_lowest_layer(ws).socket().set_option(tcp::no_delay(true));
    
        // Send HTTP CONNECT request to proxy, asking for a tunnel to host:port
        std::cout << "Sending HTTP CONNECT request to proxy..." << std::endl;
        http::request<http::empty_body> req{http::verb::connect, host + ":" + port, 11};
        req.set(http::field::host, host + ":" + port);
        req.set(http::field::user_agent, BOOST_BEAST_VERSION_STRING);
        req.set(http::field::proxy_connection, "keep-alive");
        req.set(http::field::connection, "keep-alive");
    
        std::cout << "HTTP CONNECT request: " << req << std::endl;
        http::async_write(beast::get_lowest_layer(ws), req, yield[ec]);
        if (ec)
            return fail(ec, "write_connect");
    
        // Read HTTP CONNECT response from proxy. Only the header is read; whatever
        // else arrived with it stays in `buffer` for the TLS handshake below.
        std::cout << "Reading HTTP CONNECT response from proxy..." << std::endl;
        beast::flat_buffer               buffer;
        http::response<http::empty_body> res;
        {
            http::response_parser<http::empty_body> p;
            http::async_read_header(beast::get_lowest_layer(ws), buffer, p, yield[ec]);
            if (ec)
                return fail(ec, "read_connect");
    
            res = std::move(p.get());
        }
        if (res.result() != http::status::ok) {
            std::cerr << "Proxy failed to CONNECT: " << res.result_int() << std::endl;
            return;
        }
        std::cout << "HTTP CONNECT response received." << std::endl;
        std::cout << "HTTP CONNECT response: " << res << std::endl;
    
        // Perform SSL handshake, feeding it the bytes already buffered from the
        // proxy read and dropping the part the handshake reports as used.
        std::cout << "Performing SSL handshake..." << std::endl;
        {
            auto used = ws.next_layer().async_handshake(ssl::stream_base::client, buffer.data(), yield[ec]);
            if (ec) {
                std::cerr << "SSL handshake failed: " << ec.message() << std::endl;
                return fail(ec, "ssl_handshake");
            }
            buffer.consume(used);
        }
        std::cout << "SSL handshake completed." << std::endl;
    
        // From here on `host` holds "host:port" and is used as such in the WebSocket handshake.
        host += ":" + port;
    
        // The buffer is reused for WebSocket messages below, so it must be empty here.
        assert(buffer.size() == 0);
    
        // Perform WebSocket handshake
        std::cout << "Performing WebSocket handshake..." << std::endl;
        ws.async_handshake(host, stream_path, yield[ec]);
        if (ec)
            return fail(ec, "handshake");
        std::cout << "WebSocket handshake completed." << std::endl;
    
        // Set timeout settings for the websocket
        ws.set_option(websocket::stream_base::timeout::suggested(beast::role_type::client));
    
        std::cout << "Entering read loop..." << std::endl;
    
        // Loop to read messages; the loop ends on the first failed read or pong.
        for (;;) {
            std::cout << "Reading message..." << std::endl;
            ws.async_read(buffer, yield[ec]);
            if (ec)
                return fail(ec, "read");
    
            std::cout << "Received message: " << beast::make_printable(buffer.data()) << std::endl;
    
            // async_read appends to the buffer: discard the message just printed so
            // the next iteration shows only the new one and the buffer does not grow.
            buffer.consume(buffer.size());
    
            // Optionally send a pong frame to keep the connection alive
            ws.async_pong({}, yield[ec]);
            if (ec)
                return fail(ec, "pong");
        }
    }
    
    // Entry point: expects <host> <port> <stream> on the command line, prepares a
    // peer-verifying TLS client context and runs the session coroutine until the
    // io_context has no more work.
    int main(int argc, char** argv) {
        if (argc != 4) {
            std::cerr << "Usage: websocket-client-coro-ssl <host> <port> <stream>\n"
                         "Example:\n"
                         "    websocket-client-coro-ssl fstream.binance.com 443 /ws/bnbusdt@aggTrade\n"
                         "    websocket-client-coro-ssl fstream.binance.com 443 "
                         "/stream?streams=bnbusdt@aggTrade/btcusdt@markPrice\n";
            return EXIT_FAILURE;
        }
    
        std::string const target_host(argv[1]);
        std::string const target_port(argv[2]);
        std::string const target_path(argv[3]);
    
        std::cout << "Starting client for host: " << target_host << ", port: " << target_port
                  << ", stream_path: " << target_path << std::endl;
    
        net::io_context ioc;
    
        // TLS client context; peers are verified against the CA bundle loaded below.
        ssl::context ctx{ssl::context::sslv23_client};
        ctx.set_verify_mode(ssl::verify_peer);
        ctx.load_verify_file(
            "/usr/lib/ssl/certs/ca-certificates.crt"); // Change to the actual path of your CA cert
    
        // Alternative kept for reference:
        // boost::certify::enable_native_https_server_verification(ctx); // from lib
        // https://github.com/djarek/certify
        ctx.set_options(ssl::context::default_workarounds | ssl::context::no_sslv2 |
                        ssl::context::no_sslv3);
    
        // The bound functor owns copies of the three strings; ioc and ctx are passed by reference.
        spawn(ioc,
              std::bind(&do_session, target_host, target_port, target_path, std::ref(ioc),
                        std::ref(ctx), std::placeholders::_1),
              boost::asio::detached);
    
        std::cout << "Running IO context..." << std::endl;
        ioc.run();
    
        std::cout << "Client exited." << std::endl;
        return EXIT_SUCCESS;
    }
    

    Using tinyproxy on port 7777 locally: