docker nginx reverse-proxy openresty

Why is OpenResty Nginx converting my upstream's XML response to JSON?


Unlike most of the other questions I have found here on this topic, I don't want Nginx to convert my upstream's response to JSON, but it seems to be doing it anyway and I can't figure out why.

I am using the ubuntu:jammy version of docker-openresty as my base install, with some small modifications. I have split the massive single RUN block into a few smaller, logically separated RUNs to reduce the time lost to build failures during development, removed luarocks from the installation, added direct package installs of lua-resty-http, lua-resty-openssl, and nginx-lua-prometheus, and of course included my nginx configurations at the bottom of the file. I currently have the location hard-coded to send to the api1 upstream for debugging purposes, just to remove uncertainty, but the issue remains. Running the exact same command against the upstream server directly returns the result I am expecting (an XML response body), but against the nginx proxy I get JSON. I'm really at a loss here and would appreciate any input!
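For reference, the three library additions could be pulled in with opm (which ships with docker-openresty) inside one of the split RUN steps; this is just a minimal sketch, not necessarily the exact commands in the Dockerfile:

# minimal sketch of the extra Lua library installs, assuming opm is used
opm get ledgetech/lua-resty-http
opm get fffonion/lua-resty-openssl
opm get knyar/nginx-lua-prometheus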

curl against upstream

curl -u parse_only_user:ABCD1234$ -d '{"txn_id": 100001}' -H 'Content-Type: application/json' testserver2:8081/api/39e6f3b0-aa69-4b39-9037-0cb08dcf1705 -v
*   Trying 10.244.31.96...
* TCP_NODELAY set
* Connected to testserver2 (10.244.31.96) port 8081 (#0)
* Server auth using Basic with user 'parse_only_user'
> POST /api/39e6f3b0-aa69-4b39-9037-0cb08dcf1705 HTTP/1.1
> Host: testserver2:8081
> Authorization: Basic cGFyc2Vfb25seV91c2VyOkFCQ0QxMjM0JA==
> User-Agent: curl/7.61.1
> Accept: */*
> Content-Type: application/json
> Content-Length: 18
>
* upload completely sent off: 18 out of 18 bytes
< HTTP/1.1 200
< Cache-Control: no-store, no-cache, must-revalidate
< Pragma: no-cache
< vary: accept-encoding
< Content-Type: text/html;charset=UTF-8
< Content-Length: 319
< Date: Thu, 28 Nov 2024 14:54:32 GMT
<
<?xml version="1.0" encoding="UTF-8"?><transaction-results><transaction-id>100001</transaction-id><cross-reference>49c721a3-ac1c-4069-8af9-04a4a3a24e07</cross-reference><rules-tripped></rules-tripped><total-score>0</total-score><recommendation-code>ACCEPT</recommendation-code><remarks></remarks></transaction-results>
* Connection #0 to host testserver2 left intact

curl against nginx

curl -u parse_only_user:ABCD1234$ -d '{"txn_id": 100001}' -H 'Content-Type: application/json' reverse-proxy.test.com:5009/api/7ea50807-0d92-4f1f-8f35-6f04b4b5955b -v
*   Trying 127.0.0.1...
* TCP_NODELAY set
* Connected to reverse-proxy.test.com (127.0.0.1) port 5009 (#0)
* Server auth using Basic with user 'parse_only_user'
> POST /api/7ea50807-0d92-4f1f-8f35-6f04b4b5955b HTTP/1.1
> Host: reverse-proxy.test.com:5009
> Authorization: Basic cGFyc2Vfb25seV91c2VyOkFCQ0QxMjM0JA==
> User-Agent: curl/7.61.1
> Accept: */*
> Content-Type: application/json
> Content-Length: 18
>
* upload completely sent off: 18 out of 18 bytes
< HTTP/1.1 200
< Date: Thu, 28 Nov 2024 14:49:58 GMT
< Content-Type: text/html;charset=UTF-8
< Content-Length: 196
< Connection: keep-alive
< Cache-Control: no-store, no-cache, must-revalidate
< Pragma: no-cache
< vary: accept-encoding
<
{"transaction-results": { "transaction-id": "100001","cross-reference": "161d9f1a-e228-4eb3-b6a6-37043f0a3a9e","rules-tripped": "","total-score": 0,"recommendation-code": "ACCEPT","remarks": ""}}
* Connection #0 to host reverse-proxy.test.com left intact

/usr/local/openresty/nginx/conf/nginx.conf

# Defines the number of worker processes. Generally, it should match the number of CPU cores.
worker_processes 4;

# allow engine to decide which cores to bind worker processes to
worker_cpu_affinity auto;

# create a "default" thread pool with 4 threads that will queue up to 50,000 requests before dropping
thread_pool default threads=4 max_queue=50000;

# Limit on the maximum number of open files (RLIMIT_NOFILE) for worker processes.
worker_rlimit_nofile 64000;

# set error logging to notice as flushing errors is computationally expensive
error_log logs/error.log notice;

# Defines a file that will store the process ID of the main process.
#pid /var/run/nginx.pid;
pid ./nginx.pid;

# Enables the use of JIT for regular expressions to speed-up their processing.
pcre_jit on;

events {
    # Maximum number of simultaneous connections that can be opened by a worker process. This includes all
    # connections, including incoming connections from clients, connections to upstreams, and any others.
    worker_connections 50000;

    # Serve many clients each thread (Linux only)
    use epoll;

    # Accept as many connections as possible. If it is disabled, a worker process will accept one new connection at a time.
    multi_accept on;
}

http {
    # disable responding with nginx/OS version information
    server_tokens off;

    # remove the server header for all response types
    more_clear_headers "Server";

    # set paths for temp files
    client_body_temp_path /var/run/openresty/nginx-client-body;
    proxy_temp_path       /var/run/openresty/nginx-proxy;
    fastcgi_temp_path     /var/run/openresty/nginx-fastcgi;
    uwsgi_temp_path       /var/run/openresty/nginx-uwsgi;
    scgi_temp_path        /var/run/openresty/nginx-scgi;

    # Copies data directly between file descriptors at the kernel level, which is much faster
    # than read() + write(). Also limit this copy to 1 MB chunks to avoid connection hangs
    sendfile on;
    sendfile_max_chunk 1m;

    # Use the default thread pool for asynchronous file I/O
    aio threads;

    # use O_DIRECT instead of sendfile/aio for files larger than or equal to 6 MB
    directio 6m;

    # Send headers in one piece, it is better than sending them one by one
    tcp_nopush on;

    # Don't buffer data sent, good for small data bursts in real time
    tcp_nodelay on;

    # Disable logging if a file can't be found
    log_not_found off;

    # Server will close connection after this time
    keepalive_timeout 60;

    include       mime.types;

    # Max allowed size of the client request body
    client_max_body_size 250M;

    # If the request body size is more than the buffer size, then the entire (or partial)
    # request body is written into a temporary file
    client_body_buffer_size 512k;

    # This timeout is set only for a period between two successive read operations, not for the transmission of the whole request body.
    # The default is 60s and I can't find any real information on why it is so high, so I am reducing it here and we will see what happens.
    client_body_timeout 10s;

    # Allow the server to close the connection on a non-responding client; this will free up memory
    reset_timedout_connection on;

    # Configures logging.
    log_format main 'ACCESS LOG: remote_addr: "$remote_addr", remote_user: "$remote_user", time_local: "$time_local", request: "$request", '
    'status: "$status", body_bytes_sent: "$body_bytes_sent", http_referer: "$http_referer", '
    'http_user_agent: "$http_user_agent", upstream_addr: "$upstream_addr", '
    'http_content_type: "$http_content_type", resp_content_type: "$upstream_http_content_type"';

    # Sets the path, format, and configuration for a buffered log write.
    access_log logs/access.log main;


    # disable compression as this will probably fight with existing RTD compression
    gzip off;

    resolver 10.244.4.79 10.244.4.74 10.244.54.79 valid=10s ipv6=off;
    include /etc/nginx/conf.d/*.conf;
}

/etc/nginx/conf.d/1_upstreams.conf

#upstream <farm name> {
    # server <hostname>:<port>;
    # server <hostname>:<port>;
    # ...

    # # enables connection pooling/caching and sets the max number of IDLE connections to 16.
    # # note this DOES NOT set the max number of total connections, just idle ones.
    # keepalive 16;

    # # the maximum lifetime of a connection. If a connection is in use at the TTL the final request
    # # will be completed and then the connection closed.
    # keepalive_time 20m;

    # # the idle timeout of a connection.
    # keepalive_timeout 60s;
#}

upstream api1 {
    server testserver1:8081;

    # enables connection pooling/caching and sets the max number of IDLE connections to 16.
    # note this DOES NOT set the max number of total connections, just idle ones.
    keepalive 16;

    # the maximum lifetime of a connection. If a connection is in use at the TTL the final request
    # will be completed and then the connection closed.
    keepalive_time 20m;

    # the idle timeout of a connection.
    keepalive_timeout 60s;
}

upstream api2 {
    server testserver2:8081;

    keepalive 16;
    keepalive_time 20m;
    keepalive_timeout 60s;
}

/etc/nginx/conf.d/router.conf

server {
    # Listen on port 80. This is INTERNAL to the container, so the port that the network load
    # balancer(s) should forward to is whatever port is mapped externally (the mapping is
    # sketched after this config block).
    listen 80;
    server_name reverse-proxy.test.com;

    # set error responses to an empty html file
    error_page 400 401 403 404 500 502 503 504 /empty_error_page.html;

    location ~ ^\/api\/([a-f0-9]+-[a-f0-9]+-[a-f0-9]+-[a-f0-9]+-[a-f0-9]+)$ {
        proxy_http_version 1.1;
        proxy_set_header Accept $http_accept;
        proxy_set_header Connection "";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header Accept application/xml;
        proxy_pass_header Content-Type;
        proxy_pass http://api1;
    }

    location = /empty_404.html {
        root /usr/local/openresty/nginx/html;

        # Prevent direct access to this page
        internal;
    }
}
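For completeness: the server listens on port 80 inside the container, while the curl example above reaches it on port 5009, so an external port mapping is assumed. A minimal sketch of that mapping with a plain docker run (the image name is just a placeholder):

# minimal sketch: publish host port 5009 to the container's internal port 80
docker run -d --name reverse-proxy -p 5009:80 my-openresty-proxy:latest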

Solution

  • I am closing this out as it was an issue with the keyboard-chair interface. The curl commands were hitting different paths on the upstream, which returned different response types; a quick re-check is sketched below. Nginx is doing nothing wrong.
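A quick re-check that shows this, re-using the values from the examples above: send the request through the proxy with the same path that was used in the direct upstream test and look at how the body starts.

# sketch: same path as the direct upstream test, this time through the proxy
ID=39e6f3b0-aa69-4b39-9037-0cb08dcf1705
curl -s -u 'parse_only_user:ABCD1234$' -d '{"txn_id": 100001}' \
    -H 'Content-Type: application/json' \
    "reverse-proxy.test.com:5009/api/$ID" | head -c 40; echo
# if the differing paths really were the only problem, this starts with <?xml rather than {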