httpredpandabenthos

Can't get an url but it works when opened in a browser


I have this benthos pipeline :

input:
  http_server:
    address: ""
    path: /post
    ws_path: /post/ws
    allowed_verbs:
      - POST
    timeout: 120s
    rate_limit: ""

pipeline:
  processors:

    # Parse POST input query to get bounding box from body, ie {"tile": a line from all.json}
    - bloblang: |
        root.bbox.x_min = this.tile.split("&").index(0).split("=").index(1)
        root.bbox.x_max = this.tile.split("&").index(1).split("=").index(1)
        root.bbox.y_min = this.tile.split("&").index(2).split("=").index(1)
        root.bbox.y_max = this.tile.split("&").index(3).split("=").index(1)

    - bloblang: |
        root.url = "https://overpass-api.de/api/interpreter?data=[out:json];(node['amenity'](" + 
          this.bbox.y_min + "," + 
          this.bbox.x_min + "," + 
          this.bbox.y_max + "," + 
          this.bbox.x_max + ");" +
          "way['amenity'](" + 
          this.bbox.y_min + "," + 
          this.bbox.x_min + "," + 
          this.bbox.y_max + "," + 
          this.bbox.x_max + ");" +
          "relation['amenity'](" + 
          this.bbox.y_min + "," + 
          this.bbox.x_min + "," + 
          this.bbox.y_max + "," + 
          this.bbox.x_max + "););out 10 center;"

    # DEBUGGING
    - log: 
        level: info
        message: URL test 
        fields_mapping:
          root.url = this.url

    # Make a request to the Overpass API to fetch data
    - http: 
        url: "${!this.url}"
        verb: GET
        timeout: 60s

    # Parse the JSON response and extract relevant fields (array of elements)
    - bloblang: |
        root.batch = this.response.elements.map_each(e -> if e.tags.amenity != null && e.lat != null && e.lon != null {
          {
            "amenity": e.tags.amenity,
            "lat": e.lat,
            "lon": e.lon
          }
        }).filter(e -> e != null)

output:
  cypher:
    uri: "neo4j://localhost:7687"
    cypher: | 
      UNWIND $batch AS row
      MERGE (p:POI {amenity: row.amenity, lat: row.lat, lon: row.lon})
    # Treat array's rows as batches
    args_mapping: | 
      root.batch = this.batch 
    basic_auth:
      enabled: true
      username: "neo4j"
      password: "neotest"
    batching:
      count: 100
    max_in_flight: 64

The url is formatted correctly (the log output this and it works if you copy paste it into a browser) :

INFO URL test                                      @service=redpanda-connect label="" path=root.pipeline.processors.2 url="https://overpass-api.de/api/interpreter?data=[out:json];(node['amenity'](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);way['amenity'](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);relation['amenity'](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095););out 10 center;"

But I have this error :

ERRO HTTP request to '${!this.url}' failed: https://overpass-api.de/api/interpreter?data=[out:json];(node['amenity'](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);way['amenity'](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);relation['amenity'](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095););out 10 center;: HTTP request returned unexpected response code (400): 400 Bad Request, Error: <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"><html><head><title>400 Bad Request</title></head><body><h1>Bad Request</h1><p>Your browser sent a request that this server could not understand.<br /></p><hr><address>Apache/2.4.62 (Debian) Server at overpass-api.de Port 443</address></body></html>

Do you have any idea of where it could come from?

I also tried to do a root.url = root.url.escape_url_query() after my root.url = ... but it doesn't work too since my URL looks like this after escaping :

https%3A%2F%2Foverpass-api.de%2Fapi%2Finterpreter%3Fdata%3D%5Bout%3Ajson%5D%3B%28node%5B%27amenity%27%5D%2849.90146355262061%2C5.875795590895844%2C49.905976047309935%2C5.882782232769095%29%3Bway%5B%27amenity%27%5D%2849.90146355262061%2C5.875795590895844%2C49.905976047309935%2C5.882782232769095%29%3Brelation%5B%27amenity%27%5D%2849.90146355262061%2C5.875795590895844%2C49.905976047309935%2C5.882782232769095%29%3B%29%3Bout+10+center%3B

PS : for testing, an input is a request like this :

curl -X POST http://0.0.0.0:4195/post -H "Content-Type: application/json" -d '{"tile": "x_min=5.875795590895844&x_max=5.882782232769095&y_min=49.90146355262061&y_max=49.905976047309935"}'

EDIT :

pipeline:
 processors:

# Parse POST input query to get bounding box from body, ie {"tile": a line from all.json}
- bloblang: |
    root.bbox.x_min = this.tile.split("&").index(0).split("=").index(1)
    root.bbox.x_max = this.tile.split("&").index(1).split("=").index(1)
    root.bbox.y_min = this.tile.split("&").index(2).split("=").index(1)
    root.bbox.y_max = this.tile.split("&").index(3).split("=").index(1)

- bloblang: |
    root.url = "https://overpass-api.de/api/interpreter?data=[out:json];(node[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);way[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);relation[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095););out%2010%20center;"

# Mapping to clear request body
- mapping: |
    meta = deleted()
    meta url = root.url

# Make a request to the Overpass API to fetch data
- http: 
    url: ${! metadata("url") }
    verb: GET
    timeout: 60s

# Parse the JSON response and extract relevant fields (array of elements)
- bloblang: |
    root.batch = this.response.elements.map_each(e -> if e.tags.amenity != null && e.lat != null && e.lon != null {
      {
        "amenity": e.tags.amenity,
        "lat": e.lat,
        "lon": e.lon
      }
    }).filter(e -> e != null)

Solution

  • The issue is that some characters in the URL query need to be escaped using percent encoding. In the URL from your example, you'll have to escape single quotes and spaces like so:

    https://overpass-api.de/api/interpreter?data=[out:json];(node[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);way[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);relation[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095););out%2010%20center;
    

    Update: Here is a working config:

    input:
      generate:
        count: 1
        mapping: |
          meta url = "https://overpass-api.de/api/interpreter?data=[out:json];(node[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);way[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095);relation[%27amenity%27](49.90146355262061,5.875795590895844,49.905976047309935,5.882782232769095););out%2010%20center;"
      processors:
        - http:
            url: ${! metadata("url") }
            verb: GET