sqlnode.jsgeojsonduckdb

DuckDB constructing a full GeoJSON feature collection


What I've done:

Code:

const db = await Database.create(":memory:");
await db.run(`INSTALL spatial; LOAD spatial`);
await db.run(`INSTALL json; LOAD json`);
await db.run(`
        CREATE TABLE duckdata AS 
        SELECT * EXCLUDE ${wkbColName}, ST_GeomFromWKB(${wkbColName}) AS geometry
        FROM read_parquet('${fileName}/*.parquet', hive_partitioning = true)`
      );

 const con = await db.connect();
 let rows = await con.all(`
      COPY (SELECT ST_AsGeoJSON(geometry) AS geometry FROM duckdata LIMIT 10) TO 'my.json' (ARRAY true)`
 );

What I'm currently getting back as output (only showing two rows for simplicity sake):

[
    {"geometry":
        {"type":"Point",
         "coordinates":[-73.79132080078125,40.64582824707031]}
    },
    {"geometry":
        {"type":"Point",
         "coordinates":[-73.79132080078125,40.64582824707031]}
    },
]

What I'd like to get back as output:

[
       {"type" : "Feature", 
        "properties" : {  
            "capacity" : "10", 
            "type" : "U-Rack",
            "mount" : "Surface"
        }, 
        "geometry" : { 
            "type" : "Point", 
            "coordinates" : [ -71.073283, 42.417500 ] 
        }
},
       {"type" : "Feature", 
        "properties" : {  
            "capacity" : "10", 
            "type" : "U-Rack",
            "mount" : "Surface"
        }, 
        "geometry" : { 
            "type" : "Point", 
            "coordinates" : [ -71.073283, 42.417500 ] 
        }
},
]

What I've tried:

 let rows = await con.all(`
      COPY (SELECT * EXCLUDE geometry AS properties, ST_AsGeoJSON(geometry) AS geometry FROM duckdata LIMIT 10) TO 'my.json' (ARRAY true)`
    );

I've also tried this:

 let rows = await con.all(`
      COPY (
        SELECT 
          json_object(
            'type', 'Feature', 
            'properties', json_object(
               'vendorId', VendorID
            ),
            'geometry', ST_AsGeoJSON(geometry)
          ) 
          FROM duckdata 
          LIMIT 10 
      ) TO 'my.json' (ARRAY true)`
    );

with the output of:

[
    {"json_object('type', 'Feature', 'properties', json_object('vendorId', VendorID), 'geometry', st_asgeojson(geometry))":{"type":"Feature","properties":{"vendorId":"2"},"geometry":{"type":"Point","coordinates":[-73.79132080078125,40.64582824707031]}}},
    {"json_object('type', 'Feature', 'properties', json_object('vendorId', VendorID), 'geometry', st_asgeojson(geometry))":{"type":"Feature","properties":{"vendorId":"1"},"geometry":{"type":"Point","coordinates":[-73.99661254882812,40.766761779785156]}}},
]

Solution

  • If anyone finds this from google I couldn't figure out how to write SQL code to get the full geojson document. What I ended up doing was something like this:

    CREATE TABLE mytable AS 
    SELECT 
        * EXCLUDE WKBColumn, 
        ST_GeomFromWKB(WKBColumn) AS geometry
    FROM 
        read_parquet('${sourceConfig.fileName}/*.parquet', hive_partitioning = true);
    

    This reads a parquet from disk and converts the WKB buffer column to a GEOMETRY duckdb type

    SELECT 
        * EXCLUDE geometry, 
        ST_AsGeoJSON(geometry) AS geometry 
    FROM 
        mytable 
    

    I select all my columns but exclude the geometry column and then convert it to a geojson object.

    Then I wrote some js code to translate the array of dictionaries I get back from the query to a full geojson document. This was actually quite fast and didn't take as long as I thought even on larger queries. You'll notice that there are some hardcoded things that need to be improved but this will get you started in the right direction if you run into this problem that I did

    const metadata = config.properties || {};
    const columns = Object.keys(data[0]);
    
    return {
      type: "FeatureCollection",
      features: data.map((row) =>
        formatFeature(row, columns, metadata.idField)
      ),
      properties: metadata,
    };
      
    
    function formatFeature(values, columns, idField) {
      let feature = {
        type: "Feature",
        properties: {},
        geometry: {
          type: "Point",
          coordinates: [],
        },
      };
    
      for (let i = 0; i < columns.length; i++) {
        const value = values[columns[i]];
    
        if (columns[i] === "geometry") {
          let geom = values[columns[i]]
          var geometry = JSON.parse(geom);
          feature.geometry = geometry;
        } else {
          if (columns[i] == idField) {
            feature["id"] = value.toString();
          }
          feature.properties[columns[i]] = value;
        }
      }
    
      return feature;
    }
    

    more info here too https://github.com/duckdb/duckdb_spatial/issues/370