const db = await Database.create(":memory:");
await db.run(`INSTALL spatial; LOAD spatial`);
await db.run(`INSTALL json; LOAD json`);
await db.run(`
CREATE TABLE duckdata AS
SELECT * EXCLUDE ${wkbColName}, ST_GeomFromWKB(${wkbColName}) AS geometry
FROM read_parquet('${fileName}/*.parquet', hive_partitioning = true)`
);
const con = await db.connect();
let rows = await con.all(`
COPY (SELECT ST_AsGeoJSON(geometry) AS geometry FROM duckdata LIMIT 10) TO 'my.json' (ARRAY true)`
);
[
{"geometry":
{"type":"Point",
"coordinates":[-73.79132080078125,40.64582824707031]}
},
{"geometry":
{"type":"Point",
"coordinates":[-73.79132080078125,40.64582824707031]}
},
]
[
{"type" : "Feature",
"properties" : {
"capacity" : "10",
"type" : "U-Rack",
"mount" : "Surface"
},
"geometry" : {
"type" : "Point",
"coordinates" : [ -71.073283, 42.417500 ]
}
},
{"type" : "Feature",
"properties" : {
"capacity" : "10",
"type" : "U-Rack",
"mount" : "Surface"
},
"geometry" : {
"type" : "Point",
"coordinates" : [ -71.073283, 42.417500 ]
}
},
]
let rows = await con.all(`
COPY (SELECT * EXCLUDE geometry AS properties, ST_AsGeoJSON(geometry) AS geometry FROM duckdata LIMIT 10) TO 'my.json' (ARRAY true)`
);
Duplicate struct entry name "properties"
. No matter what I change the name of properties
too I still get this errorI've also tried this:
let rows = await con.all(`
COPY (
SELECT
json_object(
'type', 'Feature',
'properties', json_object(
'vendorId', VendorID
),
'geometry', ST_AsGeoJSON(geometry)
)
FROM duckdata
LIMIT 10
) TO 'my.json' (ARRAY true)`
);
with the output of:
[
{"json_object('type', 'Feature', 'properties', json_object('vendorId', VendorID), 'geometry', st_asgeojson(geometry))":{"type":"Feature","properties":{"vendorId":"2"},"geometry":{"type":"Point","coordinates":[-73.79132080078125,40.64582824707031]}}},
{"json_object('type', 'Feature', 'properties', json_object('vendorId', VendorID), 'geometry', st_asgeojson(geometry))":{"type":"Feature","properties":{"vendorId":"1"},"geometry":{"type":"Point","coordinates":[-73.99661254882812,40.766761779785156]}}},
]
If anyone finds this from google I couldn't figure out how to write SQL code to get the full geojson document. What I ended up doing was something like this:
CREATE TABLE mytable AS
SELECT
* EXCLUDE WKBColumn,
ST_GeomFromWKB(WKBColumn) AS geometry
FROM
read_parquet('${sourceConfig.fileName}/*.parquet', hive_partitioning = true);
This reads a parquet from disk and converts the WKB buffer column to a GEOMETRY duckdb type
SELECT
* EXCLUDE geometry,
ST_AsGeoJSON(geometry) AS geometry
FROM
mytable
I select all my columns but exclude the geometry column and then convert it to a geojson object.
Then I wrote some js code to translate the array of dictionaries I get back from the query to a full geojson document. This was actually quite fast and didn't take as long as I thought even on larger queries. You'll notice that there are some hardcoded things that need to be improved but this will get you started in the right direction if you run into this problem that I did
const metadata = config.properties || {};
const columns = Object.keys(data[0]);
return {
type: "FeatureCollection",
features: data.map((row) =>
formatFeature(row, columns, metadata.idField)
),
properties: metadata,
};
function formatFeature(values, columns, idField) {
let feature = {
type: "Feature",
properties: {},
geometry: {
type: "Point",
coordinates: [],
},
};
for (let i = 0; i < columns.length; i++) {
const value = values[columns[i]];
if (columns[i] === "geometry") {
let geom = values[columns[i]]
var geometry = JSON.parse(geom);
feature.geometry = geometry;
} else {
if (columns[i] == idField) {
feature["id"] = value.toString();
}
feature.properties[columns[i]] = value;
}
}
return feature;
}
more info here too https://github.com/duckdb/duckdb_spatial/issues/370