perlduckdb

DuckDB uploading json files to table with perl


I want to use perl to auomtate uploading multiple json files to duckdb

by 1 run this command

CREATE TABLE mytable AS
    SELECT *
    FROM read_json_auto('input1.json');

then

for (@otherfiles) {

    #run this insert
    
    #INSERT INTO mytable 
    #    SELECT *
    #    FROM read_json_auto('$_');

}

i am not finding any DBD driver for duckdb on CPAN ,any idea to implement this with Perl ?


Solution

  • You can use FFI::Platypus to access the duckdb C client API . There is Alien::DuckDB which helps to get the duckdb binaries installed for use with FFI::Platypus. (Note that Alien::DuckDB requires some undeclared dependencies).

    Here is an example:

    todo1.json:

    [
      {
        "userId": 1,
        "id": 1,
        "title": "delectus aut autem",
        "completed": false
      },
      {
        "userId": 1,
        "id": 2,
        "title": "quis ut nam facilis et officia qui",
        "completed": false
      }
    ]
    

    todo2.json:

    [
      {
        "userId": 1,
        "id": 3,
        "title": "fugiat veniam minus",
        "completed": false
      },
      {
        "userId": 1,
        "id": 4,
        "title": "et porro tempora",
        "completed": true
      }
    ]
    

    test.pl

    use strict;
    use warnings;
    use Alien::DuckDB;
    use FFI::Platypus;
    use FindBin qw($RealBin);
    my $dbfile = "$RealBin/testdatabase";
    my @jsonfiles = map {"$RealBin/$_"} ('todo1.json','todo2.json');
    my $table = 'todo';
    
    my $ffi = FFI::Platypus->new(api => 2 );
    
    my @libs = Alien::DuckDB->dynamic_libs;
    $ffi->lib(@libs);
    
    $ffi->type( 'opaque' => 'duckdb_database' );
    $ffi->type( 'opaque' => 'duckdb_connection' );
    $ffi->type( 'opaque' => 'duckdb_result' );
    
    $ffi->attach('duckdb_open' => ['string','duckdb_database*'] , 'int');
    $ffi->attach('duckdb_close' => ['duckdb_database*'] );
    $ffi->attach('duckdb_connect' => ['duckdb_database','duckdb_connection*'] , 'int');
    $ffi->attach('duckdb_disconnect' => ['duckdb_connection*'] );
    $ffi->attach('duckdb_query' => [ 'duckdb_connection','string','duckdb_result*'],'int');
    my $db;
    my $conn;
    my $err = duckdb_open($dbfile,\$db);
    die "open failed\n" if $err; 
    $err = duckdb_connect($db,\$conn);
    die "connect failed\n" if $err;
    my $result ;
    my $jsonfile = shift @jsonfiles;
    my $q = "CREATE TABLE $table AS
        SELECT *
        FROM read_json_auto('$jsonfile');";
    $err = duckdb_query($conn,$q,$result);
    die "create failed\n" if $err;
    
    for my $f (@jsonfiles){
        $q = "INSERT INTO $table
            SELECT *
            FROM read_json_auto('$f');";
        $err = duckdb_query($conn,$q,$result);
        die "insert failed\n" if $err;
    }
    
    duckdb_disconnect(\$conn);
    duckdb_close(\$db);
    

    in terminal:

    ➜  SO perl test.pl         
    ➜  SO ./duckdb testdatabase
    v1.2.1 8e52ec4395
    Enter ".help" for usage hints.
    D select * from todo;
    ┌────────┬───────┬────────────────────────────────────┬───────────┐
    │ userId │  id   │               title                │ completed │
    │ int64  │ int64 │              varchar               │  boolean  │
    ├────────┼───────┼────────────────────────────────────┼───────────┤
    │      1 │     1 │ delectus aut autem                 │ false     │
    │      1 │     2 │ quis ut nam facilis et officia qui │ false     │
    │      1 │     3 │ fugiat veniam minus                │ false     │
    │      1 │     4 │ et porro tempora                   │ true      │
    └────────┴───────┴────────────────────────────────────┴───────────┘
    D