rethinkdbreql

How can I perform a join/merge of a nested attribute across tables?


I have a table (db name is libstats, table name is flowcells) full of documents, all of which have a structure like this:

{
        "barcode": "C3W9UACXX",
        "id": "0021732f-2c42-4e9a-90fd-c68bb0d998dc",
        "libraries": [
            {
                "bases": 2431000000,
                "library_id": "SL58263",
                "perc_raw_clusters": 5.5,
                "pf_reads": 24312986,
                "q30": 92.23,
                "qscore": 35.82,
                "reads": 25834646,
                "lane": 1
             },...
         ]
}

The objects in the 'libraries' array will always have the keys shown. I need to add another key library_name.

I have another table libraries in another database libraries which has this library_name information. Here's an example document from that table:

{
   library_id: 'SL123456',
   library_name: 'my_library_name'
}

How can I use ReQL to accomplish this? I've gotten this far:

r.db('libstats').table('flowcells').merge(function(flowcell){
  return {'libraries': flowcell('libraries').map(function(library){
    return library.merge(
       {'library_name': 'foo'}
      )
  })
  }
}).limit(1)

which gives output in exactly the structure I want, but all of my attempts to acquire the library_name attribute using getField, eqJoin(), and merge() have thus far proved fruitless:

{
        "barcode": "C6841ANXX",
        "id": "007cae10-de3c-44df-9aee-1de9c88c1c21",
        "libraries": [
            {
                "bases": 194000000,
                "lane": "1",
                "library_id": "SL91807",
                "library_name": "foo",
                "perc_raw_clusters": 0.9,
                "pf_reads": 1942910,
                "q30": 96.55,
                "qscore": 36.06,
                "reads": 2045599
            },
        ]
    }

Solution

  • Naive Implementation

    You can do the following:

    r.db('libstats').table('flowcells').merge(function(flowcell){
      return {'libraries': flowcell('libraries').map(function(library){
        return library.merge({ 
          // Query the `libraries` table
          'library_name': r.db('libraries').table('libraries')
            // Filter out certain elements
            .filter(function (row) {
              // Return all elements where the `library_id` is equal to 
              // the `library_id` in the `libstats` table
              return row('library_id').eq(library('library_id'))
            })
            // Return the `library_name` for the first element
            (0)('library_name')
        })
      })
      }
    })
    

    Keep in mind that you can also use a secondary index for this and make this a bit simpler and more performant.

    Better Solution

    If you have a lot of documents (10K+), you'll want to create an index on library_id and use the following query:

    r.table('libstats').merge(function(flowcell){
      return {'libraries': flowcell('libraries').map(function(library){
        return library.merge({ 
          // Query the `libraries` table
          'library_name': r.table('libraries')
            // Filter out certain elements
          .getAll(library('library_id'), { index: 'library_id' })(0)('library_name')
        })
      })
      }
    })