algorithmschemesparqlkey-value-storetriplestore

What is a smallest set of indices that allows to fully bind any pattern of 6-tuple in one hop?


I am trying to build a 6-tuple store on top of wiredtiger. The tuples can be described as follow:

(graph, subject, predicate, object, alive, transaction)

Every tuple stored in the database is unique.

Queries are like regular SPARQL queries except that the database store 6 tuples. Zero of more elements of a tuple can be variable. Here is an example query that allows to retrieve all changes introduces by a particular transaction P4X432:

SELECT ?graph ?subject ?predicate ?object ?alive
WHERE 
{
  ?graph ?subject ?predicate ?object ?alive "P4X432"
}

Considering all possible patterns ends up with considering all combinations of:

(graph, subject, predicate, object, alive, transaction)

That is given by the following function:

def combinations(tab):
    out = []
    for i in range(1, len(tab) + 1):
        out.extend(x for x in itertools.combinations(tab, i))

    assert len(out) == 2**len(tab) - 1
    return out

Where:

print(len(combinations(('graph', 'subject', 'predicate', 'object', 'alive', 'transaction'))))

Display:

63

That is there 63 combinations of the 6-tuples. I can complete each indices with the missing tuple item, e.g. the following combination:

('graph', 'predicate', 'transaction')

Will be associated with the following index:

('graph', 'predicate', 'transaction', 'subject', 'alive', 'object')

But I know there is a smaller subset of all permutations of the 6-tuple that has the following property:

A set of n-permutations of {1, 2, ..., n} where all combinations of {1, 2, ..., n} are prefix-permutation of at least one element of the set.

Otherwise said, all combinations have a permutation that is prefix of one element of the set.

I found using a brute force algorithm a set of size 25 (inferior to 63) that has that property:

((5 0 1 2 3 4) (4 5 0 1 2 3) (3 4 5 0 1 2) (2 3 4 5 0 1) (1 2 3 4 5 0) (0 1 2 3 4 5) (0 2 1 3 4 5) (0 3 2 1 5 4) (0 4 3 1 5 2) (0 4 2 3 1 5) (2 1 5 3 0 4) (3 2 1 5 0 4) (3 1 4 5 0 2) (3 1 5 4 2 0) (3 0 1 4 2 5) (3 5 2 0 1 4) (4 3 1 0 2 5) (4 2 1 5 3 0) (4 1 0 2 5 3) (4 5 2 1 0 3) (5 4 1 2 3 0) (5 3 0 1 4 2) (5 2 1 3 4 0) (5 1 2 4 0 3) (5 0 2 4 3 1))

Here is the r7rs scheme program I use to compute that solution:

(define-library (indices)
  (export indices)
  (export permutations)
  (export combination)
  (export combinations)

  (export run)

  (import (only (chezscheme) trace-define trace-lambda random trace-let))

  (import (scheme base))
  (import (scheme list))
  (import (scheme comparator))
  (import (scheme hash-table))
  (import (scheme process-context))

  (import (scheme write))

  (begin

    (define (combination k lst)
      (cond
       ((= k 0) '(()))
       ((null? lst) '())
       (else
        (let ((head (car lst))
              (tail (cdr lst)))
          (append (map (lambda (y) (cons head y)) (combination (- k 1) tail))
                  (combination k tail))))))

    (define (factorial n)
      (let loop ((n n)
                 (out 1))
        (if (= n 0)
            out
            (loop (- n 1) (* n out)))))


    (define (%binomial-coefficient n k)
      ;; https://en.wikipedia.org/wiki/Binomial_coefficient#Multiplicative_formula
      (let loop ((i 1)
                 (out 1))
        (if (= i (+ k 1))
            out
            (loop (+ i 1) (* out (/ (- (+ n 1) i) i))))))

    (define (memo proc)
      (let ((m (make-hash-table (make-equal-comparator))))
        (lambda args
          (if (hash-table-contains? m args)
              (hash-table-ref m args)
              (let ((v (apply proc args)))
                (hash-table-set! m args v)
                v)))))

    (define binomial-coefficient
      (memo
       (lambda (n k)
         (cond
          ((= n k) 1)
          ((= k 0) 1)
          (else (%binomial-coefficient n k))))))

    ;; k-combination ranking and unranking procedures according to
    ;; https://en.wikipedia.org/wiki/Combinatorial_number_system

    (define (ranking lst)
      (let loop ((lst (sort < lst)) ;; increasing sequence
                 (k 1)
                 (out 0))
        (if (null? lst)
            out
            (loop (cdr lst) (+ k 1) (+ out (binomial-coefficient (car lst) k))))))

    (define (%unranking k N)
      (let loop ((n (- k 1)))
                 (if (< N (binomial-coefficient (+ n 1) k))
                     n
                     (loop (+ n 1)))))

    (define (unranking k N)
      (let loop ((k k)
                       (N N)
                       (out '()))
                 (if (= k 0)
                     out
                     (let ((m (%unranking k N)))
                       (loop (- k 1) (- N (binomial-coefficient m k)) (cons m out))))))

    (define fresh-random
      (let ((memo (make-hash-table (make-eqv-comparator))))
        (lambda (n)
          (when (= (hash-table-size memo) n)
            (error 'oops "no more fresh number" n
                   ))
          (let loop ()
            (let ((r (random n)))
              (if (hash-table-contains? memo r)
                  (loop)
                  (begin (hash-table-set! memo r #t) r)))))))

    (define (random-k-combination k n)
      (unranking k (fresh-random (binomial-coefficient n k))))

    (define (combinations lst)
      (if (null? lst) '(())
          (let* ((head (car lst))
                 (tail (cdr lst))
                 (s (combinations tail))
                 (v (map (lambda (x) (cons head x)) s)))
            (append s v))))

    ;; (define (combinations lst)
    ;;   (append-map (lambda (k) (combination k lst)) (iota (length lst))))

    (define (permutations s)
      ;; http://rosettacode.org/wiki/Permutations#Scheme
      (cond
       ((null? s) '(()))
       ((null? (cdr s)) (list s))
       (else ;; extract each item in list in turn and permutations the rest
        (let splice ((l '()) (m (car s)) (r (cdr s)))
          (append
           (map (lambda (x) (cons m x)) (permutations (append l r)))
           (if (null? r) '()
               (splice (cons m l) (car r) (cdr r))))))))

    (define (shift lst index)
      (append (drop lst index) (take lst index)))

    (define (rotations lst)
      (reverse! (map (lambda (index) (shift lst index)) (iota (length lst)))))

    (define (prefix? lst other)
      "Return #t if LST is prefix of OTHER"
      (let prefix ((lst lst)
                 (other other))
        (if (null? lst)
            #t
            (if (= (car lst) (car other))
                (prefix (cdr lst) (cdr other))
                #f))))

    (define (indices lst)
      (let ((candidates (permutations lst)))
        (let loop ((out (rotations lst)) ;; all rotations are solutions
                   (combinations (combinations lst)))
          (if (null? combinations)
              (reverse! out)
              (let ((permutations (permutations (car combinations))))
                (if (any (lambda (s) (any (lambda (p) (prefix? p s)) permutations)) out)
                    ;; there is an existing "solution" for the
                    ;; permutations of COMBINATION move to the next
                    ;; combination
                    (loop out (cdr combinations))
                    (loop (cons (find (lambda (c) (if (member c out)
                                                      #f
                                                      (any (lambda (p) (prefix? p c)) permutations)))
                                      candidates)
                                out)
                          (cdr combinations))))))))


    (define (permutation-prefix? c o)
      (any (lambda (p) (prefix? p o)) (permutations c)))

    (define (ok? combinations candidate)
      (every (lambda (c) (any (lambda (p) (permutation-prefix? c p)) candidate)) combinations))

    (define (run)
      (let* ((n (string->number (cadr (command-line))))
             (N (iota n))
             (solution (indices N))
             (min (length solution))
             (rotations (rotations N))
             (R (length rotations))
             ;; other stuff
             (cx (combinations N))
             (px (filter (lambda (x) (not (member x rotations))) (permutations N)))
             ;; other other stuff
             (pn (length px))
             (PN (iota pn)))
        (display "(length solution) => ") (display (length solution))
        (display "\n")
        (display "(length rotations) => ") (display R)
        (display "\n")
        (let try ((x (- (length solution) 1)))
          (let ((count (binomial-coefficient pn (- x R))))
            (let loop ((index 0)
                       (cxx (map (lambda (x) (list-ref px x)) (random-k-combination (- x R) pn))))
              (when (= (modulo index (expt 10 5)) 0)
                (display "n=") (display n) (display " x=") (display x)
                (display " ")
                (display index) (display "/") (display count)  (display "\n"))
              (let ((candidate (append rotations cxx)))
                (let ((continue? (not (ok? cx candidate))))
                  (if continue?
                      (loop (+ index 1)
                            (map (lambda (x) (list-ref px x)) (random-k-combination (- x R) pn)))
                      (begin (display "new solution n=") (display n)
                             (display " length=") (display x)
                             (display " ") (display candidate)
                             (display "\n")
                             (try (- x 1)))))))))))

    ))

With that list of permutations I can query any pattern.

I am wondering if there is a smaller set and whether there is definitive algorithm to compute that kind of set.


Solution

  • Based on this answer https://math.stackexchange.com/a/3146793/23663

    The following program yields a solution that is a minimal solution according to math ™:

        import itertools
        import math
        
        
        f = math.factorial
        bc = lambda n, k: f(n) // f(k) // f(n-k) if k<n else 0
        
        
        def pk(*args):
            print(*args)
            return args[-1]
        
        
        def stringify(iterable):
            return ''.join(str(x) for x in iterable)
        
        
        def combinations(tab):
            out = []
            for i in range(1, len(tab) + 1):
                out.extend(stringify(x) for x in itertools.combinations(tab, i))
            assert len(out) == 2**len(tab) - 1
            return out
        
        
        def ok(solutions, tab):
            cx = combinations(tab)
        
            px = [stringify(x) for x in itertools.permutations(tab)]
        
            for combination in cx:
                pcx = [''.join(x) for x in itertools.permutations(combination)]
                # check for existing solution
                for solution in solutions:
                    if any(solution.startswith(p) for p in pcx):
                        # yeah, there is an existing solution
                        break
                else:
                    print('failed with combination={}'.format(combination))
                    break
            else:
                return True
            return False
        
        
        def run(n):
            tab = list(range(n))
            cx = list(itertools.combinations(tab, n//2))
            for c in cx:
                L = [(i, i in c) for i in tab]
                A = []
                B = []
                while True:
                    for i in range(len(L) - 1):
                        if (not L[i][1]) and L[i + 1][1]:
                            A.append(L[i + 1][0])
                            B.append(L[i][0])
                            L.remove((L[i + 1][0], True))
                            L.remove((L[i][0], False))
                            break
                    else:
                        break
                l = [i for (i, _) in L]
                yield A + l + B
        
        
        
        for i in range(7):
            tab = stringify(range(i))
            solutions = [stringify(x) for x in run(i)]
            assert ok(solutions, tab)
            print("n={}, size={}, solutions={}".format(i, len(solutions), solutions))
    

    The above program output is:

    n=0, size=1, solutions=['']
    n=1, size=1, solutions=['0']
    n=2, size=2, solutions=['01', '10']
    n=3, size=3, solutions=['012', '120', '201']
    n=4, size=6, solutions=['0123', '2031', '3012', '1230', '1302', '2310']
    n=5, size=10, solutions=['01234', '20341', '30142', '40123', '12340', '13402', '14203', '23410', '24013', '34021']
    n=6, size=20, solutions=['012345', '301452', '401253', '501234', '203451', '240513', '250314', '340521', '350124', '450132', '123450', '142503', '152304', '134502', '135024', '145032', '234510', '235104', '245130', '345210']