dafnymultisetinduction

Dafny sequence filter function and lemmas


Trying to setup a few functions for a quicksort implementation I got stuck on the following lemmas, filterLemmaExtra and filterLemmaSizes.

function filter<T(==)>(xs: seq<T>, p: (T) -> bool): seq<T>
    ensures forall x: T :: x in xs && p(x) ==> x in filter(xs, p)
    ensures forall x: T :: x !in xs && p(x) ==> x !in filter(xs, p)
    ensures forall x: T :: x in filter(xs, p) ==> p(x)
    ensures forall x: T :: x in filter(xs, p) ==> x in xs[0..|xs|]
    ensures forall x: T :: x in filter(xs, p) ==> x in xs
    ensures forall x: T :: x in xs && !p(x) ==> x !in filter(xs, p)
    ensures forall i: nat :: i < |filter(xs, p)| ==> filter(xs, p)[i] in xs
{
    if xs == [] then [] else if p(xs[0]) then [xs[0]] + filter(xs[1..], p) else filter(xs[1..], p)
}

lemma filterLemmaSizes<T(==)>(xs: seq<T>, fxs: seq<T>, p: (T) -> bool)
    requires fxs == filter(xs, p)
    ensures forall x: T :: x in xs && p(x) ==> multiset(xs)[x] == multiset(fxs)[x]
    ensures multiset(filter(xs,p)) <= multiset(xs)
{
}

lemma filterLemmaExtra<T(==)>(xs: seq<T>, p: (T) -> bool, i: nat)
    requires 0 <= i <= |xs|
    ensures filter(xs, p) == filter(xs[0..i], p) + filter(xs[i..], p)
{
}


predicate isNegatedBooleanFn<T(==)>(xs: seq<T>, p: (T) -> bool, q: (T) -> bool) {
    forall x: T :: x in xs && p(x) ==> !q(x)
}

function filter_mset<T(==)>(ms: multiset<T>, p: (T) -> bool): multiset<T> 
    ensures forall x :: x in ms && p(x) ==> x in filter_mset(ms, p) && ms[x] == filter_mset(ms, p)[x]
    ensures forall x :: x in filter_mset(ms, p) ==> p(x)
    ensures forall x :: x in filter_mset(ms, p) ==> x in ms
{
    if ms == multiset{} then multiset{} else
   var x :| x in ms; if p(x) then var result := multiset{}; result[x := ms[x]] + filter_mset(ms[x := 0], p) else filter_mset(ms[x := 0], p)

}

lemma filterAndFilterMset<T(==)>(ms: seq<T>, p: (T) -> bool)
    ensures multiset(filter(ms, p)) == filter_mset(multiset(ms), p)
{
    assert forall x :: x in filter(ms, p) ==> x in multiset(filter(ms, p)) && p(x);
    assert forall x :: x in filter(ms, p) ==> x in filter_mset(multiset(ms), p);
    assert forall x :: x in filter_mset(multiset(ms), p) ==> x in filter(ms, p);
    filterLemmaSizes(ms, filter(ms, p), p);
    assert forall x :: x in filter(ms, p) ==> multiset(filter(ms, p))[x] == filter_mset(multiset(ms), p)[x];
}

lemma filterMS<T(==)>(xs: seq<T>, p: (T) -> bool)
 ensures exists q: (T) -> bool :: isNegatedBooleanFn(xs, p, q)
{
  var q: (T) -> bool := y => !p(y);
  forall x | x in xs
    ensures x in xs && p(x) ==> !q(x)
  {
    if p(x) {
        assert !q(x);
    }
  }
  assert isNegatedBooleanFn(xs, p, q);
//   assert forall x: T :: x in xs && p(x) ==> !q(x);
  
}

lemma filterMsetAndSum<T(==)>(xs: seq<T>, ms: multiset<T>, p: (T) -> bool)
    requires ms == multiset(xs)
    ensures exists Q: (T) -> bool :: isNegatedBooleanFn(xs, p, Q) && (filter_mset(ms, p) + filter_mset(ms, Q)) == ms
{
    filterMS(xs, p);
    var Q :| isNegatedBooleanFn(xs, p, Q);
    var sum_ms := filter_mset(ms, p) + filter_mset(ms, Q);
    forall x | x in ms 
        ensures ms[x] == sum_ms[x]
    {
        if p(x) {
            assert x in filter_mset(ms, p);
            assert filter_mset(ms, p)[x] == ms[x];
            assert x in sum_ms;
            assert sum_ms[x] == ms[x];
        }else {
            assert x in filter_mset(ms, Q);
            assert filter_mset(ms, Q)[x] == ms[x];
            assert x in sum_ms;
            assert sum_ms[x] == ms[x];
        }
    }
    assert sum_ms == ms;
}



My initial implementation of filterLemmaExtra gets bogged down when I try to assert the indices of the concatenated sequences are equal to the filter.

lemma filterLemmaExtra<T(==)>(xs: seq<T>, p: (T) -> bool, i: nat)
    requires 0 <= i <= |xs|
    ensures filter(xs, p) == filter(xs[0..i], p) + filter(xs[i..], p)
{
        assert xs == xs[0..i] +  xs[i..];
        var allxs := set x | x in xs && p(x);
        var leftxs := set x | x in xs[0..i] && p(x);
        var rightxs := set x | x in xs[i..] && p(x);
        assert allxs == leftxs + rightxs;
        forall x | x in filter(xs, p) 
            ensures x in filter(xs[0..i], p) || x in filter(xs[i..], p)
        {
            assert x in xs ==> x in xs[0..i] || x in xs[i..];
        }
        var all := filter(xs[0..i], p) + filter(xs[i..], p);
        assert |filter(xs, p)| == |all|;
        // forall i: nat | i < |filter(xs,p)| //explodes
        //     ensures filter(xs, p)[i] == (filter(xs[0..i], p) + filter(xs[i..], p))[i]
        // {

        // }
}

For the filterLemmaSizes I thought of two approaches. Initially trying to break down the seqences and the filtered sequence but apart from the case that the first element in both sequences match I can't see how to do induction on the rest of the cases.

Then I thought maybe that I could try to do a proof by negation on the multiset values but I'm not sure of how to write those statements. It seems you should be able to assert that that if multiset(xs)[x] == #non-zero number then there exist that many indices in the original array that satisfy p(x) and so they should also be in filter(xs, p);.

lemma filterLemmaSizes<T(==)>(xs: seq<T>, fxs: seq<T>, p: (T) -> bool)
    requires fxs == filter(xs, p)
    ensures forall x: T :: x in xs && p(x) ==> multiset(xs)[x] == multiset(fxs)[x]
    ensures multiset(filter(xs,p)) <= multiset(xs)
{
    forall x | x in xs && p(x) 
        ensures multiset(xs)[x] == multiset(fxs)[x]
    {
        assert x in multiset(xs);
        assert x in xs[0..|xs|];
        assert x in multiset(fxs);
        assert x in fxs[0..|fxs|];
        if multiset(xs)[x] != multiset(fxs)[x] && multiset(xs)[x] < multiset(filter(xs, p))[x] {


        } else if multiset(xs)[x] != multiset(fxs)[x] && multiset(xs)[x] > multiset(filter(xs, p))[x] {

        }

        // if xs != [] && p(xs[0]) && x == xs[0] {
        //     assert xs == [xs[0]] + xs[1..];
        //     assert multiset(xs) == multiset{xs[0]} + multiset(xs[1..]);
        //     assert multiset(xs)[x] == multiset{xs[0]}[x] + multiset(xs[1..])[x];
        //     assert multiset(xs)[x] == multiset{xs[0]}[x] + multiset(xs[1..])[x];
        //     assert xs[0] == fxs[0];
        //     assert multiset(fxs) == multiset{xs[0]} + multiset(filter(xs[1..],p));
        //     assert x in xs;
        //     if x in xs[1..] {
        //         calc {
        //             multiset(xs)[x];
        //             ==
        //             multiset{x}[x] + multiset(xs[1..])[x];
        //             == {assert 1 == multiset{xs[0]}[x];}
        //             1 + multiset(xs[1..])[x];
        //             == { filterLemmaSizes(xs[1..], filter(xs[1..],p), p); }
        //             1 + multiset(filter(xs[1..], p))[x];
        //             ==
        //             multiset{xs[0]}[x] + multiset(filter(xs[1..],p))[x];
        //             ==
        //             multiset(fxs)[x];
        //         }
        //     } else{
        //         assert multiset(xs[1..])[x] == 0;
        //         assert multiset(filter(xs[1..], p))[x] == 0;
        //     }
        //     assert multiset(xs)[xs[0]] == multiset(fxs)[xs[0]];
        // } else if xs != [] && x != xs[0] {
        //     assert xs[0] == fxs[0];

        // } else{

        // }
    }

}


Solution

  • Proving both lemma need appeal to induction. See the code snippet below, I have n't proved first post condition in second lemma but it should be doable using induction too.

    function filter<T>(s: seq<T>, p: T -> bool) : seq<T>
    {
      if s == [] then []
      else if p(s[0]) then [s[0]] + filter(s[1..], p)
                      else filter(s[1..], p)
    }
    
    lemma filterSplit<T>(s: seq<T>, p: T -> bool, idx: nat)
      requires 0 <= idx <= |s|
      ensures filter(s, p) == filter(s[..idx], p) + filter(s[idx..], p)
    {
      if idx == 0 {
      }
      else {
        filterSplit(s[1..], p, idx-1);
        assert filter(s[1..], p) == filter(s[1..][..(idx-1)], p) + filter(s[1..][(idx-1)..], p);
        if p(s[0]) {
          calc {
            filter(s, p);
            [s[0]] + filter(s[1..], p);
            [s[0]] + filter(s[1..][..(idx-1)], p) + filter(s[1..][(idx-1)..], p);
            {
              assert s[..idx] == [s[0]] + s[1..idx];
              assert s[1..idx] == s[1..][..(idx-1)];
            }
            filter(s[..idx], p) + filter(s[1..][(idx-1)..], p);
            {
              assert s[1..][(idx-1)..] == s[idx..];
            }
            filter(s[..idx], p) + filter(s[idx..], p);
          }
        }
        else {}
      }
    }
    
    lemma filterMultiSet<T>(s: seq<T>, p:  T -> bool)
      ensures multiset(filter(s, p)) <= multiset(s)
    {
      if s == [] {
      }
      else {
        filterMultiSet(s[1..], p);
        calc <= {
          multiset(filter(s, p));
          multiset([s[0]]) + multiset(filter(s[1..], p));
          multiset([s[0]]) + multiset(s[1..]);
          {
            assert s == [s[0]] + s[1..];
          }
          multiset(s);
        }
      }
    }
    

    Update : See code snippet below for first postcondition of second lemma

    function filter<T>(s: seq<T>, p: T -> bool) : seq<T>
      ensures forall x :: x !in s ==> x !in filter(s, p)
    {
      if s == [] then []
      else if p(s[0]) then [s[0]] + filter(s[1..], p)
                      else filter(s[1..], p)
    }
    
    lemma filterIncludeMultiSet<T>(s: seq<T>, p: T -> bool)
      ensures forall x :: x in s && p(x) ==> multiset(s)[x] == multiset(filter(s, p))[x]
    {
      if s == [] {}
      else {
        var rs := s[1..];
        filterIncludeMultiSet(rs, p);
        assert forall x :: x in rs && p(x) ==> multiset(rs)[x] == multiset(filter(rs, p))[x];
    
        forall x | x in s && p(x) ensures multiset(s)[x] == multiset(filter(s, p))[x] {
          if x == s[0] {
            if x in rs {
              calc {
                multiset(s)[x];
                {
                  assert s == [s[0]] + rs;
                  assert multiset(s) == multiset([s[0]]) + multiset(rs);
                }
                multiset([s[0]])[x] + multiset(rs)[x];
                1 + multiset(filter(rs, p))[x];
              }
            }
            else {
              calc {
                multiset(s)[x];
                {
                  assert s == [s[0]] + rs;
                  assert multiset(s) == multiset([s[0]]) + multiset(rs);
                }
                multiset([s[0]])[x] + multiset(rs)[x];
                1;
              }
              calc {
                multiset(filter(s, p))[x];
                multiset([s[0]] + filter(rs, p))[x];
                multiset([s[0]])[x] + multiset(filter(rs, p))[x];
                1;
              }
            }
          }
          else {
            calc {
              multiset(s)[x];
              {
                assert s == [s[0]] + rs;
                assert multiset(s) == multiset([s[0]]) + multiset(rs);
              }
              multiset([s[0]])[x] + multiset(rs)[x];
              multiset(rs)[x];
              multiset(filter(rs, p))[x];
            }
          }
        }
      }