I am implementing a unification algorithm in JavaScript to compute the most general unifier of two given terms. In brief, unification is the process of taking two terms t1
and t2
and combining them into a new term t
which is specialization of both t1
and t2
. Consider (note that variables are capitalized):
t1 := foo(jane, A, B)
t2 := foo(X, Y, rocks)
The specialization operator ⊏
denotes that “a is a specialization of b” in a ⊏ b
. For example:
foo(jane, doe, X) ⊏ t1
foo(on, the, rocks) ⊏ t2
A unifier of two terms is a term that specializes both those terms. For example:
foo(jane, street, rocks) ⊏ t1
foo(jane, street, rocks) ⊏ t2
The most general unifier of two terms is a unifier of those two terms which generalizes all other unifiers of those two terms (i.e. all the other unifiers specialize the most general unifier). For example:
foo(jane, street, rocks) ⊏ foo(jane, M, rocks)
Hence foo(jane, M, rocks)
is the most general unifier of t1
and t2
. The following algorithm can be used to compute the most general unifier for terms of the first-order predicate logic.
t1
and t2
are constants then they unify if and only if they are the same constant (e.g. jane
, street
and rocks
are constants).t1
is a variable and t2
is a non-variable (i.e. a constant or a complex term) then t1
is instantiated to t2
if and only if t1
doesn't occur in t2
.t2
is a variable and t1
is a non-variable (i.e. a constant or a complex term) then t2
is instantiated to t1
if and only if t2
doesn't occur in t1
.t1
and t2
are both variables then they are both instantiated to each other and are said to share values.t1
and t2
are complex terms then they unify if and only if they are the same kind of complex term and their corresponding arguments unify.Anyway, this is how I wanted to tackle this problem:
function variable(name) {
return {
type: "variable",
name: name,
term: null
};
}
function constant(name) {
return {
type: "non_variable",
name: name,
args: []
};
}
function complex(name, args) {
return {
type: "non_variable",
name: name,
args: args
};
}
Now, we can define t1
and t2
as follows:
var t1 = complex("foo", [constant("jane"), variable("A"), variable("B")]);
var t2 = complex("foo", [variable("X"), variable("Y"), constant("rocks")]);
Then we implement the unification algorithm:
function unify(a, b) {
var x = resolve(a);
var y = resolve(b);
var operation = 0;
if (x.type === "non_variable") operation += 2;
if (y.type === "non_variable") operation += 1;
switch (operation) {
case 0:
return x.term = y.term = variable(x.name);
case 1:
if (occurs(x, y)) throw new Error(x.name + " occurs in " + show(y));
return x.term = y;
case 2:
if (occurs(y, x)) throw new Error(y.name + " occurs in " + show(x));
return y.term = x;
case 3:
if (x.name !== y.name) throw new
Error(x.name + " and " + y.name + " are different terms");
var ax = x.args;
var ay = y.args;
if (ax.length !== ay.length) throw new
Error(x.name + " and " + y.name + " have different arities");
var args = ax.map(function (t, i) {
return unify(t, ay[i]);
});
return complex(x.name, args);
}
}
function resolve(t) {
if (t.type === "non_variable") return t;
var v = t;
while (v.type === "variable" && v.term) v = v.term;
return t.term = v;
}
function show(t) {
return t.name + "(" + t.args.map(function (t) {
return t.type === "non_variable" &&
t.args.length > 0 ? show(t) : t.name;
}).join(", ") + ")";
}
function occurs(v, t) {
return t.args.some(function (t) {
t = resolve(t);
return t.type === "variable" ? t === v : occurs(v, t);
});
}
And it works:
var t = unify(t1, t2);
alert(show(t));
<script>
function variable(name) {
return {
type: "variable",
name: name,
term: null
};
}
function constant(name) {
return {
type: "non_variable",
name: name,
args: []
};
}
function complex(name, args) {
return {
type: "non_variable",
name: name,
args: args
};
}
var t1 = complex("foo", [constant("jane"), variable("A"), variable("B")]);
var t2 = complex("foo", [variable("X"), variable("Y"), constant("rocks")]);
function unify(a, b) {
var x = resolve(a);
var y = resolve(b);
var operation = 0;
if (x.type === "non_variable") operation += 2;
if (y.type === "non_variable") operation += 1;
switch (operation) {
case 0:
return x.term = y.term = variable(x.name);
case 1:
if (occurs(x, y)) throw new Error(x.name + " occurs in " + show(y));
return x.term = y;
case 2:
if (occurs(y, x)) throw new Error(y.name + " occurs in " + show(x));
return y.term = x;
case 3:
if (x.name !== y.name) throw new
Error(x.name + " and " + y.name + " are different terms");
var ax = x.args;
var ay = y.args;
if (ax.length !== ay.length) throw new
Error(x.name + " and " + y.name + " have different arities");
var args = ax.map(function (t, i) {
return unify(t, ay[i]);
});
return complex(x.name, args);
}
}
function resolve(t) {
if (t.type === "non_variable") return t;
var v = t;
while (v.type === "variable" && v.term) v = v.term;
return t.term = v;
}
function show(t) {
return t.name + "(" + t.args.map(function (t) {
return t.type === "non_variable" &&
t.args.length > 0 ? show(t) : t.name;
}).join(", ") + ")";
}
function occurs(v, t) {
return t.args.some(function (t) {
t = resolve(t);
return t.type === "variable" ? t === v : occurs(v, t);
});
}
</script>
However, I have only one problem: the way I have implemented first-class variables is not very nice. The way I have implemented it is as follows:
term
property which is null
initially (to show that the variable has not yet been instantiated).term
property is set to the non-variable, which is fine.term
property of one variable to the other because that would result in a cyclic reference. Therefore, I create a new variable and set both term
properties of both the variables to the new variable.term
chain will become longer which is why I need the resolve
function in my code above. It traverses the term
chain and returns the last term in the chain.So my question is whether there is a better way to create first-class variables in JavaScript (i.e. one in which sharing values between two variables doesn't require the use of a term
chain). If there is a better way then please describe the algorithm.
It sounds like you could use a disjoint-set-forest that the terms would be members of. This has efficient union/find.
http://en.wikipedia.org/wiki/Disjoint-set_data_structure#Disjoint-set_forests
A bit of googling reaffirms that this is used in prolog unification implementations.
http://docs.lib.purdue.edu/cgi/viewcontent.cgi?article=2318&context=cstech