Is there a straightforward algorithm for figuring out if a variable is "used" within a given scope?
In a Python AST, I want to remove all assignments to variables that are not otherwise used anywhere, within a given scope.
In the following code, it is obvious to me (a human), that _hy_anon_var_1
is unused, and therefore the _hy_anon_var_1 = None
statements can be removed without changing the result:
# Before
def hailstone_sequence(n: int) -> Iterable[int]:
while n != 1:
if 0 == n % 2:
n //= 2
_hy_anon_var_1 = None
else:
n = 3 * n + 1
_hy_anon_var_1 = None
yield n
# After
def hailstone_sequence(n: int) -> Iterable[int]:
while n != 1:
if 0 == n % 2:
n //= 2
else:
n = 3 * n + 1
yield n
Extend this to []
-lookups with string literals as keys.
In this example, I would expect _hyx_letXUffffX25['x']
to be eliminated as unused, because _hyx_letXUffffX25
is local to h
, so _hyx_letXUffffX25['x']
is essentially the same thing as a local variable. I would then expect _hyx_letXUffffX25
itself to be eliminated once there are no more references to it.
# Before
def h():
_hyx_letXUffffX25 = {}
_hyx_letXUffffX25['x'] = 5
return 3
# After
def h():
return 3
From what I can tell, this is somewhat of an edge case, and I think the basic algorithmic problem is the same.
Assume that no dynamic name lookups are used in the code.
A name is used if any of these are true in a given scope:
return
statement, an expression on the right-hand side of an assignment statement, a default argument in a function definition, being referenced inside a local function definition, etc.augtarget
therein. This might represent "useless work" in a lot of programs, but for the purpose of this task that's OK and distinct from being an entirely unused name.nonlocal
or global
. These might be useless nonlocals or globals, but because they reach beyond the given scope, it is OK for my purposes to assume that they are "used".Please let me know in the comments if this seems incorrect, or if you think I am missing something.
Variable i
in f
is unused:
def f():
i = 0
return 5
Variable x
in f
is unused:
def f():
def g(x):
return x/5
x = 10
return g(100)
The name x
does appear in g
, but the variable x
in g
is local to g
. It shadows the variable x
created in f
, but the two x
names are not the same variable.
If g
has no parameter x
, then x
is in fact used:
def f():
x = 10
def g():
return x/5
return g(100)
Variable i
in f
is used:
def f():
i = 0
return i
Variable accum
in silly_map
and silly_sum
is used in both examples:
def silly_map(func, data):
data = iter(data)
accum = []
def _impl():
try:
value = next(data)
except StopIteration:
return accum
else:
accum.append(value)
return _impl()
return _impl()
def silly_any(func, data):
data = iter(data)
accum = False
def _impl():
nonlocal accum, data
try:
value = next(data)
except StopIteration:
return accum
else:
if value:
data = []
accum = True
else:
return _impl()
return _impl()
The solution below works in two parts. First, the syntax tree of the source is traversed and all unused target assignment statements are discovered. Second, the tree is traversed again via a custom ast.NodeTransformer
class, which removes these offending assignment statements. The process is repeated until all unused assignment statements are removed. Once this is finished, the final source is written out.
The ast
traverser class
:
import ast, itertools, collections as cl
class AssgnCheck:
def __init__(self, scopes = None):
self.scopes = scopes or cl.defaultdict(list)
@classmethod
def eq_ast(cls, a1, a2):
#check that two `ast`s are the same
if type(a1) != type(a2):
return False
if isinstance(a1, list):
return all(cls.eq_ast(*i) for i in itertools.zip_longest(a1, a2))
if not isinstance(a1, ast.AST):
return a1 == a2
return all(cls.eq_ast(getattr(a1, i, None), getattr(a2, i, None))
for i in set(a1._fields)|set(a2._fields) if i != 'ctx')
def check_exist(self, t_ast, s_path):
#traverse the scope stack and remove scope assignments that are discovered in the `ast`
s_scopes = []
for _ast in t_ast:
for sid in s_path[::-1]:
s_scopes.extend(found:=[b for _, b in self.scopes[sid] if AssgnCheck.eq_ast(_ast, b) and \
all(not AssgnCheck.eq_ast(j, b) for j in s_scopes)])
self.scopes[sid] = [(a, b) for a, b in self.scopes[sid] if b not in found]
def traverse(self, _ast, s_path = [1]):
#walk the ast object itself
_t_ast = None
if isinstance(_ast, ast.Assign): #if assignment statement, add ast object to current scope
self.traverse(_ast.targets[0], s_path)
self.scopes[s_path[-1]].append((True, _ast.targets[0]))
_ast = _ast.value
if isinstance(_ast, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):
s_path = [*s_path, (nid:=(1 if not self.scopes else max(self.scopes)+1))]
if isinstance(_ast, (ast.FunctionDef, ast.AsyncFunctionDef)):
self.scopes[nid].extend([(False, ast.Name(i.arg)) for i in _ast.args.args])
_t_ast = [*_ast.args.defaults, *_ast.body]
self.check_exist(_t_ast if _t_ast is not None else [_ast], s_path) #determine if any assignment statement targets have previously defined names
if _t_ast is None:
for _b in _ast._fields:
if isinstance((b:=getattr(_ast, _b)), list):
for i in b:
self.traverse(i, s_path)
elif isinstance(b, ast.AST):
self.traverse(b, s_path)
else:
for _ast in _t_ast:
self.traverse(_ast, s_path)
Putting it all together:
class Visit(ast.NodeTransformer):
def __init__(self, asgn):
super().__init__()
self.asgn = asgn
def visit_Assign(self, node):
#remove assignment nodes marked as unused
if any(node.targets[0] == i for i in self.asgn):
return None
return node
def remove_assgn(f_name):
tree = ast.parse(open(f_name).read())
while True:
r = AssgnCheck()
r.traverse(tree)
if not (k:=[j for b in r.scopes.values() for k, j in b if k]):
break
v = Visit(k)
tree = v.visit(tree)
return ast.unparse(tree)
print(remove_assgn('test_name_assign.py'))
Contents of test_name_assign.py
:
def hailstone_sequence(n: int) -> Iterable[int]:
while n != 1:
if 0 == n % 2:
n //= 2
_hy_anon_var_1 = None
else:
n = 3 * n + 1
_hy_anon_var_1 = None
yield n
Output:
def hailstone_sequence(n: int) -> Iterable[int]:
while n != 1:
if 0 == n % 2:
n //= 2
else:
n = 3 * n + 1
yield n
Contents of test_name_assign.py
:
def h():
_hyx_letXUffffX25 = {}
_hyx_letXUffffX25['x'] = 5
return 3
Output:
def h():
return 3
Contents of test_name_assign.py
:
def f():
i = 0
return 5
Output:
def f():
return 5
Contents of test_name_assign.py
:
def f():
x = 10
def g():
return x/5
return g(100)
Ouptut:
def f():
x = 10
def g():
return x / 5
return g(100)