I have a root class, College,
which holds many Course
instances:
class Course:
    """A single course: a name, a URL, a hidden flag and a semester label."""

    def __init__(self, name, url, hidden, semester="SS16"):
        """Store the given values as attributes; semester defaults to "SS16"."""
        self.name, self.url = name, url
        self.hidden, self.semester = hidden, semester

    def __str__(self):
        """Return a one-line summary listing name, semester, url and hidden."""
        fields = (self.name, self.semester, self.url, str(self.hidden))
        return 'Course: (Name: %s, Semester: %s, url: %s, hidden: %s)' % fields
class College:
    """A college: a URL plus a collection of courses."""

    # NOTE: the default for `courses` is a single set object created when this
    # function is defined; every instance built without an explicit `courses`
    # argument refers to that same set.
    def __init__(self, url='http://dummy', courses=set()):
        """Store url and courses on the instance without copying them."""
        self.url, self.courses = url, courses

    def __str__(self):
        """Return the URL followed by the comma-separated str() of each course."""
        listing = ', '.join(map(str, self.courses))
        head = 'College: (url: %s, Courses:[' % (self.url)
        return head + listing + "])"
Now I want to save/load my College object to/from a JSON file. For encoding I created a custom JSONEncoder
class, which seems to work fine:
from json import JSONEncoder
from json import JSONDecoder
class CollegeEncoder(JSONEncoder):
    """JSONEncoder that serializes an object exposing `url` and `courses`."""

    def default(self, o):
        """Return a plain dict for *o*.

        Reads o.url and iterates o.courses; each course contributes its
        name, url, hidden and semester attributes as a nested dict.
        """
        course_dicts = [
            {'name': course.name, 'url': course.url,
             'hidden': course.hidden, 'semester': course.semester}
            for course in o.courses
        ]
        return {'url': o.url, 'courses': course_dicts}
For decoding I wrote a simple function:
def from_json(dct):
    """Build a College from a decoded JSON dict that has a 'url' key.

    Only dct['url'] is passed to College; every other key is ignored.
    Returns None when the dict has no 'url' key.
    """
    if 'url' not in dct:
        return None
    return College(dct['url'])
Now if I test JSON encoding/decoding like this:
# Build a college, add three courses, then round-trip it through JSON.
myCollege = College()
for title, link in (("Course1", "url1"), ("Course2", "url2"), ("Course3", "url3")):
    myCollege.courses.add(Course(title, link, False))

# Encode with the custom encoder, decode with from_json as the object hook.
dump = CollegeEncoder().encode(myCollege)
college = JSONDecoder(object_hook=from_json).decode(dump)
print(college)
At this point I was expecting to get only a plain College object with just the URL, but I get my whole College with all its courses and their attributes:
College: (url: http://dummy, Courses:[Course: (Name: Course1, Semester: SS16,
url: url1, hidden: False), Course: (Name: Course2, Semester: SS16, url: url2,
hidden: False), Course: (Name: Course3, Semester: SS16, url: url3, hidden: False)])
But I don't understand how my 'object_hook' function knows how to deserialize the nested Course objects.
Should I keep it that way or expand custom deserialization? And how could I prevent my from_json
function from behaving that way by default?
This is not the JSON decoder; you used a mutable default argument in your College.__init__
method:
def __init__(self, url='http://dummy', courses=set()):
The courses
default value is created once, when the function is created. You then add values to this single, essentially global, set:
myCollege.courses.add(Course("Course1", "url1", False))
That set is never emptied, so when you create a new College
instance, the set is still there:
>>> College.__init__.__defaults__
('http://dummy', {<__main__.Course object at 0x10f55cda0>, <__main__.Course object at 0x10f55cc50>, <__main__.Course object at 0x10f55ccf8>})
>>> foo = College()
>>> foo.courses
{<__main__.Course object at 0x10f55cda0>, <__main__.Course object at 0x10f55cc50>, <__main__.Course object at 0x10f55ccf8>}
>>> print(foo)
College: (url: http://dummy, Courses:[Course: (Name: Course3, Semester: SS16, url: url3, hidden: False), Course: (Name: Course1, Semester: SS16, url: url1, hidden: False), Course: (Name: Course2, Semester: SS16, url: url2, hidden: False)])
See "Least Astonishment" and the Mutable Default Argument. Set courses
to a sentinel instead, and create a new set in the __init__
method if the default is still set to the sentinel. None
is a good choice here:
class College:
    """A college: a URL plus a set of courses."""

    def __init__(self, url='http://dummy', courses=None):
        """Store url and courses on the instance.

        courses defaults to None, a sentinel meaning "not supplied"; in that
        case a fresh empty set is created for this instance, so instances
        never share a default container.
        """
        self.url = url
        # Test identity against the sentinel rather than truthiness:
        # `courses or set()` would also discard an empty set passed in by
        # the caller, because empty containers are falsy.
        self.courses = set() if courses is None else courses