I am aware that I can use method chaining by simply having methods return self, e.g.
object.routine1().routine2().routine3()
But is it possible to organize methods into layers or groups when applying method chaining? e.g.
object.Layer1.routine1().routine2().Layer2.routine3()
The context is that I am trying to build a text analytics pipeline and the different layers would correspond to text level, sentence level and token level preprocessing steps. So what I am trying to build is something like this
# Target syntax: each level name appears once and may be followed by several
# of that level's methods before switching to the next level.
text = "This is an example foo text with some special characters!!!! And some sentences"
pr = TextPreprocessor(text)
processed_text = (
pr.text_level.lower_case()
# Two sentence-level methods chained after a single `.sentence_level`.
.sentence_level.split_sentences().remove_special_characters()
.token_level.tokenize()
# Read the final value off the end of the chain.
.text
)
This is the code that almost (!) gets the text processing example to work:
import re
class TextLevelPreprocessor:
    """Text-level preprocessing steps applied to a parent's ``text``.

    ``parent`` is any object exposing a writable ``text`` attribute.
    """

    def __init__(self, parent):
        self.parent = parent

    def lower_case(self):
        """Lower-case the parent's text and return the parent.

        Because the parent (not this level object) is returned, the next call
        in a chain has to go through a level attribute again.
        """
        self.parent.text = self.parent.text.lower()
        return self.parent
class SentenceLevelPreprocessor:
    """Sentence-level preprocessing steps applied to a parent's ``text``.

    ``parent`` is any object exposing a writable ``text`` attribute.
    """

    def __init__(self, parent):
        self.parent = parent

    def split_sentences(self):
        """Replace the parent's text with the list obtained by splitting on ``'. '``.

        Returns the parent, so a following sentence-level call must be
        prefixed with ``.sentence_level`` again.
        """
        self.parent.text = self.parent.text.split('. ')
        return self.parent

    def remove_special_characters(self):
        """Remove ``!``, ``@``, ``#`` and ``$`` from every item of the parent's text.

        Iterates over ``parent.text``, so it is meant to run after
        ``split_sentences`` has turned the text into a list of strings.
        Returns the parent.
        """
        self.parent.text = [re.sub('[!@#$]', '', s) for s in self.parent.text]
        return self.parent
class TokenLevelPreprocessor:
    """Token-level preprocessing steps applied to a parent's ``text``.

    ``parent`` is any object exposing a writable ``text`` attribute.
    """

    def __init__(self, parent):
        self.parent = parent

    def tokenize(self):
        """Split every item of the parent's text on whitespace.

        Iterates over ``parent.text`` and replaces it with a list of token
        lists, one per item. Returns the parent.
        """
        self.parent.text = [t.split() for t in self.parent.text]
        return self.parent
class TextPreprocessor:
    """Hold the text being processed and expose one helper object per level."""

    def __init__(self, text):
        self.text = text
        # Each level object receives this instance as its parent so that its
        # methods can read and replace ``self.text``.
        self.text_level = TextLevelPreprocessor(self)
        self.sentence_level = SentenceLevelPreprocessor(self)
        self.token_level = TokenLevelPreprocessor(self)
However, with this code only the following syntax works:
pr = TextPreprocessor(text)
processed_text = (
    pr.text_level.lower_case()
    # Every call has to be prefixed with its level again, because each method
    # returns the parent TextPreprocessor rather than the level object.
    .sentence_level.split_sentences()
    .sentence_level.remove_special_characters()
    .token_level.tokenize()
    .text
)
which would mean that one would have to add the "Layer" or "Group" every time one uses a method, which seems verbose.
You can make each level-specific object a proxy for the parent object, so that it has access to both its own level-specific methods and the parent's attributes. As a bonus, level-specific methods can then reference self.text instead of self.parent.text:
class TextPreprocessorLevel:
    """Base class for level objects that act as a proxy for their parent.

    Attribute reads that are not found on the level object fall through to
    the parent, and every attribute assignment is forwarded to the parent.
    Level methods can therefore use ``self.text`` directly and reach the
    parent's other attributes through ``self``.
    """

    def __init__(self, parent):
        # Store the parent directly in the instance dict: a plain
        # ``self.parent = parent`` would be routed through __setattr__ below,
        # which itself needs ``self.parent`` to exist.
        self.__dict__['parent'] = parent

    def __getattr__(self, name):
        """Delegate lookups that failed on this object to the parent."""
        # __getattr__ only runs when normal lookup fails. If ``parent`` itself
        # is missing (an instance created without __init__, as copy/pickle do),
        # evaluating ``self.parent`` below would re-enter this method and end
        # in RecursionError; raise the ordinary AttributeError instead.
        if name == 'parent':
            raise AttributeError(name)
        return getattr(self.parent, name)

    def __setattr__(self, name, value):
        """Forward every attribute assignment to the parent."""
        setattr(self.parent, name, value)
class TextLevelPreprocessor(TextPreprocessorLevel):
    """Text-level preprocessing steps."""

    def lower_case(self):
        """Lower-case ``self.text`` and return this level object for chaining."""
        self.text = self.text.lower()
        return self
class SentenceLevelPreprocessor(TextPreprocessorLevel):
    """Sentence-level preprocessing steps."""

    def split_sentences(self):
        """Replace ``self.text`` with the list obtained by splitting on ``'. '``.

        Returns this level object, so further sentence-level methods can be
        chained without repeating ``.sentence_level``.
        """
        self.text = self.text.split('. ')
        return self

    def remove_special_characters(self):
        """Remove ``!``, ``@``, ``#`` and ``$`` from every item of ``self.text``.

        Iterates over ``self.text``, so it is meant to run after
        ``split_sentences``. Returns this level object for chaining.
        """
        self.text = [re.sub('[!@#$]', '', s) for s in self.text]
        return self
class TokenLevelPreprocessor(TextPreprocessorLevel):
    """Token-level preprocessing steps."""

    def tokenize(self):
        """Split every item of ``self.text`` on whitespace.

        Replaces ``self.text`` with a list of token lists, one per item, and
        returns this level object for chaining.
        """
        self.text = [t.split() for t in self.text]
        return self
class TextPreprocessor:
    """Hold the text being processed and expose one level object per level."""

    def __init__(self, text):
        self.text = text
        # Each level object is given this instance as its parent; the level
        # objects are stored here so they can be reached by name in a chain.
        self.text_level = TextLevelPreprocessor(self)
        self.sentence_level = SentenceLevelPreprocessor(self)
        self.token_level = TokenLevelPreprocessor(self)
so that method chaining can work on level-specific instances while allowing access to attributes of the other levels:
# End-to-end example: each level is named once and its methods are chained.
text = "This is an example foo text with some special characters. And some sentences"
pr = TextPreprocessor(text)
processed_text = (
pr.text_level.lower_case()
# Level methods return the level object itself, so a second sentence-level
# method can follow directly without repeating `.sentence_level`.
.sentence_level.split_sentences().remove_special_characters()
# `.token_level` is not defined on the level object; the lookup is delegated
# to the parent TextPreprocessor.
.token_level.tokenize()
.text
)
print(processed_text)
This outputs:
[['this', 'is', 'an', 'example', 'foo', 'text', 'with', 'some', 'special', 'characters'], ['and', 'some', 'sentences']]
Demo here
The downside of a proxy object, however, is that attributes are dynamically delegated and therefore linters and static type checkers will likely complain about references to these attributes being undefined, so if you want to make them happy you can explicitly define the delegated attributes as properties instead:
class TextPreprocessorLevel:
    """Base class for level objects that delegate to their parent explicitly.

    Each delegated attribute is spelled out as a property, so it is visible
    to linters and static type checkers.
    """

    def __init__(self, parent):
        self.parent = parent

    @property
    def text(self):
        """The parent's ``text``; assigning to it updates the parent."""
        return self.parent.text

    @text.setter
    def text(self, value):
        self.parent.text = value

    @property
    def text_level(self):
        """The parent's ``text_level`` attribute (read-only here)."""
        return self.parent.text_level

    @property
    def sentence_level(self):
        """The parent's ``sentence_level`` attribute (read-only here)."""
        return self.parent.sentence_level

    @property
    def token_level(self):
        """The parent's ``token_level`` attribute (read-only here)."""
        return self.parent.token_level
Demo here