python, python-hypothesis

Generating these strategies using Hypothesis (strings with repetitions)


While following a tutorial on Hypothesis I ran into this problem. I have to write a round-trip test for a run-length encoder/decoder (explained in the docstrings):

from hypothesis import given, strategies as st
from itertools import groupby
from typing import List, Union

def run_length_encoder(in_string: str) -> List[Union[str, int]]:
    """
    Run-length encode a string into a flat list of tokens.

    A character that appears once in a row is emitted as-is. A run of two or
    more equal characters is emitted as the character twice, followed by the
    length of the run.

    >>> run_length_encoder("aaaaabbcbc")
    ['a', 'a', 5, 'b', 'b', 2, 'c', 'b', 'c']
    """
    assert isinstance(in_string, str)
    encoded = []
    for char, run in groupby(in_string):
        run_length = len(list(run))
        if run_length > 1:
            # The doubled character marks "a count follows".
            encoded += [char, char, run_length]
        else:
            encoded.append(char)
    assert isinstance(encoded, list)
    assert all(isinstance(token, (str, int)) for token in encoded)
    return encoded

def run_length_decoder(in_list: List[Union[str, int]]) -> str:
    """
    Decode a list produced by ``run_length_encoder`` back into a string.

    String items are copied to the output unchanged. An integer item is the
    total length of the run formed by the last character written so far; that
    character is already present twice in the output, so only ``item - 2``
    further copies are appended.

    Raises:
        IndexError: if an integer appears before any character was written.

    >>> run_length_decoder(['a', 'a', 5, 'b', 'b', 2, 'c', 'b', 'c'])
    'aaaaabbcbc'
    """
    assert isinstance(in_list, list)
    assert all(isinstance(x, (str, int)) for x in in_list)
    # Collect pieces and join once instead of growing a string with +=.
    parts: List[str] = []
    # Last character written to the output; empty until one has been written.
    last_char = ""
    for item in in_list:
        if isinstance(item, int):
            if not last_char:
                raise IndexError(
                    "run length given before any character was decoded"
                )
            # A non-positive (item - 2) multiplies to "", appending nothing.
            parts.append(last_char * (item - 2))
        else:
            parts.append(item)
            if item:
                last_char = item[-1]
    out = "".join(parts)
    assert isinstance(out, str)
    return out

I can choose the form of the test: encode(decode(in_list)) or decode(encode(in_string)).

@given(
    in_string=st.text()
)
def test_roundtrip_run_length_encoder_decoder(in_string):
    """Check that decoding an encoded string gives back the original string."""
    encoded_list = run_length_encoder(in_string)
    assert isinstance(encoded_list, list)
    assert all(isinstance(x, (str, int)) for x in encoded_list)
    decoded_string = run_length_decoder(encoded_list)
    assert isinstance(decoded_string, str)
    # On failure, show both strings so the mismatch is visible.
    assert in_string == decoded_string, (in_string, decoded_string)


# Calling the @given-decorated function with no arguments runs the property.
test_roundtrip_run_length_encoder_decoder()

This was easy, but in_string doesn't contain enough repetitions. They ask me to do something better (suggestion: use one_of).

So I should add a random number of repetitions, each with a random length (ee vs eee), at random positions. How can I do that with Hypothesis? Maybe they are asking for something simpler.

I think generating the list is more difficult: I should generate a list without repetitions and add some sequences like [..., 'k', 'k', #, ...], where k is a character (string) and # is an integer (int). Of course, before 'k' I need a different character.


Solution

  • def text_with_repeats(alphabet=st.characters()):
        """
        Build a strategy for strings that contain repeated runs.

        Draws a list of (count, value) pairs, where count is an integer from
        1 to 5 and value comes from the `alphabet` strategy, then joins each
        value repeated `count` times into a single string.
        """
        # It's fine if this encoder wouldn't round-trip; we just
        # want to make a string with more repeats and it does that.
        return st.lists(
            st.tuples(st.integers(1, 5), alphabet)
        ).map(lambda ls: "".join(n * char for n, char in ls))
    
    @given(
        in_string = st.text() | text_with_repeats()
    )