Tags: python, tensorflow, recurrent-neural-network, sequence-to-sequence

Tensorflow RNN: how to infer a sequence without duplicates?


I'm working on a seq2seq RNN generating an output sequence of labels given a seed label. During the inference step I'd like to generate sequences containing only unique labels (i.e. skip labels that have already been added to the output sequence). To do this I created a sampler object that tries to remember the labels that have been added to the output and reduce their logit value to -np.inf.

Here is the sampler code:

class InferenceSampler(object):
    """Callable sampler that greedily picks a label id from projected decoder outputs.

    It keeps an additive mask (``ids_mask``) over the labels and adds ``-np.inf``
    at the ids it has just sampled, with the intent that those ids are not
    picked again.
    """

    def __init__(self, out_weights, out_biases):
        """Store the projection parameters and create an all-zero mask.

        Args:
            out_weights: output projection weights; its first dimension is used
                as the number of labels. It is transposed before the matmul.
            out_biases: output projection biases added to the logits.
        """
        self._out_weights = tf.transpose(out_weights)
        self._out_biases = out_biases

        self._n_tracks = out_weights.shape[0]
        # A zero mask leaves every logit unchanged when it is added.
        self.ids_mask = tf.zeros([self._n_tracks], name="playlist_mask")

    def __call__(self, decoder_outputs):
        """Project ``decoder_outputs`` to logits, mask them and return the argmax ids as int32."""
        _logits = tf.matmul(decoder_outputs, self._out_weights)
        _logits = tf.nn.bias_add(_logits, self._out_biases)

        # apply mask
        _logits = _logits + self.ids_mask

        _sample_ids = tf.cast(tf.argmax(_logits, axis=-1), tf.int32)

        # update mask
        # NOTE(review): this rebinds a Python attribute to a new tensor; it only
        # takes effect each time this method is called in Python, which is
        # presumably once while the decoding loop is being built - confirm.
        step_ids_mask = tf.sparse_to_dense(_sample_ids, [self._n_tracks], -np.inf)
        self.ids_mask = self.ids_mask + step_ids_mask

        return _sample_ids

The code of the inference graph looks like this:

# Scalar int32 placeholder used below as the decoding iteration limit.
self._max_playlist_len = tf.placeholder(tf.int32, ())
# 1-D int32 placeholder of seed label ids; its length is used as the batch size.
self._start_tokens = tf.placeholder(tf.int32, [None])

sample_fn = InferenceSampler(out_weights, out_biases)
with tf.name_scope("inf_decoder"):
    def _end_fn(sample_ids):
        # A sequence is reported as finished once PAD_ITEM_ID is sampled.
        return tf.equal(sample_ids, PAD_ITEM_ID)

    def _next_inputs_fn(sample_ids):
        # The next decoder input is the embedding of the sampled id.
        return tf.nn.embedding_lookup(
            track_embs,
            sample_ids
        )

    # Embeddings of the seed labels are the first decoder inputs.
    _start_inputs = tf.nn.embedding_lookup(
        track_embs,
        self._start_tokens
    )

    helper = tf.contrib.seq2seq.InferenceHelper(
        sample_fn=sample_fn,
        sample_shape=[],
        sample_dtype=tf.int32,
        start_inputs=_start_inputs,
        end_fn=_end_fn,
        next_inputs_fn=_next_inputs_fn
    )
    decoder = tf.contrib.seq2seq.BasicDecoder(
        rnn_cell,
        helper,
        # Zero initial state sized to the number of start tokens.
        rnn_cell.zero_state(tf.shape(self._start_tokens)[0], tf.float32),
        output_layer=projection_layer
    )
    outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder,
        maximum_iterations=self._max_playlist_len
    )

# The sampled ids collected by the decoder are exposed as the generated playlists.
self.playlists = outputs.sample_id

Unfortunately, the results still have duplicated labels. Moreover, when I try to get access to the sample_fn.ids_mask I receive an error message: ValueError: Operation 'inf_decoder/decoder/while/BasicDecoderStep/add_1' has been marked as not fetchable.

What am I doing wrong? And is it legitimate to create such a sample_fn?


Solution

  • So, after some investigation I found answers to all my questions related to this thread. The main question was: why self.ids_mask in InferenceSampler does not update? The reason is in the internals of dynamic_decode. According to this answer in Tensorflow's issue tracker:

    ... only tensors defined inside the loop will be evaluated every loop iteration. All tensors defined outside a loop will be evaluated exactly once.

    In my case, self.ids_mask is defined outside the loop. That means that I need to rewrite dynamic_decode to get what I want. The code below is a slightly modified version of the initial task, but it does almost the same thing.

    Let's start with a new dynamic_decode, which should create and update the mask that collects the sample_ids that have already been predicted. I removed the code that I didn't modify (shown as "..."); follow the initial_mask and mask variables.

    New dynamic_decode:

    def dynamic_decode(decoder,
                       output_time_major=False,
                       impute_finished=False,
                       maximum_iterations=None,
                       parallel_iterations=32,
                       swap_memory=False,
                       scope=None):
        # NOTE: abridged excerpt - each "..." line stands for code that is not
        # shown here; only the mask-related lines are reproduced.
        ...
            # decoder.initialize() additionally returns the initial mask.
            initial_finished, initial_inputs, initial_mask, initial_state = decoder.initialize()
        ...
            def body(time, outputs_ta, state, inputs, finished, sequence_lengths, mask):
                """Internal while_loop body.
    
                Args:
                    time: scalar int32 tensor.
                    outputs_ta: structure of TensorArray.
                    state: (structure of) state tensors and TensorArrays.
                    inputs: (structure of) input tensors.
                    finished: bool tensor (keeping track of what's finished).
                    sequence_lengths: int32 tensor (keeping track of time of finish).
                    mask: SparseTensor to remove already predicted items; it is
                        passed to `decoder.step` and the updated mask is returned.
    
                Returns:
                    `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
                      next_sequence_lengths, next_mask)`.
                """
                # The current mask goes into the decoder step, which hands back
                # the mask for the next iteration.
                (next_outputs, decoder_state, next_inputs, next_mask,
                 decoder_finished) = decoder.step(time, inputs, state, mask)
                ...
                nest.assert_same_structure(state, decoder_state)
                nest.assert_same_structure(outputs_ta, next_outputs)
                nest.assert_same_structure(inputs, next_inputs)
                nest.assert_same_structure(mask, next_mask)
                ...
                return (time + 1, outputs_ta, next_state, next_inputs, next_finished,
                        next_sequence_lengths, next_mask)
    
            # The mask is a loop variable, so the value returned by `body` is
            # fed into the next iteration.
            res = control_flow_ops.while_loop(
                condition,
                body,
                loop_vars=[
                    initial_time, initial_outputs_ta, initial_state, initial_inputs,
                    initial_finished, initial_sequence_lengths, initial_mask,
                ],
                parallel_iterations=parallel_iterations,
                swap_memory=swap_memory)
        ...
        return final_outputs, final_state, final_sequence_lengths
    

    Next, the mask has to be passed through the Decoder and the Helper. Here are the updated versions of BasicDecoder and InferenceHelper:

    MaskedDecoder:

    class MaskedDecoder(BasicDecoder):
        """BasicDecoder whose step also threads a mask through the helper."""

        def step(self, time, inputs, state, mask, name=None):
            """Run one decoding step.

            Args:
                time: current step, forwarded to the helper.
                inputs: inputs fed to the cell for this step.
                state: cell state from the previous step.
                mask: mask forwarded to the helper's ``sample`` and ``next_inputs``.
                name: optional name for the enclosing name scope.

            Returns:
                ``(outputs, next_state, next_inputs, next_mask, finished)`` where
                ``outputs`` is a ``BasicDecoderOutput`` of the (optionally
                projected) cell outputs and the sampled ids.
            """
            scope_values = (time, inputs, state, mask)
            with ops.name_scope(name, "MaskedDecoderStep", scope_values):
                cell_outputs, cell_state = self._cell(inputs, state)
                if self._output_layer is not None:
                    cell_outputs = self._output_layer(cell_outputs)

                # Both helper calls receive the same step context.
                step_context = dict(
                    time=time,
                    outputs=cell_outputs,
                    state=cell_state,
                    mask=mask,
                )
                sample_ids = self._helper.sample(**step_context)
                helper_result = self._helper.next_inputs(
                    sample_ids=sample_ids, **step_context)
                finished, next_inputs, next_state, next_mask = helper_result
            outputs = BasicDecoderOutput(cell_outputs, sample_ids)
            return (outputs, next_state, next_inputs, next_mask, finished)
    

    MaskedInferenceHelper:

    class MaskedInferenceHelper(Helper):
        """Inference helper that greedily samples the most similar track not yet masked.

        A sparse mask holding ``np.inf`` at the ids that were already used is
        passed in and out of ``sample`` / ``next_inputs`` instead of being kept
        as instance state.
        """

        def __init__(self, norm_track_embs, features, start_sample_ids):
            """Keep the embeddings, the extra features and the seed ids.

            Args:
                norm_track_embs: track embedding matrix; its first dimension is
                    used as the number of tracks.
                features: tensor concatenated to the embeddings along axis 1 to
                    form the decoder inputs.
                start_sample_ids: seed ids; the size of the first dimension is
                    used as the batch size.
            """
            self._norm_track_embs = norm_track_embs
            self._features = features
            self._start_sample_ids = start_sample_ids

            self._batch_size = tf.shape(start_sample_ids)[0]
            self._n_tracks = tf.shape(norm_track_embs)[0]

            self._sample_shape = tf.TensorShape([])
            self._sample_dtype = tf.int32

        def _get_sparse_mask(self, sample_ids):
            """Return a [batch_size, n_tracks] SparseTensor with inf at (row, sample_ids[row])."""
            dense_shape = tf.convert_to_tensor([
                tf.cast(self._batch_size, dtype=tf.int64),
                tf.cast(self._n_tracks, dtype=tf.int64)
            ])

            # One entry per batch row: (row index, sampled id).
            row_ids = tf.range(0, self._batch_size)
            coords = tf.cast(tf.stack([row_ids, sample_ids], axis=1), dtype=tf.int64)
            penalties = tf.fill([self._batch_size], np.inf)

            return tf.SparseTensor(
                indices=coords, values=penalties, dense_shape=dense_shape)

        ...

        def initialize(self, name=None):
            """Return ``(finished, start_inputs, mask)`` for the first step.

            ``finished`` is all False, ``start_inputs`` are the seed embeddings
            concatenated with the features, and ``mask`` marks the seed ids.
            """
            not_finished = tf.tile([False], [self._batch_size])
            seed_embs = tf.nn.embedding_lookup(
                self._norm_track_embs, self._start_sample_ids)
            first_inputs = tf.concat([seed_embs, self._features], axis=1)
            seed_mask = self._get_sparse_mask(self._start_sample_ids)
            return not_finished, first_inputs, seed_mask

        def sample(self, time, outputs, state, mask, name=None):
            """Return int32 ids of the highest-scoring tracks after applying ``mask``."""
            del time, state  # unused by sample

            normed_outputs = tf.nn.l2_normalize(outputs, axis=-1)
            scores = tf.matmul(normed_outputs, self._norm_track_embs, transpose_b=True)
            # Subtracting the densified mask pushes masked entries to -inf.
            masked_scores = scores - tf.sparse_tensor_to_dense(mask)
            best_ids = tf.argmax(masked_scores, axis=-1)

            return tf.cast(best_ids, tf.int32)

        def next_inputs(self, time, outputs, state, sample_ids, mask, name=None):
            """Return ``(finished, next_inputs, state, next_mask)`` for the following step.

            ``finished`` is always all False, ``state`` is passed through
            unchanged, and ``next_mask`` is ``mask`` plus the entries for
            ``sample_ids``.
            """
            del time, outputs  # unused by next_inputs
            not_finished = tf.tile([False], [self._batch_size])
            sampled_embs = tf.nn.embedding_lookup(self._norm_track_embs, sample_ids)
            following_inputs = tf.concat([sampled_embs, self._features], axis=1)
            updated_mask = tf.sparse_add(mask, self._get_sparse_mask(sample_ids))

            return not_finished, following_inputs, state, updated_mask
    

    So, now I can generate inferences without repetition of already predicted items.