pytorch, nlp, apple-m1, huggingface, huggingface-evaluate

HuggingFace LLM Evaluate: RuntimeError: isin_Tensor_Tensor_out only works on floating types on MPS for pre MacOS_14_0. Received dtype: Long


Context: I am trying to set up an evaluation pipeline for a text summarization task using the HuggingFace evaluate package. The error complains about a tensor with dtype Long, but I never feed any Long-typed data: the two columns specified for the evaluator are plain text. From further investigation, the issue appears to be rooted in torch and my version of macOS (on an M1 Mac). I'm not sure how to proceed with this.
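Judging from the traceback further down, the Long tensor seems to be built inside generate() itself (the vocab_tensor and eos_token_id in MinLengthLogitsProcessor), not taken from my data. If that reading is right, the failure should be reproducible with a few lines of plain PyTorch on an MPS device on a pre-Sonoma machine (untested sketch):

import torch

# Mirrors MinLengthLogitsProcessor: torch.isin on two integer (Long) tensors on MPS.
vocab_tensor = torch.arange(100, device="mps")            # int64 / Long; size is arbitrary here
eos_token_id = torch.tensor([2], device="mps")            # int64 / Long; arbitrary token id
eos_token_mask = torch.isin(vocab_tensor, eos_token_id)   # expected to raise the same RuntimeError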

Here is what I did:

My code:

from transformers import pipeline
from evaluate import evaluator
from datasets import load_dataset

# Load data:
booksum = load_dataset("kmfoda/booksum", split="validation[:1000]")

# Load pipeline
pipe = pipeline(
    task="summarization",
    model="pszemraj/led-base-book-summary",
    device="mps"
)

# Setup Evaluate task using Rouge
task_evaluator = evaluator("summarization")

# The call that raises the error:
eval_results = task_evaluator.compute(
    model_or_pipeline=pipe,
    data=booksum,
    metric="rouge",
    input_column="chapter",
    label_column="summary_text"
)

This gives me the error below:

Short Error message:

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/generation/logits_process.py:157, in MinLengthLogitsProcessor.__call__(self, input_ids, scores)
    154 @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING)
    155 def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
    156     vocab_tensor = torch.arange(scores.shape[-1], device=scores.device)
--> 157     eos_token_mask = torch.isin(vocab_tensor, self.eos_token_id)
    158     scores_processed = scores.clone()
    159     if input_ids.shape[-1] < self.min_length:

RuntimeError: isin_Tensor_Tensor_out only works on floating types on MPS for pre MacOS_14_0. Received dtype: Long

Full Error message:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[10], line 1
----> 1 eval_results = task_evaluator.compute(
      2     model_or_pipeline=pipe,
      3     data=booksum,
      4     metric="rouge",
      6     input_column="chapter",
      7     label_column="summary_text"
      8 )

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/evaluate/evaluator/text2text_generation.py:191, in SummarizationEvaluator.compute(self, model_or_pipeline, data, subset, split, metric, tokenizer, strategy, confidence_level, n_resamples, device, random_state, input_column, label_column, generation_kwargs)
    166 @add_start_docstrings(
    167     EVALUTOR_COMPUTE_START_DOCSTRING,
    168     TASK_DOCUMENTATION_KWARGS,
   (...)
    189     generation_kwargs: dict = None,
    190 ) -> Tuple[Dict[str, float], Any]:
--> 191     result = super().compute(
    192         model_or_pipeline=model_or_pipeline,
    193         data=data,
    194         subset=subset,
    195         split=split,
    196         metric=metric,
    197         tokenizer=tokenizer,
    198         strategy=strategy,
    199         confidence_level=confidence_level,
    200         n_resamples=n_resamples,
    201         device=device,
    202         random_state=random_state,
    203         input_column=input_column,
    204         label_column=label_column,
    205         generation_kwargs=generation_kwargs,
    206     )
    208     return result

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/evaluate/evaluator/text2text_generation.py:133, in Text2TextGenerationEvaluator.compute(self, model_or_pipeline, data, subset, split, metric, tokenizer, strategy, confidence_level, n_resamples, device, random_state, input_column, label_column, generation_kwargs)
    130 if generation_kwargs is not None:
    131     self.PIPELINE_KWARGS.update(generation_kwargs)
--> 133 result = super().compute(
    134     model_or_pipeline=model_or_pipeline,
    135     data=data,
    136     subset=subset,
    137     split=split,
    138     metric=metric,
    139     tokenizer=tokenizer,
    140     strategy=strategy,
    141     confidence_level=confidence_level,
    142     n_resamples=n_resamples,
    143     device=device,
    144     random_state=random_state,
    145     input_column=input_column,
    146     label_column=label_column,
    147 )
    149 return result

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/evaluate/evaluator/base.py:255, in Evaluator.compute(self, model_or_pipeline, data, subset, split, metric, tokenizer, feature_extractor, strategy, confidence_level, n_resamples, device, random_state, input_column, label_column, label_mapping)
    252 metric = self.prepare_metric(metric)
    254 # Compute predictions
--> 255 predictions, perf_results = self.call_pipeline(pipe, pipe_inputs)
    256 predictions = self.predictions_processor(predictions, label_mapping)
    258 metric_inputs.update(predictions)

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/evaluate/evaluator/base.py:513, in Evaluator.call_pipeline(self, pipe, *args, **kwargs)
    511 def call_pipeline(self, pipe, *args, **kwargs):
    512     start_time = perf_counter()
--> 513     pipe_output = pipe(*args, **kwargs, **self.PIPELINE_KWARGS)
    514     end_time = perf_counter()
    515     return pipe_output, self._compute_time_perf(start_time, end_time, len(pipe_output))

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/text2text_generation.py:269, in SummarizationPipeline.__call__(self, *args, **kwargs)
    245 def __call__(self, *args, **kwargs):
    246     r"""
    247     Summarize the text(s) given as inputs.
    248 
   (...)
    267           ids of the summary.
    268     """
--> 269     return super().__call__(*args, **kwargs)

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/text2text_generation.py:167, in Text2TextGenerationPipeline.__call__(self, *args, **kwargs)
    138 def __call__(self, *args, **kwargs):
    139     r"""
    140     Generate the output text(s) using text(s) given as inputs.
    141 
   (...)
    164           ids of the generated text.
    165     """
--> 167     result = super().__call__(*args, **kwargs)
    168     if (
    169         isinstance(args[0], list)
    170         and all(isinstance(el, str) for el in args[0])
    171         and all(len(res) == 1 for res in result)
    172     ):
    173         return [res[0] for res in result]

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/base.py:1235, in Pipeline.__call__(self, inputs, num_workers, batch_size, *args, **kwargs)
   1231 if can_use_iterator:
   1232     final_iterator = self.get_iterator(
   1233         inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params
   1234     )
-> 1235     outputs = list(final_iterator)
   1236     return outputs
   1237 else:

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/pt_utils.py:124, in PipelineIterator.__next__(self)
    121     return self.loader_batch_item()
    123 # We're out of items within a batch
--> 124 item = next(self.iterator)
    125 processed = self.infer(item, **self.params)
    126 # We now have a batch of "inferred things".

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/pt_utils.py:125, in PipelineIterator.__next__(self)
    123 # We're out of items within a batch
    124 item = next(self.iterator)
--> 125 processed = self.infer(item, **self.params)
    126 # We now have a batch of "inferred things".
    127 if self.loader_batch_size is not None:
    128     # Try to infer the size of the batch

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/base.py:1161, in Pipeline.forward(self, model_inputs, **forward_params)
   1159     with inference_context():
   1160         model_inputs = self._ensure_tensor_on_device(model_inputs, device=self.device)
-> 1161         model_outputs = self._forward(model_inputs, **forward_params)
   1162         model_outputs = self._ensure_tensor_on_device(model_outputs, device=torch.device("cpu"))
   1163 else:

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/text2text_generation.py:191, in Text2TextGenerationPipeline._forward(self, model_inputs, **generate_kwargs)
    184     in_b, input_length = tf.shape(model_inputs["input_ids"]).numpy()
    186 self.check_inputs(
    187     input_length,
    188     generate_kwargs.get("min_length", self.model.config.min_length),
    189     generate_kwargs.get("max_length", self.model.config.max_length),
    190 )
--> 191 output_ids = self.model.generate(**model_inputs, **generate_kwargs)
    192 out_b = output_ids.shape[0]
    193 if self.framework == "pt":

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    113 @functools.wraps(func)
    114 def decorate_context(*args, **kwargs):
    115     with ctx_factory():
--> 116         return func(*args, **kwargs)

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/generation/utils.py:2028, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
   2020     input_ids, model_kwargs = self._expand_inputs_for_generation(
   2021         input_ids=input_ids,
   2022         expand_size=generation_config.num_beams,
   2023         is_encoder_decoder=self.config.is_encoder_decoder,
   2024         **model_kwargs,
   2025     )
   2027     # 14. run beam sample
-> 2028     result = self._beam_search(
   2029         input_ids,
   2030         beam_scorer,
   2031         logits_processor=prepared_logits_processor,
   2032         logits_warper=prepared_logits_warper,
   2033         stopping_criteria=prepared_stopping_criteria,
   2034         generation_config=generation_config,
   2035         synced_gpus=synced_gpus,
   2036         **model_kwargs,
   2037     )
   2039 elif generation_mode == GenerationMode.GROUP_BEAM_SEARCH:
   2040     # 11. prepare beam search scorer
   2041     beam_scorer = BeamSearchScorer(
   2042         batch_size=batch_size,
   2043         num_beams=generation_config.num_beams,
   (...)
   2049         max_length=generation_config.max_length,
   2050     )

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/generation/utils.py:3200, in GenerationMixin._beam_search(self, input_ids, beam_scorer, logits_processor, stopping_criteria, generation_config, synced_gpus, logits_warper, **model_kwargs)
   3195 next_token_logits = outputs.logits[:, -1, :].clone()
   3196 next_token_scores = nn.functional.log_softmax(
   3197     next_token_logits, dim=-1
   3198 )  # (batch_size * num_beams, vocab_size)
-> 3200 next_token_scores_processed = logits_processor(input_ids, next_token_scores)
   3201 if do_sample:
   3202     next_token_scores_processed = logits_warper(input_ids, next_token_scores_processed)

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/generation/logits_process.py:98, in LogitsProcessorList.__call__(self, input_ids, scores, **kwargs)
     96         scores = processor(input_ids, scores, **kwargs)
     97     else:
---> 98         scores = processor(input_ids, scores)
    100 return scores

File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/generation/logits_process.py:157, in MinLengthLogitsProcessor.__call__(self, input_ids, scores)
    154 @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING)
    155 def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
    156     vocab_tensor = torch.arange(scores.shape[-1], device=scores.device)
--> 157     eos_token_mask = torch.isin(vocab_tensor, self.eos_token_id)
    158     scores_processed = scores.clone()
    159     if input_ids.shape[-1] < self.min_length:

RuntimeError: isin_Tensor_Tensor_out only works on floating types on MPS for pre MacOS_14_0. Received dtype: Long

Notes: I did try passing device="mps" to task_evaluator.compute as well, but that gave me a different error: ValueError: This pipeline was instantiated on device None but device=mps was passed to 'compute'.
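For reference, this is the call that produced that ValueError (same arguments as above, with device added):

eval_results = task_evaluator.compute(
    model_or_pipeline=pipe,
    data=booksum,
    metric="rouge",
    input_column="chapter",
    label_column="summary_text",
    device="mps"   # -> ValueError: This pipeline was instantiated on device None ...
)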


Solution

  • I ran into a similar issue trying to run Facebook's nougat OCR tool.

    The error message mentions macOS 14 (Sonoma), and I was on macOS 13 (Ventura). Upgrading to macOS 14 fixed the issue for me. If upgrading is not an option, see the sketch below for keeping the pipeline off MPS.
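    Since the error only occurs on the MPS backend, keeping the pipeline off MPS should avoid the failing code path entirely. A rough sketch (using the same model as in the question; slower on CPU, and not verified on that exact setup) that only selects MPS on macOS 14 or newer:

    import platform

    from transformers import pipeline

    # platform.mac_ver()[0] is the macOS release string, e.g. "13.6" or "14.1".
    macos_release = platform.mac_ver()[0]
    macos_major = int(macos_release.split(".")[0]) if macos_release else 0

    # Use MPS only on macOS 14 (Sonoma) and newer; otherwise fall back to CPU (-1).
    device = "mps" if macos_major >= 14 else -1

    pipe = pipeline(
        task="summarization",
        model="pszemraj/led-base-book-summary",
        device=device,
    )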