Context: I'm trying to build an evaluation pipeline for a text summarization task using Hugging Face's evaluate package. I'm getting an error about a tensor with dtype Long, even though I never feed in any long-typed data and the two columns I pass to the evaluator are text only. Further investigation suggests the issue is rooted in torch's MPS backend and my Mac (M1). I'm not sure how to proceed.
Here is what I did:
My code:
from transformers import pipeline
from evaluate import evaluator
from datasets import load_dataset
# Load data:
booksum = load_dataset("kmfoda/booksum", split="validation[:1000]")
# Load pipeline
pipe = pipeline(
    task="summarization",
    model="pszemraj/led-base-book-summary",
    device="mps"
)
# Setup Evaluate task using Rouge
task_evaluator = evaluator("summarization")
# The code that yields the issue:
eval_results = task_evaluator.compute(
    model_or_pipeline=pipe,
    data=booksum,
    metric="rouge",
    input_column="chapter",
    label_column="summary_text"
)
This gives me the runtime error below.
Short error message:
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/generation/logits_process.py:157, in MinLengthLogitsProcessor.__call__(self, input_ids, scores)
154 @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING)
155 def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
156 vocab_tensor = torch.arange(scores.shape[-1], device=scores.device)
--> 157 eos_token_mask = torch.isin(vocab_tensor, self.eos_token_id)
158 scores_processed = scores.clone()
159 if input_ids.shape[-1] < self.min_length:
RuntimeError: isin_Tensor_Tensor_out only works on floating types on MPS for pre MacOS_14_0. Received dtype: Long
Full error message:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[10], line 1
----> 1 eval_results = task_evaluator.compute(
2 model_or_pipeline=pipe,
3 data=booksum,
4 metric="rouge",
6 input_column="chapter",
7 label_column="summary_text"
8 )
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/evaluate/evaluator/text2text_generation.py:191, in SummarizationEvaluator.compute(self, model_or_pipeline, data, subset, split, metric, tokenizer, strategy, confidence_level, n_resamples, device, random_state, input_column, label_column, generation_kwargs)
166 @add_start_docstrings(
167 EVALUTOR_COMPUTE_START_DOCSTRING,
168 TASK_DOCUMENTATION_KWARGS,
(...)
189 generation_kwargs: dict = None,
190 ) -> Tuple[Dict[str, float], Any]:
--> 191 result = super().compute(
192 model_or_pipeline=model_or_pipeline,
193 data=data,
194 subset=subset,
195 split=split,
196 metric=metric,
197 tokenizer=tokenizer,
198 strategy=strategy,
199 confidence_level=confidence_level,
200 n_resamples=n_resamples,
201 device=device,
202 random_state=random_state,
203 input_column=input_column,
204 label_column=label_column,
205 generation_kwargs=generation_kwargs,
206 )
208 return result
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/evaluate/evaluator/text2text_generation.py:133, in Text2TextGenerationEvaluator.compute(self, model_or_pipeline, data, subset, split, metric, tokenizer, strategy, confidence_level, n_resamples, device, random_state, input_column, label_column, generation_kwargs)
130 if generation_kwargs is not None:
131 self.PIPELINE_KWARGS.update(generation_kwargs)
--> 133 result = super().compute(
134 model_or_pipeline=model_or_pipeline,
135 data=data,
136 subset=subset,
137 split=split,
138 metric=metric,
139 tokenizer=tokenizer,
140 strategy=strategy,
141 confidence_level=confidence_level,
142 n_resamples=n_resamples,
143 device=device,
144 random_state=random_state,
145 input_column=input_column,
146 label_column=label_column,
147 )
149 return result
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/evaluate/evaluator/base.py:255, in Evaluator.compute(self, model_or_pipeline, data, subset, split, metric, tokenizer, feature_extractor, strategy, confidence_level, n_resamples, device, random_state, input_column, label_column, label_mapping)
252 metric = self.prepare_metric(metric)
254 # Compute predictions
--> 255 predictions, perf_results = self.call_pipeline(pipe, pipe_inputs)
256 predictions = self.predictions_processor(predictions, label_mapping)
258 metric_inputs.update(predictions)
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/evaluate/evaluator/base.py:513, in Evaluator.call_pipeline(self, pipe, *args, **kwargs)
511 def call_pipeline(self, pipe, *args, **kwargs):
512 start_time = perf_counter()
--> 513 pipe_output = pipe(*args, **kwargs, **self.PIPELINE_KWARGS)
514 end_time = perf_counter()
515 return pipe_output, self._compute_time_perf(start_time, end_time, len(pipe_output))
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/text2text_generation.py:269, in SummarizationPipeline.__call__(self, *args, **kwargs)
245 def __call__(self, *args, **kwargs):
246 r"""
247 Summarize the text(s) given as inputs.
248
(...)
267 ids of the summary.
268 """
--> 269 return super().__call__(*args, **kwargs)
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/text2text_generation.py:167, in Text2TextGenerationPipeline.__call__(self, *args, **kwargs)
138 def __call__(self, *args, **kwargs):
139 r"""
140 Generate the output text(s) using text(s) given as inputs.
141
(...)
164 ids of the generated text.
165 """
--> 167 result = super().__call__(*args, **kwargs)
168 if (
169 isinstance(args[0], list)
170 and all(isinstance(el, str) for el in args[0])
171 and all(len(res) == 1 for res in result)
172 ):
173 return [res[0] for res in result]
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/base.py:1235, in Pipeline.__call__(self, inputs, num_workers, batch_size, *args, **kwargs)
1231 if can_use_iterator:
1232 final_iterator = self.get_iterator(
1233 inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params
1234 )
-> 1235 outputs = list(final_iterator)
1236 return outputs
1237 else:
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/pt_utils.py:124, in PipelineIterator.__next__(self)
121 return self.loader_batch_item()
123 # We're out of items within a batch
--> 124 item = next(self.iterator)
125 processed = self.infer(item, **self.params)
126 # We now have a batch of "inferred things".
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/pt_utils.py:125, in PipelineIterator.__next__(self)
123 # We're out of items within a batch
124 item = next(self.iterator)
--> 125 processed = self.infer(item, **self.params)
126 # We now have a batch of "inferred things".
127 if self.loader_batch_size is not None:
128 # Try to infer the size of the batch
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/base.py:1161, in Pipeline.forward(self, model_inputs, **forward_params)
1159 with inference_context():
1160 model_inputs = self._ensure_tensor_on_device(model_inputs, device=self.device)
-> 1161 model_outputs = self._forward(model_inputs, **forward_params)
1162 model_outputs = self._ensure_tensor_on_device(model_outputs, device=torch.device("cpu"))
1163 else:
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/pipelines/text2text_generation.py:191, in Text2TextGenerationPipeline._forward(self, model_inputs, **generate_kwargs)
184 in_b, input_length = tf.shape(model_inputs["input_ids"]).numpy()
186 self.check_inputs(
187 input_length,
188 generate_kwargs.get("min_length", self.model.config.min_length),
189 generate_kwargs.get("max_length", self.model.config.max_length),
190 )
--> 191 output_ids = self.model.generate(**model_inputs, **generate_kwargs)
192 out_b = output_ids.shape[0]
193 if self.framework == "pt":
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/generation/utils.py:2028, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
2020 input_ids, model_kwargs = self._expand_inputs_for_generation(
2021 input_ids=input_ids,
2022 expand_size=generation_config.num_beams,
2023 is_encoder_decoder=self.config.is_encoder_decoder,
2024 **model_kwargs,
2025 )
2027 # 14. run beam sample
-> 2028 result = self._beam_search(
2029 input_ids,
2030 beam_scorer,
2031 logits_processor=prepared_logits_processor,
2032 logits_warper=prepared_logits_warper,
2033 stopping_criteria=prepared_stopping_criteria,
2034 generation_config=generation_config,
2035 synced_gpus=synced_gpus,
2036 **model_kwargs,
2037 )
2039 elif generation_mode == GenerationMode.GROUP_BEAM_SEARCH:
2040 # 11. prepare beam search scorer
2041 beam_scorer = BeamSearchScorer(
2042 batch_size=batch_size,
2043 num_beams=generation_config.num_beams,
(...)
2049 max_length=generation_config.max_length,
2050 )
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/generation/utils.py:3200, in GenerationMixin._beam_search(self, input_ids, beam_scorer, logits_processor, stopping_criteria, generation_config, synced_gpus, logits_warper, **model_kwargs)
3195 next_token_logits = outputs.logits[:, -1, :].clone()
3196 next_token_scores = nn.functional.log_softmax(
3197 next_token_logits, dim=-1
3198 ) # (batch_size * num_beams, vocab_size)
-> 3200 next_token_scores_processed = logits_processor(input_ids, next_token_scores)
3201 if do_sample:
3202 next_token_scores_processed = logits_warper(input_ids, next_token_scores_processed)
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/generation/logits_process.py:98, in LogitsProcessorList.__call__(self, input_ids, scores, **kwargs)
96 scores = processor(input_ids, scores, **kwargs)
97 else:
---> 98 scores = processor(input_ids, scores)
100 return scores
File ~/.pyenv/versions/3.12.0/envs/llm-aug/lib/python3.12/site-packages/transformers/generation/logits_process.py:157, in MinLengthLogitsProcessor.__call__(self, input_ids, scores)
154 @add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING)
155 def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
156 vocab_tensor = torch.arange(scores.shape[-1], device=scores.device)
--> 157 eos_token_mask = torch.isin(vocab_tensor, self.eos_token_id)
158 scores_processed = scores.clone()
159 if input_ids.shape[-1] < self.min_length:
RuntimeError: isin_Tensor_Tensor_out only works on floating types on MPS for pre MacOS_14_0. Received dtype: Long
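For what it's worth, the failing call can be reproduced in isolation, which points at torch's MPS backend rather than at evaluate or the dataset. A minimal sketch, assuming an Apple Silicon Mac on a macOS version earlier than 14:
import torch
# Mirror what MinLengthLogitsProcessor does internally:
vocab_tensor = torch.arange(10, device="mps")   # integer (Long) tensor
eos_token_id = torch.tensor([2], device="mps")  # integer (Long) tensor
torch.isin(vocab_tensor, eos_token_id)          # should raise the same RuntimeError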
Notes: I did try adding device="mps" to task_evaluator.compute, but that gave me a different error: ValueError: This pipeline was instantiated on device None but device=mps was passed to 'compute'.
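That ValueError suggests the device can only be set in one place. A possible alternative, sketched under the assumption that evaluate builds the pipeline itself when given a model id (this should avoid the ValueError, though the underlying MPS RuntimeError presumably remains):
# Sketch: pass a model id instead of a pre-built pipeline, so the
# device argument to compute() is honored by the evaluator.
eval_results = task_evaluator.compute(
    model_or_pipeline="pszemraj/led-base-book-summary",  # model id, not a Pipeline
    data=booksum,
    metric="rouge",
    input_column="chapter",
    label_column="summary_text",
    device="mps"
)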
I ran into a similar issue trying to run Facebook's nougat OCR tool.
The error message mentions macOS 14 (Sonoma) and I was on macOS 13 (Ventura). Upgrading to macOS 14 fixed the issue for me.
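If upgrading isn't an option, a possible workaround is to keep the model on CPU, which sidesteps the MPS-only restriction at the cost of speed. A minimal sketch:
# Workaround sketch: avoid MPS entirely by running the pipeline on CPU.
pipe = pipeline(
    task="summarization",
    model="pszemraj/led-base-book-summary",
    device="cpu"  # slower, but torch.isin on Long tensors works on CPU
)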