I'm confused about fine-tuning a model with Hugging Face. I am able to train my model, but when I try to run prediction on it I always get this error. The most similar question is this one. My transformers version is 4.24.0, but that doesn't seem to help. I also tried this. Below is my code snippet.
from transformers import AutoTokenizer
from transformers import DataCollatorForSeq2Seq
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import pipeline
from tqdm import tqdm
from datasets import Dataset
import pandas as pd
import numpy as np
import pyarrow as pa
import gc
import torch as t
import pickle
PATH = './datas/Batch_answers - train_data (no-blank).csv'
EPOCH = 1
LEARNING_RATE = 2e-5
TRAIN_BATCH_SIZE = 16
EVAL_BATCH_SIZE = 16
DEVICE = 'cuda' if t.cuda.is_available() else 'cpu'
df = pd.read_csv(PATH)
df = df.drop(labels='s', axis=1)
df = df.iloc[:, 1:5]
df = df.to_numpy()
qData = []
for i in tqdm(range(len(df))):
    argument = df[i][0][1:-1]
    response = df[i][1][1:-1]
    qprime = df[i][2][1:-1]
    qData.append({'statement': argument + '\n' + response, 'argument_sentence_summary': qprime})
qtable = pa.Table.from_pylist(qData)
qDataset = Dataset(qtable)
qDataset = qDataset.train_test_split(train_size=0.8)
qModel = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
qTokenizer = AutoTokenizer.from_pretrained("t5-small")
qData_collator = DataCollatorForSeq2Seq(tokenizer=qTokenizer, model=qModel)
def Qpreprocessing(data):
    model_input = qTokenizer(data['statement'], max_length=250, truncation=True)
    labels = qTokenizer(text_target=data['argument_sentence_summary'], max_length=75, truncation=True)
    model_input['labels'] = labels['input_ids']
    return model_input
qToken = qDataset.map(Qpreprocessing, batched=True)
qTraining_args = Seq2SeqTrainingArguments(
    output_dir="./result",
    evaluation_strategy="epoch",
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=TRAIN_BATCH_SIZE,
    per_device_eval_batch_size=EVAL_BATCH_SIZE,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=EPOCH,
    fp16=True,
)
qTrainer = Seq2SeqTrainer(
    model=qModel,
    args=qTraining_args,
    train_dataset=qToken['train'],
    eval_dataset=qToken['test'],
    tokenizer=qTokenizer,
    data_collator=qData_collator
)
old_collator = qTrainer.data_collator
qTrainer.data_collator = lambda data: dict(old_collator(data))
qTrainer.train()
qp = pipeline('summarization', model=qModel, tokenizer=qTokenizer)
qp(qDataset['test'][0]['statement'])  # breaks on this line

Full traceback:
RuntimeError Traceback (most recent call last)
Cell In [20], line 3
1 qp = pipeline('summarization', model=qModel, tokenizer=qTokenizer)
2 # temp = t.tensor(qDataset['test'][0]['statement']).to(DEVICE)
----> 3 qp(qDataset['train'][0]['statement'])
File ~\anaconda3\envs\ame\lib\site-packages\transformers\pipelines\text2text_generation.py:250, in SummarizationPipeline.__call__(self, *args, **kwargs)
226 def __call__(self, *args, **kwargs):
227 r"""
228 Summarize the text(s) given as inputs.
229
(...)
248 ids of the summary.
249 """
--> 250 return super().__call__(*args, **kwargs)
File ~\anaconda3\envs\ame\lib\site-packages\transformers\pipelines\text2text_generation.py:150, in Text2TextGenerationPipeline.__call__(self, *args, **kwargs)
121 def __call__(self, *args, **kwargs):
122 r"""
123 Generate the output text(s) using text(s) given as inputs.
124
(...)
147 ids of the generated text.
148 """
--> 150 result = super().__call__(*args, **kwargs)
151 if (
152 isinstance(args[0], list)
153 and all(isinstance(el, str) for el in args[0])
154 and all(len(res) == 1 for res in result)
155 ):
156 return [res[0] for res in result]
File ~\anaconda3\envs\ame\lib\site-packages\transformers\pipelines\base.py:1074, in Pipeline.__call__(self, inputs, num_workers, batch_size, *args, **kwargs)
1072 return self.iterate(inputs, preprocess_params, forward_params, postprocess_params)
1073 else:
-> 1074 return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
File ~\anaconda3\envs\ame\lib\site-packages\transformers\pipelines\base.py:1081, in Pipeline.run_single(self, inputs, preprocess_params, forward_params, postprocess_params)
1079 def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
1080 model_inputs = self.preprocess(inputs, **preprocess_params)
-> 1081 model_outputs = self.forward(model_inputs, **forward_params)
1082 outputs = self.postprocess(model_outputs, **postprocess_params)
1083 return outputs
File ~\anaconda3\envs\ame\lib\site-packages\transformers\pipelines\base.py:990, in Pipeline.forward(self, model_inputs, **forward_params)
988 with inference_context():
989 model_inputs = self._ensure_tensor_on_device(model_inputs, device=self.device)
--> 990 model_outputs = self._forward(model_inputs, **forward_params)
991 model_outputs = self._ensure_tensor_on_device(model_outputs, device=torch.device("cpu"))
992 else:
File ~\anaconda3\envs\ame\lib\site-packages\transformers\pipelines\text2text_generation.py:172, in Text2TextGenerationPipeline._forward(self, model_inputs, **generate_kwargs)
170 generate_kwargs["max_length"] = generate_kwargs.get("max_length", self.model.config.max_length)
171 self.check_inputs(input_length, generate_kwargs["min_length"], generate_kwargs["max_length"])
--> 172 output_ids = self.model.generate(**model_inputs, **generate_kwargs)
173 out_b = output_ids.shape[0]
174 if self.framework == "pt":
File ~\anaconda3\envs\ame\lib\site-packages\torch\autograd\grad_mode.py:27, in _DecoratorContextManager.__call__.<locals>.decorate_context(*args, **kwargs)
24 @functools.wraps(func)
25 def decorate_context(*args, **kwargs):
26 with self.clone():
---> 27 return func(*args, **kwargs)
File ~\anaconda3\envs\ame\lib\site-packages\transformers\generation_utils.py:1339, in GenerationMixin.generate(self, inputs, max_length, min_length, do_sample, early_stopping, num_beams, temperature, penalty_alpha, top_k, top_p, typical_p, repetition_penalty, bad_words_ids, force_words_ids, bos_token_id, pad_token_id, eos_token_id, length_penalty, no_repeat_ngram_size, encoder_no_repeat_ngram_size, num_return_sequences, max_time, max_new_tokens, decoder_start_token_id, use_cache, num_beam_groups, diversity_penalty, prefix_allowed_tokens_fn, logits_processor, renormalize_logits, stopping_criteria, constraints, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, forced_bos_token_id, forced_eos_token_id, remove_invalid_values, synced_gpus, exponential_decay_length_penalty, suppress_tokens, begin_suppress_tokens, forced_decoder_ids, **model_kwargs)
1331 logger.warning(
1332 "A decoder-only architecture is being used, but right-padding was detected! For correct "
1333 "generation results, please set `padding_side='left'` when initializing the tokenizer."
1334 )
1336 if self.config.is_encoder_decoder and "encoder_outputs" not in model_kwargs:
1337 # if model is encoder decoder encoder_outputs are created
1338 # and added to `model_kwargs`
-> 1339 model_kwargs = self._prepare_encoder_decoder_kwargs_for_generation(
1340 inputs_tensor, model_kwargs, model_input_name
1341 )
1343 # 4. Prepare `input_ids` which will be used for auto-regressive generation
1344 if self.config.is_encoder_decoder:
File ~\anaconda3\envs\ame\lib\site-packages\transformers\generation_utils.py:583, in GenerationMixin._prepare_encoder_decoder_kwargs_for_generation(self, inputs_tensor, model_kwargs, model_input_name)
581 encoder_kwargs["return_dict"] = True
582 encoder_kwargs[model_input_name] = inputs_tensor
--> 583 model_kwargs["encoder_outputs"]: ModelOutput = encoder(**encoder_kwargs)
585 return model_kwargs
File ~\anaconda3\envs\ame\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
File ~\anaconda3\envs\ame\lib\site-packages\transformers\models\t5\modeling_t5.py:941, in T5Stack.forward(self, input_ids, attention_mask, encoder_hidden_states, encoder_attention_mask, inputs_embeds, head_mask, cross_attn_head_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
939 if inputs_embeds is None:
940 assert self.embed_tokens is not None, "You have to initialize the model with valid token embeddings"
--> 941 inputs_embeds = self.embed_tokens(input_ids)
943 batch_size, seq_length = input_shape
945 # required mask seq length can be calculated via length of past
File ~\anaconda3\envs\ame\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
File ~\anaconda3\envs\ame\lib\site-packages\torch\nn\modules\sparse.py:158, in Embedding.forward(self, input)
157 def forward(self, input: Tensor) -> Tensor:
--> 158 return F.embedding(
159 input, self.weight, self.padding_idx, self.max_norm,
160 self.norm_type, self.scale_grad_by_freq, self.sparse)
File ~\anaconda3\envs\ame\lib\site-packages\torch\nn\functional.py:2199, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
2193 # Note [embedding_renorm set_grad_enabled]
2194 # XXX: equivalent to
2195 # with torch.no_grad():
2196 # torch.embedding_renorm_
2197 # remove once script supports set_grad_enabled
2198 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2199 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper__index_select)

Does this mean I need a different way to run predictions on my test dataset instead of using a pipeline? Thanks a lot for your help.
Posted on 2022-11-20 08:12:37
I got the idea from the comments. The way I solved it is that I can still train my qModel on 'cuda', but if I want to run predictions I need to move my qModel to 'cpu'. So I modified the last few lines of code as follows:
qTrainer.train()
qModel = qModel.to('cpu')  # move my model to the CPU
qp = pipeline('summarization', model=qModel, tokenizer=qTokenizer)
print(qp(qDataset['test'][0]['statement']))

And it works.
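As an alternative (a minimal sketch I haven't verified against this exact setup), the pipeline factory also accepts a device argument, so you should be able to keep qModel on the GPU and let the pipeline move the inputs onto the same device itself instead of moving the model to the CPU:

qTrainer.train()
# keep the model on 'cuda' and tell the pipeline which device to run on;
# device=0 means the first CUDA device, device=-1 would mean CPU
qp = pipeline('summarization', model=qModel, tokenizer=qTokenizer, device=0)
print(qp(qDataset['test'][0]['statement']))

Keeping inference on the GPU avoids the extra model copy to host memory and is usually faster for larger test sets.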
https://stackoverflow.com/questions/74497166