text_generation_pipeline.py 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. # Copyright (c) 2022 Zhipu.AI
  3. import os
  4. from typing import Any, Dict, List, Optional, Union
  5. import torch
  6. from transformers import GenerationConfig
  7. from modelscope import snapshot_download
  8. from modelscope.metainfo import Pipelines
  9. from modelscope.models.base import Model
  10. from modelscope.outputs import (ModelOutputBase, OutputKeys,
  11. TokenGeneratorOutput)
  12. from modelscope.pipelines.base import Pipeline, Tensor
  13. from modelscope.pipelines.builder import PIPELINES
  14. from modelscope.preprocessors import Preprocessor
  15. from modelscope.utils.chinese_utils import remove_space_between_chinese_chars
  16. from modelscope.utils.constant import ModelFile, Tasks
  17. from modelscope.utils.hub import Config, read_config
  18. from modelscope.utils.logger import get_logger
  19. from modelscope.utils.streaming_output import PipelineStreamingOutputMixin
  20. from modelscope.utils.torch_utils import is_on_same_device
  # Module-level logger shared by every pipeline in this file.
  logger = get_logger()

  # Names exported by `from <module> import *`. Every pipeline class defined
  # in this file is listed, including `Llama2chatTaskPipeline`.
  __all__ = [
      'TextGenerationPipeline', 'TextGenerationT5Pipeline',
      'ChatGLM6bTextGenerationPipeline', 'ChatGLM6bV2TextGenerationPipeline',
      'QWenChatPipeline', 'QWenTextGenerationPipeline', 'SeqGPTPipeline',
      'Llama2TaskPipeline', 'Llama2chatTaskPipeline'
  ]
  @PIPELINES.register_module(
      Tasks.text_generation, module_name=Pipelines.text_generation)
  class TextGenerationPipeline(Pipeline, PipelineStreamingOutputMixin):
      """Text generation pipeline: calls ``model.generate`` and decodes the result.

      The decoding step is selected by name (``self.postprocessor``) among the
      methods ``decode``, ``sentence_piece`` and ``roberta`` defined below.
      """

      def __init__(self,
                   model: Union[Model, str],
                   preprocessor: Optional[Preprocessor] = None,
                   config_file: str = None,
                   device: str = 'gpu',
                   auto_collate=True,
                   first_sequence='sentence',
                   **kwargs):
          """Use `model` and `preprocessor` to create a generation pipeline for prediction.

          Args:
              model (str or Model): Supply either a local model dir which supported the text generation task,
                  or a model id from the model hub, or a torch model instance.
              preprocessor (Preprocessor): An optional preprocessor instance, please make sure the
                  preprocessor fits for the model if supplied.
              config_file (str): Forwarded unchanged to the base pipeline constructor.
              device (str): Forwarded unchanged to the base pipeline constructor.
              auto_collate: Forwarded unchanged to the base pipeline constructor.
              first_sequence (str): Passed to ``Preprocessor.from_pretrained`` when no
                  preprocessor is supplied.
              kwargs (dict, `optional`):
                  Extra kwargs passed into the base pipeline and the preprocessor's constructor.
                  ``compile`` and ``compile_options`` are popped and handed to the base
                  pipeline; ``postprocessor`` selects the decoding method.

          Examples:
              >>> from modelscope.pipelines import pipeline
              >>> pipeline_ins = pipeline(task='text-generation',
              >>>    model='damo/nlp_palm2.0_text-generation_chinese-base')
              >>> # The sample input is Chinese because the example model is a Chinese model.
              >>> sentence1 = '本文总结了十个可穿戴产品的设计原则,而这些原则,同样也是笔者认为是这个行业最吸引人的地方:'
              >>>     '1.为人们解决重复性问题;2.从人开始,而不是从机器开始;3.要引起注意,但不要刻意;4.提升用户能力,而不是取代'
              >>> print(pipeline_ins(sentence1))
              >>> # Or use the dict input:
              >>> print(pipeline_ins({'sentence': sentence1}))

          To view other examples please check tests/pipelines/test_text_generation.py.
          """
          # Pop the compile options first so they are not forwarded twice.
          use_compile = kwargs.pop('compile', False)
          compile_options = kwargs.pop('compile_options', {})
          super().__init__(
              model=model,
              preprocessor=preprocessor,
              config_file=config_file,
              device=device,
              auto_collate=auto_collate,
              compile=use_compile,
              compile_options=compile_options,
              **kwargs)

          assert isinstance(self.model, Model), \
              f'please check whether model config exists in {ModelFile.CONFIGURATION}'

          if preprocessor is None:
              self.preprocessor = Preprocessor.from_pretrained(
                  self.model.model_dir, first_sequence=first_sequence, **kwargs)
          self.model.eval()

          # Resolution order for the decoding method name:
          # explicit kwarg -> model configuration (old-code compatibility) -> 'decode'.
          postprocessor_name = kwargs.pop('postprocessor', None)
          if postprocessor_name is None and hasattr(self.model, 'model_dir'):
              model_cfg = read_config(self.model.model_dir)
              postprocessor_name = model_cfg.get('postprocessor')
          self.postprocessor = ('decode' if postprocessor_name is None else
                                postprocessor_name)

          # Ensures the fallback warning in `forward` is emitted only once.
          self.has_logged = False

      def _sanitize_parameters(self, **pipeline_parameters):
          """Route every call-time parameter to the forward step."""
          return {}, pipeline_parameters, {}

      def forward(self, inputs: Union[Dict[str, Any], Tensor],
                  **forward_params) -> Dict[str, Any]:
          """Run ``model.generate`` without gradient tracking.

          The inputs are first passed positionally; if that raises an
          ``AttributeError`` the call is retried with the inputs unpacked as
          keyword arguments, and a warning is logged the first time this happens.
          """
          with torch.no_grad():
              try:
                  return self.model.generate(inputs, **forward_params)
              except AttributeError as err:
                  if not self.has_logged:
                      logger.warning(
                          'When inputs are passed directly, '
                          f'the error is {err}, '
                          'which can be ignored if it runs correctly.')
                      self.has_logged = True
                  return self.model.generate(**inputs, **forward_params)

      def decode(self, inputs) -> str:
          """Decode token ids with the preprocessor, skipping special tokens."""
          token_ids = inputs.tolist()
          return self.preprocessor.decode(token_ids, skip_special_tokens=True)

      def sentence_piece(self, inputs) -> str:
          """Decode token ids with the preprocessor using its default options."""
          token_ids = inputs.tolist()
          return self.preprocessor.decode(token_ids)

      def roberta(self, inputs) -> str:
          """Decode token ids, then rewrite the ``<q>``, ``<mask>`` and ``</s>`` markers."""
          text = self.preprocessor.decode(inputs.tolist())
          # Applied in this order, one after the other.
          for marker, replacement in (('<q>', '. '), ('<mask>', '. '),
                                      ('</s>', '')):
              text = text.replace(marker, replacement)
          return text

      def postprocess(self, inputs: Union[Dict[str, Tensor],
                                          TokenGeneratorOutput],
                      **postprocess_params) -> Dict[str, str]:
          """Turn the generation output into text.

          Args:
              inputs: The forward output. When it is a dict or a
                  ``ModelOutputBase`` its ``'sequences'`` entry is used.

          Returns:
              Dict[str, str]: the decoded text stored under ``OutputKeys.TEXT``.
          """
          sequences = inputs
          if isinstance(sequences, (dict, ModelOutputBase)):
              sequences = sequences['sequences']
          # Only the first sequence of a list / multi-dimensional output is decoded.
          if isinstance(sequences, list) or len(sequences.shape) > 1:
              sequences = sequences[0]
          decode_fn = getattr(self, self.postprocessor)
          raw_text = decode_fn(sequences)
          return {OutputKeys.TEXT: remove_space_between_chinese_chars(raw_text)}
  @PIPELINES.register_module(
      Tasks.text2text_generation, module_name=Pipelines.translation_en_to_de)
  @PIPELINES.register_module(
      Tasks.text2text_generation, module_name=Pipelines.translation_en_to_ro)
  @PIPELINES.register_module(
      Tasks.text2text_generation, module_name=Pipelines.translation_en_to_fr)
  @PIPELINES.register_module(
      Tasks.text2text_generation, module_name=Pipelines.text2text_generation)
  class TextGenerationT5Pipeline(TextGenerationPipeline):
      """Text-to-text generation pipeline with per-sub-task parameters.

      Reads ``task_specific_params``, ``min_length`` and ``max_length`` from the
      model's configuration files and applies them at preprocess / forward time.
      """

      def __init__(self,
                   model: Union[Model, str],
                   preprocessor: Optional[Preprocessor] = None,
                   sub_task=None,
                   **kwargs):
          """Build the pipeline and load the model-specific generation settings.

          Args:
              model: Model instance or model location, forwarded to the parent class.
              preprocessor: Optional preprocessor, forwarded to the parent class.
              sub_task: Optional key into ``task_specific_params``; when falsy,
                  ``self.model.pipeline.type`` is used instead at preprocess time.
              kwargs: Forwarded to the parent constructor.
          """
          super().__init__(model, preprocessor, **kwargs)
          self.sub_task = sub_task

          model_dir = getattr(self.model, 'model_dir', None)
          self.task_specific_params = self._parse_specific_model_params(
              model_dir, 'task_specific_params')
          self.min_length = self._parse_specific_model_params(
              model_dir, 'min_length')
          self.max_length = self._parse_specific_model_params(
              model_dir, 'max_length')

      def _parse_specific_model_params(self, model_dir, key):
          """Look up `key` in the model configuration.

          ``model.<key>`` of the configuration read from `model_dir` is tried
          first; if absent, `key` is read from ``config.json`` in that directory.
          Returns ``None`` when `model_dir` is ``None``.
          """
          if model_dir is None:
              return None

          primary_cfg: Config = read_config(model_dir)
          value = primary_cfg.safe_get(f'model.{key}')
          if value is not None:
              return value

          fallback_cfg: Config = read_config(
              os.path.join(model_dir, 'config.json'))
          return fallback_cfg.safe_get(key)

      def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
          """Apply the sub-task settings, then delegate to the parent preprocess.

          Raises:
              ValueError: if `inputs` is not a string.
          """
          if not isinstance(inputs, str):
              raise ValueError(f'Not supported input type: {type(inputs)}')

          if self.task_specific_params is not None:
              sub_task = self.sub_task or self.model.pipeline.type
              if sub_task in self.task_specific_params:
                  task_params = self.task_specific_params[sub_task]
                  # The sub-task settings are merged into the model config.
                  self.model.config.update(task_params)
                  if 'prefix' in task_params:
                      inputs = task_params.prefix + inputs

          return super().preprocess(inputs, **preprocess_params)

      def forward(self, inputs: Dict[str, Any],
                  **forward_params) -> Dict[str, Any]:
          """Generate with length limits from the call or the model config.

          A ``min_length`` / ``max_length`` given at call time wins over the
          configured one; a limit that resolves to ``None`` is not passed on.
          """
          configured_limits = (('min_length', self.min_length),
                               ('max_length', self.max_length))
          for name, configured in configured_limits:
              limit = forward_params.get(name, configured)
              if limit is not None:
                  forward_params[name] = limit

          with torch.no_grad():
              return self.model.generate(**inputs, **forward_params)
  @PIPELINES.register_module(
      group_key=Tasks.chat, module_name='chatglm6b-text-generation')
  class ChatGLM6bTextGenerationPipeline(Pipeline):
      """Chat pipeline that delegates to the model's ``chat`` method."""

      def __init__(self,
                   model: Union[Model, str],
                   quantization_bit=None,
                   use_bf16=False,
                   **kwargs):
          """Load (if needed) and prepare the ChatGLM model.

          Args:
              model: A model instance, a local directory, or a hub model id.
                  A string that is not an existing path is downloaded with
                  ``snapshot_download``.
              quantization_bit: When not ``None``, ``model.quantize`` is called
                  with this value.
              use_bf16: When true, the model is converted with ``bfloat16()``.
              kwargs: Forwarded to the base pipeline constructor.
          """
          from modelscope.models.nlp.chatglm.text_generation import (
              ChatGLMForConditionalGeneration)
          if isinstance(model, str):
              model_dir = snapshot_download(
                  model) if not os.path.exists(model) else model
              # Models loaded from disk are cast to half precision.
              model = ChatGLMForConditionalGeneration.from_pretrained(
                  model_dir).half()
              if torch.cuda.is_available():
                  model = model.cuda()
          if quantization_bit is not None:
              model = model.quantize(quantization_bit)
          if use_bf16:
              model = model.bfloat16()
          self.model = model
          self.model.eval()

          super().__init__(model=model, **kwargs)

      def _sanitize_parameters(self, **pipeline_parameters):
          """Route every call-time parameter to the forward step."""
          return {}, pipeline_parameters, {}

      def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
          """Return the inputs untouched; no preprocessing is performed."""
          return inputs

      # define the forward pass
      def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
          """Call ``model.chat`` with the inputs merged with the call parameters.

          The merge is done on a shallow copy: `preprocess` returns the caller's
          own dict, so updating it in place would make the parameters of one
          call leak into later calls that reuse the same dict.
          """
          chat_inputs = {**inputs, **forward_params}
          return self.model.chat(chat_inputs)

      # format the outputs from pipeline
      def postprocess(self, input, **kwargs) -> Dict[str, Any]:
          """Return the forward output untouched."""
          return input
  @PIPELINES.register_module(
      group_key=Tasks.chat, module_name='chatglm2_6b-text-generation')
  class ChatGLM6bV2TextGenerationPipeline(Pipeline):
      """Chat pipeline that delegates to ``model.chat(inputs, tokenizer)``."""

      def __init__(self,
                   model: Union[Model, str],
                   quantization_bit=None,
                   use_bf16=False,
                   trust_remote_code: Optional[bool] = None,
                   **kwargs):
          """Load (if needed) and prepare the model and its tokenizer.

          Args:
              model: A model instance, a local directory, or a hub model id.
              quantization_bit: When not ``None``, ``model.quantize`` is called
                  with this value.
              use_bf16: Selects ``torch.bfloat16`` as the default dtype when the
                  model is loaded here, or converts a supplied instance.
              trust_remote_code: Forwarded to ``Model.from_pretrained`` and
                  ``AutoTokenizer.from_pretrained``.
              kwargs: ``device`` (default ``'gpu'``), ``revision``,
                  ``device_map`` and ``torch_dtype`` are read here; the whole
                  dict is also forwarded to the base pipeline constructor.
          """
          from modelscope import AutoTokenizer
          device: str = kwargs.get('device', 'gpu')
          wants_gpu = device.startswith('gpu') or device.startswith('cuda')
          if isinstance(model, str):
              revision = kwargs.get('revision', None)
              model_dir = snapshot_download(
                  model,
                  revision=revision) if not os.path.exists(model) else model
              # On GPU the whole model defaults to device 0.
              default_device_map = {'': 0} if wants_gpu else None
              device_map = kwargs.get('device_map', default_device_map)
              default_torch_dtype = torch.bfloat16 if use_bf16 else None
              torch_dtype = kwargs.get('torch_dtype', default_torch_dtype)
              model = Model.from_pretrained(
                  model_dir,
                  trust_remote_code=trust_remote_code,
                  device_map=device_map,
                  torch_dtype=torch_dtype)
          else:
              if wants_gpu and is_on_same_device(model):
                  model.cuda()
              if use_bf16:
                  model.bfloat16()
          if quantization_bit is not None:
              model = model.quantize(quantization_bit)

          self.model = model
          self.model.eval()
          self.tokenizer = AutoTokenizer.from_pretrained(
              self.model.model_dir, trust_remote_code=trust_remote_code)

          super().__init__(model=model, **kwargs)

      def _sanitize_parameters(self, **pipeline_parameters):
          """Route every call-time parameter to the forward step."""
          return {}, pipeline_parameters, {}

      def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
          """Return the inputs untouched; no preprocessing is performed."""
          return inputs

      # define the forward pass
      def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
          """Call ``model.chat`` with the inputs merged with the call parameters.

          The merge is done on a shallow copy: `preprocess` returns the caller's
          own dict, so updating it in place would make the parameters of one
          call leak into later calls that reuse the same dict.
          """
          chat_inputs = {**inputs, **forward_params}
          return self.model.chat(chat_inputs, self.tokenizer)

      # format the outputs from pipeline
      def postprocess(self, input, **kwargs) -> Dict[str, Any]:
          """Return the forward output untouched."""
          return input
  @PIPELINES.register_module(group_key=Tasks.chat, module_name='qwen-chat')
  class QWenChatPipeline(Pipeline):
      """Chat pipeline that delegates to the QWen model's ``chat`` method."""

      def __init__(self, model: Union[Model, str], **kwargs):
          """Load the model and tokenizer, or adopt an already-built model.

          Args:
              model: A model id / path to load, or a ready model instance.
              kwargs: ``torch_dtype`` (default ``torch.bfloat16``),
                  ``device_map`` (default ``'auto'``), ``use_max_memory``,
                  ``model_revision`` (default ``'v.1.0.5'``) and ``tokenizer``
                  are read here; ``trust_remote_code`` is popped. The remaining
                  kwargs are forwarded to the base pipeline constructor.
          """
          from modelscope import AutoModelForCausalLM, AutoTokenizer
          torch_dtype = kwargs.get('torch_dtype', torch.bfloat16)
          device_map = kwargs.get('device_map', 'auto')
          use_max_memory = kwargs.get('use_max_memory', False)
          revision = kwargs.get('model_revision', 'v.1.0.5')
          trust_remote_code = kwargs.pop('trust_remote_code', None)

          # NOTE(review): `max_memory` is computed but never passed to
          # `from_pretrained`, so `use_max_memory` currently has no effect on
          # loading — confirm whether it should be forwarded.
          if use_max_memory:
              max_memory = f'{int(torch.cuda.mem_get_info()[0] / 1024 ** 3) - 2}GB'
              n_gpus = torch.cuda.device_count()
              max_memory = {i: max_memory for i in range(n_gpus)}
          else:
              max_memory = None

          bf16 = torch_dtype == 'bf16' or torch_dtype == torch.bfloat16

          if isinstance(model, str):
              self.tokenizer = AutoTokenizer.from_pretrained(
                  model, revision=revision, trust_remote_code=trust_remote_code)
              # NOTE(review): the bf16 flag is passed as `fp16=`, whereas
              # QWenTextGenerationPipeline passes `bf16=`. Left unchanged here;
              # confirm whether this is intentional.
              self.model = AutoModelForCausalLM.from_pretrained(
                  model,
                  device_map=device_map,
                  revision=revision,
                  trust_remote_code=trust_remote_code,
                  fp16=bf16).eval()
              # Generation length, top_p and related hyper-parameters come
              # from the model's generation config.
              self.model.generation_config = GenerationConfig.from_pretrained(
                  model, trust_remote_code=trust_remote_code)
          else:
              # Accept a ready model instance, as QWenTextGenerationPipeline
              # does; without this branch `self.model` was never assigned and
              # the `super().__init__` call below raised AttributeError.
              self.model = model
              self.tokenizer = kwargs.get('tokenizer', None)

          super().__init__(model=self.model, **kwargs)
          # skip pipeline model placement
          self._model_prepare = True

      def _sanitize_parameters(self, **pipeline_parameters):
          """Route every call-time parameter to the forward step."""
          return {}, pipeline_parameters, {}

      def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
          """Return the inputs untouched; no preprocessing is performed."""
          return inputs

      # define the forward pass
      def forward(self, inputs: Union[Dict, str],
                  **forward_params) -> Dict[str, Any]:
          """Run one chat turn.

          Args:
              inputs: Either the prompt string, or a dict with ``'text'`` and
                  optionally ``'history'``.
              forward_params: ``history`` (used only for string inputs),
                  ``system`` (default ``'You are a helpful assistant.'``) and
                  ``append_history`` (default ``True``).

          Returns:
              A dict with ``'response'`` and ``'history'``, taken from the
              first two items returned by ``model.chat``.
          """
          if isinstance(inputs, dict):
              text = inputs.get('text', None)
              history = inputs.get('history', None)
          else:
              text = inputs
              history = forward_params.get('history', None)
          system = forward_params.get('system', 'You are a helpful assistant.')
          append_history = forward_params.get('append_history', True)
          res = self.model.chat(self.tokenizer, text, history, system,
                                append_history)
          return {'response': res[0], 'history': res[1]}

      # format the outputs from pipeline
      def postprocess(self, input, **kwargs) -> Dict[str, Any]:
          """Return the forward output untouched."""
          return input
  @PIPELINES.register_module(
      group_key=Tasks.text_generation, module_name='qwen-text-generation')
  class QWenTextGenerationPipeline(Pipeline):
      """Text generation pipeline that tokenizes, generates and decodes with QWen."""

      def __init__(self, model: Union[Model, str], **kwargs):
          """Load the model and tokenizer, or adopt an already-built model.

          Args:
              model: A model id / path to load, or a ready model instance.
              kwargs: ``torch_dtype`` (default ``torch.bfloat16``),
                  ``device_map`` (default ``'auto'``), ``use_max_memory``,
                  ``model_revision`` (default ``'v.1.0.4'``) and ``tokenizer``
                  (only for a ready model instance) are read here;
                  ``trust_remote_code`` is popped. The remaining kwargs are
                  forwarded to the base pipeline constructor.
          """
          from modelscope import AutoModelForCausalLM, AutoTokenizer
          torch_dtype = kwargs.get('torch_dtype', torch.bfloat16)
          device_map = kwargs.get('device_map', 'auto')
          use_max_memory = kwargs.get('use_max_memory', False)
          revision = kwargs.get('model_revision', 'v.1.0.4')
          trust_remote_code = kwargs.pop('trust_remote_code', None)

          # NOTE(review): `max_memory` is computed but never passed to
          # `from_pretrained`, so `use_max_memory` currently has no effect on
          # loading — confirm whether it should be forwarded.
          if use_max_memory:
              max_memory = f'{int(torch.cuda.mem_get_info()[0] / 1024 ** 3) - 2}GB'
              n_gpus = torch.cuda.device_count()
              max_memory = {i: max_memory for i in range(n_gpus)}
          else:
              max_memory = None

          bf16 = torch_dtype == 'bf16' or torch_dtype == torch.bfloat16

          if isinstance(model, str):
              self.model = AutoModelForCausalLM.from_pretrained(
                  model,
                  device_map=device_map,
                  revision=revision,
                  trust_remote_code=trust_remote_code,
                  bf16=bf16).eval()
              self.tokenizer = AutoTokenizer.from_pretrained(
                  model, revision=revision, trust_remote_code=trust_remote_code)
              self.model.generation_config = GenerationConfig.from_pretrained(
                  model)
          else:
              self.model = model
              self.tokenizer = kwargs.get('tokenizer', None)

          super().__init__(model=self.model, **kwargs)
          # skip pipeline model placement
          self._model_prepare = True

      def _sanitize_parameters(self, **pipeline_parameters):
          """Route every call-time parameter to the forward step."""
          return {}, pipeline_parameters, {}

      def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
          """Return the inputs untouched; no preprocessing is performed."""
          return inputs

      # define the forward pass
      def forward(self, inputs: str, **forward_params) -> Dict[str, Any]:
          """Tokenize the prompt, generate, and decode the first sequence.

          `forward_params` are accepted but not passed to ``model.generate``.

          Returns:
              A dict with the decoded text under ``OutputKeys.TEXT``.
          """
          # Inputs go to 'cuda:0' whenever CUDA is available (as before);
          # on a host without CUDA fall back to CPU instead of crashing.
          target_device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
          encoded = self.tokenizer(inputs, return_tensors='pt').to(target_device)
          generated = self.model.generate(**encoded).cpu()
          text = self.tokenizer.decode(generated[0], skip_special_tokens=True)
          return {OutputKeys.TEXT: text}

      # format the outputs from pipeline
      def postprocess(self, input, **kwargs) -> Dict[str, Any]:
          """Return the forward output untouched."""
          return input
  @PIPELINES.register_module(
      group_key=Tasks.text_generation, module_name='seqgpt')
  class SeqGPTPipeline(Pipeline):
      """Text generation pipeline using beam search, returning the text after the prompt."""

      def __init__(self, model: Union[Model, str], **kwargs):
          """Load (if needed) the model and the tokenizer from its directory.

          Args:
              model: A model instance, a local directory, or a hub model id.
                  A string that is not an existing path is downloaded with
                  ``snapshot_download``.
              kwargs: Forwarded to the base pipeline constructor.
          """
          from modelscope import AutoTokenizer

          if isinstance(model, str):
              model_dir = snapshot_download(
                  model) if not os.path.exists(model) else model
              model = Model.from_pretrained(model_dir)
          else:
              # A ready model instance was supplied: take the tokenizer
              # location from the model itself. Previously `model_dir` was
              # unbound on this path and the tokenizer load raised
              # UnboundLocalError.
              model_dir = model.model_dir
          self.model = model
          self.model.eval()
          self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
          super().__init__(model=model, **kwargs)

      def _sanitize_parameters(self, **pipeline_parameters):
          """Route every call-time parameter to the forward step."""
          return {}, pipeline_parameters, {}

      def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
          """Return the inputs untouched; no preprocessing is performed."""
          return inputs

      # define the forward pass
      def forward(self, prompt: str, **forward_params) -> Dict[str, Any]:
          """Generate a continuation for `prompt`.

          Args:
              prompt: The input text.
              forward_params: ``gen_token`` (default ``''``) is appended to the
                  prompt before tokenization; other entries are ignored.

          Returns:
              A dict whose ``OutputKeys.TEXT`` entry is the first decoded
              sequence with its first ``len(prompt)`` characters removed.
          """
          encoded = self.tokenizer(
              prompt + forward_params.get('gen_token', ''),
              return_tensors='pt',
              padding=True,
              truncation=True,
              max_length=1024)
          input_ids = encoded.input_ids.to(self.model.device)
          outputs = self.model.generate(
              input_ids, num_beams=4, do_sample=False, max_new_tokens=256)
          decoded_sentences = self.tokenizer.batch_decode(
              outputs, skip_special_tokens=True)
          # Drop the leading characters that correspond to the prompt.
          decoded_sentence = decoded_sentences[0][len(prompt):]
          return {OutputKeys.TEXT: decoded_sentence}

      # format the outputs from pipeline
      def postprocess(self, input, **kwargs) -> Dict[str, Any]:
          """Return the forward output untouched."""
          return input
  @PIPELINES.register_module(
      Tasks.text_generation,
      module_name=Pipelines.llama2_text_generation_pipeline)
  class Llama2TaskPipeline(TextGenerationPipeline):
      """Llama2 text generation pipeline that tokenizes, generates and decodes itself."""

      def __init__(self,
                   model: Union[Model, str],
                   preprocessor: Preprocessor = None,
                   config_file: str = None,
                   device: str = 'gpu',
                   auto_collate=True,
                   **kwargs):
          """Use `model` and `preprocessor` to create a generation pipeline for prediction.

          The model is always loaded here with ``device_map='auto'`` and
          ``torch_dtype=torch.float16``. `preprocessor`, `config_file`, `device`
          and `auto_collate` are accepted but not forwarded to the parent
          constructor; only `kwargs` are.

          Args:
              model (str or Model): Supply either a local model dir which supported the text generation task,
                  or a model id from the model hub, or a torch model instance.
              preprocessor (Preprocessor): An optional preprocessor instance, please make sure the
                  preprocessor fits for the model if supplied.
              kwargs (dict, `optional`):
                  Extra kwargs passed into the parent pipeline's constructor.

          Examples:
              >>> from modelscope.utils.constant import Tasks
              >>> import torch
              >>> from modelscope.pipelines import pipeline
              >>> from modelscope import snapshot_download, Model
              >>> model_dir = snapshot_download("modelscope/Llama-2-13b-chat-ms",
              >>>     ignore_file_pattern = [r'\\w+\\.safetensors'])
              >>> pipe = pipeline(task=Tasks.text_generation, model=model_dir, device_map='auto',
              >>>     torch_dtype=torch.float16)
              >>> inputs = "What are the effects of coffee?"
              >>> result = pipe(inputs,max_length=200, do_sample=True, top_p=0.85,
              >>>     temperature=1.0, repetition_penalty=1., eos_token_id=2, bos_token_id=1, pad_token_id=0)
              >>> print(result['text'])

          To view other examples please check tests/pipelines/test_llama2_text_generation_pipeline.py.
          """
          self.model = Model.from_pretrained(
              model, device_map='auto', torch_dtype=torch.float16)
          from modelscope.models.nlp.llama2 import Llama2Tokenizer
          self.tokenizer = Llama2Tokenizer.from_pretrained(model)
          super().__init__(model=self.model, **kwargs)

      def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
          """Return the inputs untouched; tokenization happens in `forward`."""
          return inputs

      def _sanitize_parameters(self, **pipeline_parameters):
          """Route every call-time parameter to the forward step."""
          return {}, pipeline_parameters, {}

      def forward(self,
                  inputs: str,
                  max_length: int = 2048,
                  do_sample: bool = False,
                  top_p: float = 0.9,
                  temperature: float = 0.6,
                  repetition_penalty: float = 1.,
                  eos_token_id: int = 2,
                  bos_token_id: int = 1,
                  pad_token_id: int = 0,
                  **forward_params) -> Dict[str, Any]:
          """Tokenize `inputs`, generate, and decode the first sequence.

          The named parameters and any extra `forward_params` are passed to
          ``model.generate`` as keyword arguments.

          Returns:
              A dict with the decoded text under ``'text'``.
          """
          # Inputs go to 'cuda' whenever CUDA is available (as before);
          # on a host without CUDA fall back to CPU instead of crashing.
          target_device = 'cuda' if torch.cuda.is_available() else 'cpu'
          encoded = self.tokenizer(
              inputs, add_special_tokens=False, return_tensors='pt')
          generate_ids = self.model.generate(
              encoded.input_ids.to(target_device),
              max_length=max_length,
              do_sample=do_sample,
              top_p=top_p,
              temperature=temperature,
              repetition_penalty=repetition_penalty,
              eos_token_id=eos_token_id,
              bos_token_id=bos_token_id,
              pad_token_id=pad_token_id,
              **forward_params)
          text = self.tokenizer.batch_decode(
              generate_ids,
              skip_special_tokens=True,
              clean_up_tokenization_spaces=False)[0]
          return {'text': text}

      # format the outputs from pipeline
      def postprocess(self, input, **kwargs) -> Dict[str, Any]:
          """Return the forward output untouched."""
          return input
  @PIPELINES.register_module(
      Tasks.chat, module_name=Pipelines.llama2_text_generation_chat_pipeline)
  class Llama2chatTaskPipeline(Pipeline):
      """Use `model` and `preprocessor` to create a generation pipeline for prediction.

      Args:
          model (str or Model): Supply either a local model dir which supported the text generation task,
              or a model id from the model hub, or a torch model instance.
          preprocessor (Preprocessor): An optional preprocessor instance, please make sure the
              preprocessor fits for the model if supplied.
          kwargs (dict, `optional`):
              ``device_map`` and ``torch_dtype`` are read for model loading; all kwargs
              are forwarded to the base pipeline constructor.

      Examples:
          >>> from modelscope.utils.constant import Tasks
          >>> import torch
          >>> from modelscope.pipelines import pipeline
          >>> from modelscope import Model
          >>> pipe = pipeline(task=Tasks.chat, model="modelscope/Llama-2-7b-chat-ms", device_map='auto',
          >>>     torch_dtype=torch.float16, ignore_file_pattern = [r'.+\\.bin$'], model_revision='v1.0.5')
          >>> inputs = 'Where is the capital of Zhejiang?'
          >>> result = pipe(inputs,max_length=512, do_sample=False, top_p=0.9,
          >>>     temperature=0.6, repetition_penalty=1., eos_token_id=2, bos_token_id=1, pad_token_id=0)
          >>> print(result['response'])
          >>> inputs = 'What are the interesting places there?'
          >>> result = pipe(inputs,max_length=512, do_sample=False, top_p=0.9,
          >>>     temperature=0.6, repetition_penalty=1., eos_token_id=2, bos_token_id=1,
          >>>     pad_token_id=0, history=result['history'])
          >>> print(result['response'])
          >>> inputs = 'What are the company there?'
          >>> history_demo = [('Where is the capital of Zhejiang?',
          >>>     'Thank you for asking! The capital of Zhejiang Province is Hangzhou.')]
          >>> result = pipe(inputs,max_length=512, do_sample=False, top_p=0.9,
          >>>     temperature=0.6, repetition_penalty=1., eos_token_id=2, bos_token_id=1,
          >>>     pad_token_id=0, history=history_demo)
          >>> print(result['response'])

      To view other examples please check tests/pipelines/test_llama2_text_generation_pipeline.py.
      """

      def __init__(self,
                   model: Union[Model, str],
                   preprocessor: Preprocessor = None,
                   config_file: str = None,
                   device: str = 'gpu',
                   auto_collate: bool = True,
                   **kwargs) -> None:
          """Load the model and tokenizer, then initialise the base pipeline.

          `preprocessor`, `config_file`, `device` and `auto_collate` are accepted
          but not forwarded to the base constructor; only `kwargs` are.
          """
          device_map = kwargs.get('device_map', None)
          torch_dtype = kwargs.get('torch_dtype', None)
          self.model = Model.from_pretrained(
              model, device_map=device_map, torch_dtype=torch_dtype)
          from modelscope.models.nlp.llama2 import Llama2Tokenizer
          self.tokenizer = Llama2Tokenizer.from_pretrained(model)
          super().__init__(model=self.model, **kwargs)

      def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
          """Return the inputs untouched; no preprocessing is performed."""
          return inputs

      def _sanitize_parameters(self, **pipeline_parameters):
          """Route every call-time parameter to the forward step."""
          return {}, pipeline_parameters, {}

      def forward(self,
                  inputs: str,
                  max_length: int = 2048,
                  do_sample: bool = False,
                  top_p: float = 0.9,
                  temperature: float = 0.6,
                  repetition_penalty: float = 1.,
                  eos_token_id: int = 2,
                  bos_token_id: int = 1,
                  pad_token_id: int = 0,
                  system: str = 'you are a helpful assistant!',
                  history: Optional[List] = None,
                  **forward_params) -> Dict[str, Any]:
          """Run one chat turn through ``model.chat``.

          All named parameters are collected, together with any extra
          `forward_params`, into a single dict handed to the model.

          Args:
              inputs: The user message, stored under ``'text'``.
              history: Previous turns. When omitted, a fresh empty list is
                  created for this call.

          Returns:
              Whatever ``model.chat`` returns.
          """
          # A new list per call: a literal `[]` default would be one shared
          # object, so anything appended to it would leak into later calls.
          if history is None:
              history = []

          inputs_dict = forward_params
          inputs_dict.update(
              text=inputs,
              max_length=max_length,
              do_sample=do_sample,
              top_p=top_p,
              temperature=temperature,
              repetition_penalty=repetition_penalty,
              eos_token_id=eos_token_id,
              bos_token_id=bos_token_id,
              pad_token_id=pad_token_id,
              system=system,
              history=history)
          return self.model.chat(inputs_dict, self.tokenizer)

      # format the outputs from pipeline
      def postprocess(self, input, **kwargs) -> Dict[str, Any]:
          """Return the forward output untouched."""
          return input