# configuration.py
#
# Copyright (c) Alibaba Cloud.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from transformers import PretrainedConfig
from modelscope.utils.logger import get_logger
  7. logger = get_logger()
  8. QWEN_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
  9. class QWenConfig(PretrainedConfig):
  10. model_type = 'qwen'
  11. keys_to_ignore_at_inference = ['past_key_values']
  12. attribute_map = {
  13. 'hidden_size': 'n_embd',
  14. 'num_attention_heads': 'n_head',
  15. 'max_position_embeddings': 'n_positions',
  16. 'num_hidden_layers': 'n_layer',
  17. }
  18. def __init__(
  19. self,
  20. vocab_size=151851,
  21. n_embd=4096,
  22. n_layer=32,
  23. n_head=32,
  24. n_inner=None,
  25. embd_pdrop=0.0,
  26. attn_pdrop=0.0,
  27. layer_norm_epsilon=1e-5,
  28. initializer_range=0.02,
  29. scale_attn_weights=True,
  30. use_cache=True,
  31. eos_token_id=151643,
  32. apply_residual_connection_post_layernorm=False,
  33. bf16=True,
  34. kv_channels=128,
  35. rotary_pct=1.0,
  36. rotary_emb_base=10000,
  37. use_dynamic_ntk=False,
  38. use_logn_attn=False,
  39. use_flash_attn=True,
  40. ffn_hidden_size=22016,
  41. no_bias=True,
  42. tie_word_embeddings=False,
  43. **kwargs,
  44. ):
  45. self.eos_token_id = eos_token_id
  46. super().__init__(
  47. eos_token_id=eos_token_id,
  48. tie_word_embeddings=tie_word_embeddings,
  49. **kwargs)
  50. self.vocab_size = vocab_size
  51. self.n_embd = n_embd
  52. self.n_layer = n_layer
  53. self.n_head = n_head
  54. self.n_inner = n_inner
  55. self.embd_pdrop = embd_pdrop
  56. self.attn_pdrop = attn_pdrop
  57. self.layer_norm_epsilon = layer_norm_epsilon
  58. self.initializer_range = initializer_range
  59. self.scale_attn_weights = scale_attn_weights
  60. self.use_cache = use_cache
  61. self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
  62. self.bf16 = bf16
  63. self.kv_channels = kv_channels
  64. self.rotary_pct = rotary_pct
  65. self.rotary_emb_base = rotary_emb_base
  66. self.use_dynamic_ntk = use_dynamic_ntk
  67. self.use_logn_attn = use_logn_attn
  68. self.use_flash_attn = use_flash_attn
  69. self.ffn_hidden_size = ffn_hidden_size
  70. self.no_bias = no_bias
  71. self.tie_word_embeddings = tie_word_embeddings