configuration.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. """ ChatGLM model configuration """
  2. from transformers import PretrainedConfig
  3. from modelscope.utils import logger as logging
# Module-level logger obtained from the modelscope logging helper imported
# above (`modelscope.utils.logger`, aliased as `logging`).
logger = logging.get_logger()
  5. class ChatGLM2Config(PretrainedConfig):
  6. model_type = 'chatglm'
  7. def __init__(self,
  8. num_layers=28,
  9. padded_vocab_size=65024,
  10. hidden_size=4096,
  11. ffn_hidden_size=13696,
  12. kv_channels=128,
  13. num_attention_heads=32,
  14. seq_length=2048,
  15. hidden_dropout=0.0,
  16. attention_dropout=0.0,
  17. layernorm_epsilon=1e-5,
  18. rmsnorm=True,
  19. apply_residual_connection_post_layernorm=False,
  20. post_layer_norm=True,
  21. add_bias_linear=False,
  22. add_qkv_bias=False,
  23. bias_dropout_fusion=True,
  24. multi_query_attention=False,
  25. multi_query_group_num=1,
  26. apply_query_key_layer_scaling=True,
  27. attention_softmax_in_fp32=True,
  28. fp32_residual_connection=False,
  29. quantization_bit=0,
  30. pre_seq_len=None,
  31. prefix_projection=False,
  32. rope_ratio=1.0,
  33. **kwargs):
  34. self.num_layers = num_layers
  35. self.vocab_size = padded_vocab_size
  36. self.padded_vocab_size = padded_vocab_size
  37. self.hidden_size = hidden_size
  38. self.ffn_hidden_size = ffn_hidden_size
  39. self.kv_channels = kv_channels
  40. self.num_attention_heads = num_attention_heads
  41. self.seq_length = seq_length
  42. self.hidden_dropout = hidden_dropout
  43. self.attention_dropout = attention_dropout
  44. self.layernorm_epsilon = layernorm_epsilon
  45. self.rmsnorm = rmsnorm
  46. self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
  47. self.post_layer_norm = post_layer_norm
  48. self.add_bias_linear = add_bias_linear
  49. self.add_qkv_bias = add_qkv_bias
  50. self.bias_dropout_fusion = bias_dropout_fusion
  51. self.multi_query_attention = multi_query_attention
  52. self.multi_query_group_num = multi_query_group_num
  53. self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
  54. self.attention_softmax_in_fp32 = attention_softmax_in_fp32
  55. self.fp32_residual_connection = fp32_residual_connection
  56. self.quantization_bit = quantization_bit
  57. self.pre_seq_len = pre_seq_len
  58. self.prefix_projection = prefix_projection
  59. self.rope_ratio = rope_ratio
  60. super().__init__(**kwargs)