| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102 |
- """ ChatGLM model configuration """
- from transformers.configuration_utils import PretrainedConfig
- from modelscope.utils import logger as logging
- logger = logging.get_logger()
class ChatGLMConfig(PretrainedConfig):
    r"""
    Configuration container for a [`~ChatGLMModel`].

    An instance holds the hyper-parameters that define the model architecture.
    Building one with no arguments yields a configuration similar to the
    ChatGLM-6B architecture
    ([THUDM/ChatGLM-6B](https://huggingface.co/THUDM/chatglm-6b)).

    The class derives from [`PretrainedConfig`]; see that class's
    documentation for the behaviour shared by all configuration objects.

    Args:
        vocab_size (`int`, *optional*, defaults to 150528):
            Vocabulary size of the model, i.e. the number of distinct tokens
            that can be represented by the `inputs_ids` passed when calling
            [`~ChatGLMModel`].
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the hidden representations of the Transformer layers.
        num_layers (`int`, *optional*, defaults to 28):
            Number of hidden layers in the Transformer.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads in each attention layer.
        layernorm_epsilon (`float`, *optional*, defaults to 1e-5):
            The epsilon used by the layer normalization layers.
        use_cache (`bool`, *optional*, defaults to `False`):
            Whether the model should return the last key/values attentions
            (not used by all models).
        bos_token_id (`int`, *optional*, defaults to 150004):
            Id of the beginning-of-sequence token. Also forwarded to
            [`PretrainedConfig`].
        eos_token_id (`int`, *optional*, defaults to 150005):
            Id of the end-of-sequence token. Also forwarded to
            [`PretrainedConfig`].
        mask_token_id (`int`, *optional*, defaults to 150000):
            Id stored as `mask_token_id`; presumably the id of the mask
            token (confirm against the tokenizer).
        gmask_token_id (`int`, *optional*, defaults to 150001):
            Id stored as `gmask_token_id`; presumably the id of the gMASK
            token (confirm against the tokenizer).
        pad_token_id (`int`, *optional*, defaults to 0):
            Id of the padding token. Also forwarded to [`PretrainedConfig`].
        max_sequence_length (`int`, *optional*, defaults to 2048):
            The maximum sequence length that this model might ever be used
            with.
        inner_hidden_size (`int`, *optional*, defaults to 16384):
            Dimension of the "intermediate" (i.e., feed-forward) layer in the
            Transformer.
        position_encoding_2d (`bool`, *optional*, defaults to `True`):
            Stored as given; presumably selects a two-dimensional position
            encoding in the model (confirm against the model code).
        quantization_bit (`int`, *optional*, defaults to 0):
            Stored as given; presumably the weight-quantization bit width,
            with 0 meaning no quantization (confirm against the model code).
        pre_seq_len (`int`, *optional*, defaults to `None`):
            Stored as given; presumably the length of a learned prefix
            (confirm against the model code).
        prefix_projection (`bool`, *optional*, defaults to `False`):
            Stored as given; presumably toggles a projection on the prefix
            (confirm against the model code).
        kwargs:
            Any additional keyword arguments are passed through to
            [`PretrainedConfig`].

    Example:

    ```python
    >>> from modelscope.models.nlp.chatglm.configuration import ChatGLMConfig
    >>> from modelscope.models.nlp.chatglm.text_generation import ChatGLMModel

    >>> # Build a configuration with the ChatGLM-6B style defaults
    >>> configuration = ChatGLMConfig()

    >>> # Build a model from that configuration
    >>> model = ChatGLMModel(configuration)

    >>> # Read the configuration back from the model
    >>> configuration = model.config
    ```
    """

    model_type = 'chatglm'

    def __init__(
            self,
            vocab_size=150528,
            hidden_size=4096,
            num_layers=28,
            num_attention_heads=32,
            layernorm_epsilon=1e-5,
            use_cache=False,
            bos_token_id=150004,
            eos_token_id=150005,
            mask_token_id=150000,
            gmask_token_id=150001,
            pad_token_id=0,
            max_sequence_length=2048,
            inner_hidden_size=16384,
            position_encoding_2d=True,
            quantization_bit=0,
            pre_seq_len=None,
            prefix_projection=False,
            **kwargs):
        # Core architecture hyper-parameters.
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.max_sequence_length = max_sequence_length
        self.layernorm_epsilon = layernorm_epsilon
        self.inner_hidden_size = inner_hidden_size

        # Whether key/value states are returned for reuse.
        self.use_cache = use_cache

        # Special token ids.
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.pad_token_id = pad_token_id
        self.mask_token_id = mask_token_id
        self.gmask_token_id = gmask_token_id

        # Remaining model options, stored exactly as supplied.
        self.position_encoding_2d = position_encoding_2d
        self.quantization_bit = quantization_bit
        self.pre_seq_len = pre_seq_len
        self.prefix_projection = prefix_projection

        # The base class receives the pad/bos/eos ids together with every
        # extra keyword argument the caller provided.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs)
|