subbu264 committed
Commit 7bd1291
1 Parent(s): 6cff4b1

Upload configuration_codet5p.py

Files changed (1):
  1. configuration_codet5p.py +113 -0
configuration_codet5p.py ADDED
@@ -0,0 +1,113 @@
+ # coding=utf-8
+ # Copyright 2023 Salesforce authors, The EleutherAI, and HuggingFace Teams. All rights reserved.
+
+ """CodeT5+ model configuration"""
+ import copy
+ from transformers.configuration_utils import PretrainedConfig
+ from transformers.utils import logging
+
+ logger = logging.get_logger(__name__)
+
+
+ # Adapted from transformers.models.codegen.configuration_codegen.CodeGenConfig
+ class CodeT5pModuleConfig(PretrainedConfig):
+     model_type = "codet5p_module"
+     attribute_map = {
+         "max_position_embeddings": "n_positions",
+         "hidden_size": "n_embd",
+         "num_attention_heads": "n_head",
+         "num_hidden_layers": "n_layer",
+     }
+
+     def __init__(
+         self,
+         vocab_size=50400,
+         n_positions=2048,
+         n_ctx=2048,
+         n_embd=4096,
+         n_layer=28,
+         n_head=16,
+         rotary_dim=64,
+         n_inner=None,
+         activation_function="gelu_new",
+         resid_pdrop=0.0,
+         embd_pdrop=0.0,
+         attn_pdrop=0.0,
+         layer_norm_epsilon=1e-5,
+         initializer_range=0.02,
+         scale_attn_weights=True,
+         use_cache=True,
+         bos_token_id=50256,
+         eos_token_id=50256,
+         tie_word_embeddings=False,
+         **kwargs,
+     ):
+         self.vocab_size = vocab_size
+         self.n_ctx = n_ctx
+         self.n_positions = n_positions
+         self.n_embd = n_embd
+         self.n_layer = n_layer
+         self.n_head = n_head
+         self.n_inner = n_inner
+         self.rotary_dim = rotary_dim
+         self.activation_function = activation_function
+         self.resid_pdrop = resid_pdrop
+         self.embd_pdrop = embd_pdrop
+         self.attn_pdrop = attn_pdrop
+         self.layer_norm_epsilon = layer_norm_epsilon
+         self.initializer_range = initializer_range
+         self.scale_attn_weights = scale_attn_weights
+         self.use_cache = use_cache
+
+         self.bos_token_id = bos_token_id
+         self.eos_token_id = eos_token_id
+
+         super().__init__(
+             bos_token_id=bos_token_id, eos_token_id=eos_token_id, tie_word_embeddings=tie_word_embeddings, **kwargs
+         )
+
+
+ # Adapted from transformers.models.encoder_decoder.configuration_encoder_decoder.EncoderDecoderConfig
+ class CodeT5pConfig(PretrainedConfig):
+     model_type = "codet5p"
+     is_composition = True
+
+     def __init__(self, **kwargs):
+         super().__init__(**kwargs)
+         assert (
+             "encoder" in kwargs and "decoder" in kwargs
+         ), "Config has to be initialized with an encoder and a decoder config"
+         encoder_config = kwargs.pop("encoder")
+         decoder_config = kwargs.pop("decoder")
+         encoder_model_type = encoder_config.pop("model_type")
+         decoder_model_type = decoder_config.pop("model_type")
+
+         if encoder_model_type != decoder_model_type:
+             logger.warning("Encoder and decoder model types are different")
+
+         self.encoder = CodeT5pModuleConfig(**encoder_config)
+         self.decoder = CodeT5pModuleConfig(**decoder_config)
+         self.is_encoder_decoder = True
+
+     @classmethod
+     def from_encoder_decoder_configs(
+         cls, encoder_config: PretrainedConfig, decoder_config: PretrainedConfig, **kwargs
+     ) -> PretrainedConfig:
+         logger.info("Setting `config.is_decoder=True` and `config.add_cross_attention=True` for decoder_config")
+         decoder_config.is_decoder = True
+         decoder_config.add_cross_attention = True
+
+         return cls(encoder=encoder_config.to_dict(), decoder=decoder_config.to_dict(), **kwargs)
+
+     def to_dict(self):
+         """
+         Serializes this instance to a Python dictionary. Overrides the default *to_dict()* from *PretrainedConfig*.
+
+         Returns:
+             `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance.
+         """
+         output = copy.deepcopy(self.__dict__)
+         output["encoder"] = self.encoder.to_dict()
+         output["decoder"] = self.decoder.to_dict()
+         output["model_type"] = self.__class__.model_type
+         return output