Wonder-Griffin committed
Commit 2d8d1b2
1 parent: f1852c9

Upload JudgeXL

Files changed (4):
  1. README.md +3 -3
  2. config.json +35 -36
  3. judge_xl_model.py +140 -0
  4. model.safetensors +2 -2
README.md CHANGED
@@ -2,8 +2,6 @@
  base_model:
  - Wonder-Griffin/XL-Judge-LLM
  - Wonder-Griffin/Judge-GPT2
- library_name: transformers
- license: wtfpl
  datasets:
  - fka/awesome-chatgpt-prompts
  - BAAI/Infinity-Instruct
@@ -13,6 +11,8 @@ datasets:
  - Salesforce/wikitext
  language:
  - en
+ library_name: transformers
+ license: wtfpl
  metrics:
  - f1
  - accuracy
@@ -21,7 +21,7 @@ metrics:
  pipeline_tag: text-generation
  tags:
  - text-generation-inference
- inference: True
+ inference: true
  ---

  # Model Card for Model ID
config.json CHANGED
@@ -1,36 +1,35 @@
- {
-   "_name_or_path": "Wonder-Griffin/judge-xl-model",
-   "activation_function": "gelu",
-   "architectures": [
-     "JudgeXL"
-   ],
-   "batch_size": 32,
-   "dropout": 0.1,
-   "bos_token_id": 50256,
-   "eos_token_id": 50256,
-   "ff_expansion_factor": 4,
-   "hidden_size": 768,
-   "id2label": {
-     "0": "LABEL_0",
-     "1": "LABEL_1",
-     "2": "LABEL_2",
-     "3": "LABEL_3",
-     "4": "LABEL_4"
-   },
-   "is_decoder": true,
-   "label2id": {
-     "LABEL_0": 0,
-     "LABEL_1": 1,
-     "LABEL_2": 2,
-     "LABEL_3": 3,
-     "LABEL_4": 4
-   },
-   "learning_rate": 5e-05,
-   "max_len": 512,
-   "model_type": "judge-xl",
-   "n_head": 12,
-   "n_layer": 12,
-   "torch_dtype": "float32",
-   "transformers_version": "4.42.4",
-   "vocab_size": 50257
- }
+ {
+   "architectures": [
+     "JudgeXL"
+   ],
+   "auto_map": {
+     "AutoConfig": "judge_xl_model.JudgeXLConfig",
+     "AutoModelForCausalLM": "judge_xl_model.JudgeXL"
+   },
+   "dropout": 0.1,
+   "ff_expansion_factor": 4,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2",
+     "3": "LABEL_3",
+     "4": "LABEL_4"
+   },
+   "is_decoder": true,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2,
+     "LABEL_3": 3,
+     "LABEL_4": 4
+   },
+   "max_len": 256,
+   "model_type": "judge-xl",
+   "n_head": 12,
+   "n_layer": 12,
+   "rnn_units": 768,
+   "torch_dtype": "float32",
+   "transformers_version": "4.44.2",
+   "vocab_size": 50276
+ }
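
The added auto_map block ties the Auto classes to the code shipped in judge_xl_model.py, so the checkpoint is meant to be loaded through the standard Auto API with remote code enabled. A minimal loading sketch (the repo id is taken from the push_to_hub call in judge_xl_model.py; whether the uploaded weights load without key mismatches is not verified here):

from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "Wonder-Griffin/judge-xl-model"

# trust_remote_code=True is required because config.json routes the Auto classes
# to judge_xl_model.py in the repo rather than to a built-in architecture.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)

print(config.model_type)      # "judge-xl"
print(type(model).__name__)   # expected: JudgeXL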
 
judge_xl_model.py ADDED
@@ -0,0 +1,140 @@
import logging

import numpy as np
import torch
import torch.nn as nn
from transformers import AutoConfig, AutoModelForCausalLM, PretrainedConfig, PreTrainedModel

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# torch.autograd.set_detect_anomaly(True) is a debugging aid that slows every
# backward pass; it is left disabled so importing this module has no side effects.


class JudgeXLConfig(PretrainedConfig):
    model_type = "judge-xl"

    def __init__(self, vocab_size=50276, hidden_size=768, max_len=256, n_layer=12, n_head=12,
                 ff_expansion_factor=4, rnn_units=768, num_labels=5, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.max_len = max_len
        self.n_layer = n_layer
        self.n_head = n_head
        self.ff_expansion_factor = ff_expansion_factor
        self.rnn_units = rnn_units
        self.num_labels = num_labels
        self.dropout = dropout
        self.is_decoder = True


class CustomEmbedding(nn.Module):
    """Token-embedding lookup."""

    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, hidden_size)

    def forward(self, inputs):
        return self.embedding(inputs)


class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding, stored batch-first as (1, max_len, n_embd)."""

    def __init__(self, n_embd, max_len=5000):
        super().__init__()
        self.n_embd = n_embd
        self.max_len = max_len
        pe = torch.zeros(max_len, n_embd)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, n_embd, 2).float() * -(np.log(10000.0) / n_embd))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe.unsqueeze(0))  # (1, max_len, n_embd)

    def forward(self, x):
        # x is (batch, seq_len, n_embd); slice by sequence length, not batch size.
        return x + self.pe[:, :x.size(1), :]


class FeedForward(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense1 = nn.Linear(config.hidden_size, config.hidden_size * config.ff_expansion_factor)
        self.dense2 = nn.Linear(config.hidden_size * config.ff_expansion_factor, config.hidden_size)
        self.dropout = nn.Dropout(config.dropout)

    def forward(self, x):
        x = torch.nn.functional.gelu(self.dense1(x))
        x = self.dropout(x)
        return self.dense2(x)


class TransformerXLBlock(nn.Module):
    def __init__(self, config):
        super().__init__()
        # batch_first=True so the block consumes the same (batch, seq, hidden)
        # layout as the embedding and the LSTM below.
        self.attn = nn.MultiheadAttention(config.hidden_size, config.n_head,
                                          dropout=config.dropout, batch_first=True)
        self.ff = FeedForward(config)
        self.ln1 = nn.LayerNorm(config.hidden_size)
        self.ln2 = nn.LayerNorm(config.hidden_size)

    def forward(self, x, mask=None):
        # No causal mask is applied unless one is passed in via `mask`.
        attn_out, _ = self.attn(x, x, x, attn_mask=mask)
        out1 = self.ln1(x + attn_out)
        ff_out = self.ff(out1)
        return self.ln2(out1 + ff_out)


class JudgeXL(PreTrainedModel):
    config_class = JudgeXLConfig

    def __init__(self, config):
        super().__init__(config)
        self.token_embedding = CustomEmbedding(config.vocab_size, config.hidden_size)
        self.pos_encoding = PositionalEncoding(config.hidden_size, config.max_len)
        self.transformer_blocks = nn.ModuleList([TransformerXLBlock(config) for _ in range(config.n_layer)])
        self.ln_f = nn.LayerNorm(config.hidden_size)
        self.rnn = nn.LSTM(config.hidden_size, config.rnn_units, num_layers=2,
                           dropout=config.dropout, bidirectional=True, batch_first=True)
        # The bidirectional LSTM emits 2 * rnn_units features, so `fc` acts as the
        # language-model head. `lm_head` is kept for checkpoint compatibility but is
        # not used in forward: its input size (rnn_units) does not match the LSTM output.
        self.fc = nn.Linear(config.rnn_units * 2, config.vocab_size)
        self.lm_head = nn.Linear(config.rnn_units, config.vocab_size)
        self.post_init()

    def forward(self, x, mask=None):
        x = self.token_embedding(x)
        x = self.pos_encoding(x)
        for block in self.transformer_blocks:
            x = block(x, mask=mask)
        x = self.ln_f(x)
        x, _ = self.rnn(x)
        return self.fc(x)  # (batch, seq_len, vocab_size) logits

    def init_weights(self):
        """Defer to PreTrainedModel's standard weight initialization."""
        super().init_weights()

    def prepare_inputs_for_generation(self, input_ids, past=None, **kwargs):
        if past is None:
            return {"input_ids": input_ids}
        return {"input_ids": input_ids[:, -1:], "past_key_values": past}

    def _reorder_cache(self, past, beam_idx):
        return tuple(layer_past.index_select(1, beam_idx) for layer_past in past)

    def generate(self, prompt, max_len=100):
        # Greedy decoding. Assumes a tokenizer has been attached to the model,
        # e.g. model.tokenizer = AutoTokenizer.from_pretrained(...); the total
        # sequence length is limited by the positional table (config.max_len).
        self.eval()
        input_ids = self.tokenizer(prompt, return_tensors='pt').input_ids
        generated = input_ids
        with torch.no_grad():
            for _ in range(max_len):
                outputs = self.forward(generated)
                next_token_logits = outputs[:, -1, :]  # logits for the last position
                next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)
                generated = torch.cat((generated, next_token_id), dim=1)
                if self.tokenizer.eos_token_id is not None and next_token_id.item() == self.tokenizer.eos_token_id:
                    break
        return self.tokenizer.decode(generated[0], skip_special_tokens=True)


# Register the custom classes so that save_pretrained/push_to_hub write the
# auto_map entries seen in config.json.
JudgeXLConfig.register_for_auto_class(AutoConfig)
JudgeXL.register_for_auto_class(AutoModelForCausalLM)

if __name__ == "__main__":
    # Instantiate and upload only when run as a script, not when this module is
    # imported via trust_remote_code.
    config = JudgeXLConfig()
    model = JudgeXL(config)
    model.push_to_hub("Wonder-Griffin/judge-xl-model")
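
For orientation, a small usage sketch of the custom greedy generate loop above. The GPT-2 tokenizer is only a hypothetical stand-in (the commit ships no tokenizer, and the config's vocab_size of 50276 differs from GPT-2's 50257), and the model built here is freshly initialized rather than the uploaded checkpoint:

from transformers import AutoTokenizer

from judge_xl_model import JudgeXL, JudgeXLConfig

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # hypothetical stand-in tokenizer

# Shrink vocab_size to the stand-in tokenizer so every generated id is decodable;
# this is an illustration-only adjustment, not the shipped configuration.
config = JudgeXLConfig(vocab_size=len(tokenizer))
model = JudgeXL(config)

# generate() reads the tokenizer from this attribute.
model.tokenizer = tokenizer

print(model.generate("The judge ruled that", max_len=20))
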
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:84aaebd9c547b71caed84e0e314f9ab8893dbeddb054c0f2a9e3ddfff4ab42a3
- size 731484912
+ oid sha256:14c514e9ff5c7c297551a51e913a4ad6a636c9ac3de7e470dfe0c5638a0e2821
+ size 1053691936
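
The pointer file above records only the blob's sha256 and byte size; a quick way to sanity-check a locally downloaded model.safetensors against the new pointer (the local path is an assumption):

import hashlib
import os

path = "model.safetensors"  # adjust to wherever the file was downloaded
expected_oid = "14c514e9ff5c7c297551a51e913a4ad6a636c9ac3de7e470dfe0c5638a0e2821"
expected_size = 1053691936

# Hash the file in 1 MiB chunks to avoid loading ~1 GB into memory at once.
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

print("size ok:", os.path.getsize(path) == expected_size)
print("oid ok: ", sha.hexdigest() == expected_oid)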