吳成岳 committed
Commit: e2f65ae
Parent: 381e337

update model

README.md CHANGED
@@ -30,7 +30,7 @@ Mistral_Pro_8B_v0.1 showcases superior performance on a range of benchmarks. It
 | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
 | Gemma-7B | 61.9 | 82.2 | 64.6 | 44.8 | 79.0 | 50.9 | 32.3 |
 | Mistral-7B | 60.8 | 83.3 | 62.7 | 42.6 | 78.0 | 39.2 | 28.7 |
-| Mistral_Pro_8B_v0.1 | 62.6 | 82.5 | 60.7 | 47.6 | 78.1 | 50.3 | 32.3 |
+| Mistral_Pro_8B_v0.1 | 63.2 | 82.6 | 60.6 | 48.3 | 78.1 | 50.6 | 32.9 |
 
 
 ## Limitations
config.json CHANGED
@@ -2,7 +2,6 @@
   "architectures": [
     "MistralForCausalLM"
   ],
-  "attention_dropout": 0.0,
   "bos_token_id": 1,
   "eos_token_id": 2,
   "hidden_act": "silu",
@@ -19,7 +18,7 @@
   "sliding_window": 4096,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.37.2",
-  "use_cache": false,
+  "transformers_version": "4.34.0.dev0",
+  "use_cache": true,
   "vocab_size": 32000
 }
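
Beyond the version stamp, the notable change here is `use_cache` flipping back to `true`, so the KV cache is reused during autoregressive decoding. A minimal sketch to confirm the loaded config, assuming the repo id is `TencentARC/Mistral_Pro_8B_v0.1` (the diff itself does not name the repository):

```python
from transformers import AutoConfig

# Repo id is an assumption; substitute the actual model repository.
config = AutoConfig.from_pretrained("TencentARC/Mistral_Pro_8B_v0.1")

print(config.use_cache)       # True after this commit: KV cache reused during generation
print(config.sliding_window)  # 4096, unchanged by this diff
```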
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.37.2"
+  "transformers_version": "4.34.0.dev0"
 }
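
Only the stamped `transformers_version` changes in this file; the generation defaults themselves are untouched. They can be inspected directly (same assumed repo id as above):

```python
from transformers import GenerationConfig

gen = GenerationConfig.from_pretrained("TencentARC/Mistral_Pro_8B_v0.1")
print(gen.bos_token_id, gen.eos_token_id)  # 1 2, matching the diff context
```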
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18bb05a004abb9a8d562fdbc13ecbc071dcad6a4cbd39828c87d0d7c3e3b0625
+oid sha256:f689c8c817b11e536872f82e5cdd4d76376abdb9fa84da83bcb3b51ac1da2e1f
 size 4943162336
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ff565f6fe465df47de8674b9afb413fd2b8ed6a5ed6df1b7983b0f0d4905b9a
+oid sha256:518388960a6dcf8058fcb41e03a7f8d6dbe334263d713432bf21b59cd03dd5b8
 size 4999819336
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0e60747d07722721defa773e80bc4d42804468df14b70065dea42e84e595170
+oid sha256:dc8ee7b371ff025a73041d01cc1ce050c15c4440c62ce6762705dfd34e7e0532
 size 4915916184
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc3710b4a17fae46d1dbfaba133a8be228e0e5bbcc5a5f7703817ff401f37f64
+oid sha256:1ca3a0b6960d370bffeea03546c057026eac3dc1dfa881570bd29fa62a04ecb9
 size 3114400560
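
All four shards are Git LFS pointer files, so only the `oid sha256:` line changes in each diff; the new digests mean the weight tensors themselves were rewritten even though the byte sizes are identical. A generic sketch for verifying a downloaded shard against its pointer:

```python
import hashlib

def sha256_of(path: str) -> str:
    """Stream the file so multi-GB shards need not fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected digest taken from the new LFS pointer of the first shard above.
expected = "f689c8c817b11e536872f82e5cdd4d76376abdb9fa84da83bcb3b51ac1da2e1f"
assert sha256_of("model-00001-of-00004.safetensors") == expected
```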
special_tokens_map.json CHANGED
@@ -1,24 +1,5 @@
 {
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "</s>",
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "unk_token": "<unk>"
 }
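
The nested objects removed above are serialized `AddedToken`s with every flag pinned; the bare strings that replace them defer those flags to the tokenizer's defaults. Note also that `pad_token` is dropped from the map entirely. In `transformers` terms, the two representations look roughly like this (a sketch, not code from this repo):

```python
from transformers import AddedToken

# Old form: every flag written out explicitly in special_tokens_map.json.
bos_explicit = AddedToken(
    "<s>", lstrip=False, normalized=False, rstrip=False, single_word=False
)

# New form: a bare string; the lstrip/normalized/rstrip/single_word flags
# fall back to the tokenizer's defaults when the map is loaded.
bos_plain = "<s>"
```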
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -33,7 +33,7 @@
   "eos_token": "</s>",
   "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "</s>",
+  "pad_token": null,
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",