Voryoji committed on
Commit
b799660
1 Parent(s): c575b04

Upload 2 files

Browse files
Files changed (2) hide show
  1. bigbench.py +288 -0
  2. mmlu.py +160 -0
bigbench.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+
3
+ # Lint as: python3
4
+ """bigbench datasets"""
5
+
6
+ from __future__ import absolute_import, division, print_function
7
+
8
+ import json
9
+ import os
10
+ import textwrap
11
+ import six
12
+ import datasets
13
+
14
+
15
# Citation for the BIG-bench paper (arXiv:2206.04615).
CITATION = r"""
@article{srivastava2022beyond,
title={Beyond the imitation game: Quantifying and extrapolating the capabilities of language models},
author={Srivastava, Aarohi and Rastogi, Abhinav and Rao, Abhishek and Shoeb, Abu Awal Md and Abid, Abubakar and Fisch, Adam and Brown, Adam R and Santoro, Adam and Gupta, Aditya and Garriga-Alonso, Adri{\`a} and others},
journal={arXiv preprint arXiv:2206.04615},
year={2022}
}
"""

# Short description shared by every task configuration.
DESCRIPTION = """\
bigbench json tasks
"""

# Mirror of the preprocessed BIG-bench JSON tasks (zip archive).
DATA_URL = "https://www.dropbox.com/s/cjdywlalikdb1c6/bigbench.zip?dl=1"

# One dataset configuration is created per BIG-bench JSON task below.
CONFIGS = [
    "abstract_narrative_understanding",
    "anachronisms",
    "analogical_similarity",
    "analytic_entailment",
    "arithmetic",
    "ascii_word_recognition",
    "authorship_verification",
    "auto_categorization",
    "auto_debugging",
    "bbq_lite_json",
    "bridging_anaphora_resolution_barqa",
    "causal_judgment",
    "cause_and_effect",
    "checkmate_in_one",
    "chess_state_tracking",
    "chinese_remainder_theorem",
    "cifar10_classification",
    "code_line_description",
    "codenames",
    "color",
    "common_morpheme",
    "conceptual_combinations",
    "conlang_translation",
    "contextual_parametric_knowledge_conflicts",
    "crash_blossom",
    "crass_ai",
    "cryobiology_spanish",
    "cryptonite",
    "cs_algorithms",
    "dark_humor_detection",
    "date_understanding",
    "disambiguation_qa",
    "discourse_marker_prediction",
    "disfl_qa",
    "dyck_languages",
    "elementary_math_qa",
    "emoji_movie",
    "emojis_emotion_prediction",
    "empirical_judgments",
    "english_proverbs",
    "english_russian_proverbs",
    "entailed_polarity",
    "entailed_polarity_hindi",
    "epistemic_reasoning",
    "evaluating_information_essentiality",
    "fact_checker",
    "fantasy_reasoning",
    "few_shot_nlg",
    "figure_of_speech_detection",
    "formal_fallacies_syllogisms_negation",
    "gem",
    "gender_inclusive_sentences_german",
    "general_knowledge",
    "geometric_shapes",
    "goal_step_wikihow",
    "gre_reading_comprehension",
    "hhh_alignment",
    "hindi_question_answering",
    "hindu_knowledge",
    "hinglish_toxicity",
    "human_organs_senses",
    "hyperbaton",
    "identify_math_theorems",
    "identify_odd_metaphor",
    "implicatures",
    "implicit_relations",
    "indic_cause_and_effect",
    "intent_recognition",
    "international_phonetic_alphabet_nli",
    "international_phonetic_alphabet_transliterate",
    "intersect_geometry",
    "irony_identification",
    "kanji_ascii",
    "kannada",
    "key_value_maps",
    "known_unknowns",
    "language_games",
    "language_identification",
    "linguistic_mappings",
    "linguistics_puzzles",
    "list_functions",
    "logic_grid_puzzle",
    "logical_args",
    "logical_deduction",
    "logical_fallacy_detection",
    "logical_sequence",
    "mathematical_induction",
    "matrixshapes",
    "medical_questions_russian",
    "metaphor_boolean",
    "metaphor_understanding",
    "minute_mysteries_qa",
    "misconceptions",
    "misconceptions_russian",
    "mnist_ascii",
    "modified_arithmetic",
    "moral_permissibility",
    "movie_dialog_same_or_different",
    "movie_recommendation",
    "mult_data_wrangling",
    "navigate",
    "nonsense_words_grammar",
    "novel_concepts",
    "object_counting",
    "odd_one_out",
    "operators",
    "paragraph_segmentation",
    "parsinlu_qa",
    "parsinlu_reading_comprehension",
    "penguins_in_a_table",
    "periodic_elements",
    "persian_idioms",
    "phrase_relatedness",
    "physical_intuition",
    "physics",
    "physics_questions",
    "play_dialog_same_or_different",
    "polish_sequence_labeling",
    "presuppositions_as_nli",
    "qa_wikidata",
    "question_selection",
    "real_or_fake_text",
    "reasoning_about_colored_objects",
    "repeat_copy_logic",
    "rephrase",
    "rhyming",
    "riddle_sense",
    "ruin_names",
    "salient_translation_error_detection",
    "scientific_press_release",
    "semantic_parsing_in_context_sparc",
    "semantic_parsing_spider",
    "sentence_ambiguity",
    "similarities_abstraction",
    "simp_turing_concept",
    "simple_arithmetic_json",
    "simple_arithmetic_json_multiple_choice",
    "simple_arithmetic_json_subtasks",
    "simple_arithmetic_multiple_targets_json",
    "simple_ethical_questions",
    "simple_text_editing",
    "snarks",
    "social_iqa",
    "social_support",
    "sports_understanding",
    "strange_stories",
    "strategyqa",
    "sufficient_information",
    "suicide_risk",
    "swahili_english_proverbs",
    "swedish_to_german_proverbs",
    "symbol_interpretation",
    "tellmewhy",
    "temporal_sequences",
    "tense",
    "timedial",
    "topical_chat",
    "tracking_shuffled_objects",
    "understanding_fables",
    "undo_permutation",
    "unit_conversion",
    "unit_interpretation",
    "unnatural_in_context_learning",
    "vitaminc_fact_verification",
    "what_is_the_tao",
    "which_wiki_edit",
    "winowhy",
    "word_sorting",
    "word_unscrambling",
]
199
+
200
class bigbench_Config(datasets.BuilderConfig):
    """BuilderConfig for a single bigbench JSON task."""

    def __init__(
        self,
        text_features,
        label_classes=None,
        process_label=lambda x: x,
        **kwargs,
    ):
        """BuilderConfig for bigbench.

        Args:
            text_features: `dict[string, string]`, map from the name of the
                feature dict for each text field to the name of the column in
                the data file.
            label_classes: optional `list[string]` of label class names for
                tasks with a closed label set.
            process_label: callable applied to a raw label value; identity by
                default.
            **kwargs: keyword arguments forwarded to `datasets.BuilderConfig`
                (e.g. `name`).
        """
        super(bigbench_Config, self).__init__(
            version=datasets.Version("1.0.0", ""), **kwargs
        )
        self.text_features = text_features
        # Fix: label_classes and process_label were previously accepted but
        # silently discarded; store them so configs can carry label metadata.
        self.label_classes = label_classes
        self.process_label = process_label
        self.data_url = DATA_URL
        # Each task's jsonl files live in a folder named after the config.
        self.data_dir = self.name
        self.citation = textwrap.dedent(CITATION)
        self.description = ""
        self.url = "https://github.com/google/BIG-bench"
231
+
232
+
233
class bigbench(datasets.GeneratorBasedBuilder):
    """BIG-bench JSON tasks, exposed as one dataset configuration per task."""
    # Fix: the previous docstring was a copy-paste from the GLUE script
    # ("General Language Understanding Evaluation") and described the wrong
    # benchmark.

    BUILDER_CONFIG_CLASS = bigbench_Config

    BUILDER_CONFIGS = [
        bigbench_Config(
            name=name,
            text_features={"inputs": "inputs"},
        )
        for name in CONFIGS
    ]

    def _info(self):
        # All tasks share the same schema: a text input plus free-form targets
        # and optional multiple-choice targets with per-choice scores.
        features = {
            "inputs": datasets.Value("string"),
            "targets": datasets.features.Sequence(datasets.Value("string")),
            "multiple_choice_targets": datasets.features.Sequence(datasets.Value("string")),
            "multiple_choice_scores": datasets.features.Sequence(datasets.Value("int32")),
        }
        features["idx"] = datasets.Value("int32")
        return datasets.DatasetInfo(
            description=DESCRIPTION,
            features=datasets.Features(features),
            homepage=self.config.url,
            # Fix: previously `self.config.citation + "\n" + CITATION`, which
            # emitted the identical citation block twice (config.citation is
            # already the dedented CITATION).
            citation=self.config.citation,
        )

    def _split_generators(self, dl_manager):
        """Downloads the archive and yields train/validation generators."""
        dl_dir = dl_manager.download_and_extract(self.config.data_url)
        # Per-task files live under a folder named after the config.
        data_dir = os.path.join(dl_dir, self.config.data_dir)

        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "data_file": os.path.join(data_dir, "train.jsonl"),
                    "split": "train",
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={
                    "data_file": os.path.join(data_dir, "validation.jsonl"),
                    "split": "validation",
                },
            ),
        ]

    def _generate_examples(self, data_file, split):
        """Yields (id, example) pairs, one per line of the jsonl file.

        `split` is unused but kept because it is passed via gen_kwargs.
        """
        with open(data_file, "r", encoding="utf-8") as f:
            for id_, line in enumerate(f):
                yield id_, json.loads(line)
mmlu.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+
17
+ import csv
18
+
19
+ import datasets
20
+
21
+
22
+ _CITATION = """\
23
+ @article{hendryckstest2021,
24
+ title={Measuring Massive Multitask Language Understanding},
25
+ author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
26
+ journal={Proceedings of the International Conference on Learning Representations (ICLR)},
27
+ year={2021}
28
+ }
29
+ """
30
+
31
+ _DESCRIPTION = """\
32
+ This is a massive multitask test consisting of multiple-choice questions from various branches of knowledge, covering 57 tasks including elementary mathematics, US history, computer science, law, and more.
33
+ """
34
+
35
+ _HOMEPAGE = "https://github.com/hendrycks/test"
36
+
37
+ _URL = "https://www.dropbox.com/s/nv4z13trkpq80bj/mmlu.tar?dl=1"
38
+
39
+ _SUBJECTS = [
40
+ "abstract_algebra",
41
+ "anatomy",
42
+ "astronomy",
43
+ "business_ethics",
44
+ "clinical_knowledge",
45
+ "college_biology",
46
+ "college_chemistry",
47
+ "college_computer_science",
48
+ "college_mathematics",
49
+ "college_medicine",
50
+ "college_physics",
51
+ "computer_security",
52
+ "conceptual_physics",
53
+ "econometrics",
54
+ "electrical_engineering",
55
+ "elementary_mathematics",
56
+ "formal_logic",
57
+ "global_facts",
58
+ "high_school_biology",
59
+ "high_school_chemistry",
60
+ "high_school_computer_science",
61
+ "high_school_european_history",
62
+ "high_school_geography",
63
+ "high_school_government_and_politics",
64
+ "high_school_macroeconomics",
65
+ "high_school_mathematics",
66
+ "high_school_microeconomics",
67
+ "high_school_physics",
68
+ "high_school_psychology",
69
+ "high_school_statistics",
70
+ "high_school_us_history",
71
+ "high_school_world_history",
72
+ "human_aging",
73
+ "human_sexuality",
74
+ "international_law",
75
+ "jurisprudence",
76
+ "logical_fallacies",
77
+ "machine_learning",
78
+ "management",
79
+ "marketing",
80
+ "medical_genetics",
81
+ "miscellaneous",
82
+ "moral_disputes",
83
+ "moral_scenarios",
84
+ "nutrition",
85
+ "philosophy",
86
+ "prehistory",
87
+ "professional_accounting",
88
+ "professional_law",
89
+ "professional_medicine",
90
+ "professional_psychology",
91
+ "public_relations",
92
+ "security_studies",
93
+ "sociology",
94
+ "us_foreign_policy",
95
+ "virology",
96
+ "world_religions",
97
+ ]
98
+
99
+
100
class HendrycksTest(datasets.GeneratorBasedBuilder):
    """Massive multitask MC test consisting of 57 tasks."""

    BUILDER_CONFIGS = [
        datasets.BuilderConfig(
            name=sub,
            version=datasets.Version("1.0.0"),
            description=f"Hendrycks Test Subject {sub}",
        )
        for sub in _SUBJECTS
    ]

    def _info(self):
        features = datasets.Features(
            {
                "question": datasets.Value("string"),
                "choices": datasets.features.Sequence(datasets.Value("string")),
                # Answers in the CSVs are the letters A-D; ClassLabel maps
                # them to integer ids 0-3.
                "answer": datasets.features.ClassLabel(num_classes=4, names=["A", "B", "C", "D"]),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Returns SplitGenerators for the test, validation and dev splits."""
        archive = dl_manager.download(_URL)
        # iter_archive streams the tar without extracting it; one fresh
        # iterator is needed per split.
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={"iter_archive": dl_manager.iter_archive(archive), "split": "test"},
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={
                    "iter_archive": dl_manager.iter_archive(archive),
                    "split": "val",
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split("dev"),
                gen_kwargs={
                    "iter_archive": dl_manager.iter_archive(archive),
                    "split": "dev",
                },
            ),
        ]

    def _generate_examples(self, iter_archive, split):
        """Yields (key, example) tuples from this subject's CSV for `split`."""
        # Robustness fix: match the exact archive path instead of two substring
        # tests, which could collide if one subject name were ever a suffix of
        # another subject's file name.
        target = f"data/{split}/{self.config.name}_{split}.csv"
        for id_file, (path, file) in enumerate(iter_archive):
            if path.endswith(target):
                lines = (line.decode("utf-8") for line in file)
                for id_line, data in enumerate(csv.reader(lines)):
                    # Row layout: question, four choices, answer letter.
                    yield f"{id_file}_{id_line}", {
                        "question": data[0],
                        "choices": data[1:5],
                        "answer": data[5],
                    }
                # Exactly one file exists per subject/split; stop scanning.
                break