Spaces:
Runtime error
Runtime error
File size: 4,058 Bytes
208053f d27fe32 208053f d27fe32 208053f 15f5208 208053f d27fe32 208053f d27fe32 208053f d27fe32 208053f d27fe32 208053f 15f5208 208053f 15f5208 208053f d27fe32 208053f 15f5208 208053f d27fe32 208053f d27fe32 208053f 15f5208 208053f d27fe32 208053f d27fe32 208053f d27fe32 028491e 208053f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import enum
import pandas as pd
from tasks import ner, nli, qa, summarization
class LanguageType(enum.Enum):
    """Resource tier of a language, as decided by ``_get_language_type``."""

    # Word count below the threshold used by ``_get_language_type``.
    Low = "Low"
    # Word count at or above that threshold.
    High = "High"
class ModelType(enum.Enum):
    """Kind of model a configuration is recommended for.

    ``recommend_config`` compares its ``model_type`` argument against the
    ``.value`` of these members (plain strings), not the members themselves.
    """

    English = "English"
    Multilingual = "Multilingual"
# Task identifiers compared against the ``task`` argument of
# ``construct_generic_prompt`` and ``recommend_config``.
QA, SUMMARIZATION, NLI, NER = "QA", "Summarization", "NLI", "NER"
def construct_generic_prompt(
    task,
    instruction,
    test_example,
    zero_shot,
    num_examples,
    selected_language,
    dataset,
    config,
):
    """Build a prompt by delegating to the builder of the matching task module.

    Args:
        task: Task identifier; compared against ``SUMMARIZATION``, ``NER`` and
            ``QA``. Any other value is routed to the NLI builder.
        instruction: Forwarded unchanged to the task builder.
        test_example: Forwarded unchanged to the task builder.
        zero_shot: Forwarded unchanged to the task builder.
        num_examples: Forwarded unchanged to the task builder.
        selected_language: Converted with ``str(...).lower()`` and passed to
            the builder as ``lang``.
        dataset: Forwarded only to the summarization and NER builders; the QA
            and NLI builders do not receive it.
        config: Forwarded unchanged to the task builder.

    Returns:
        Whatever the selected ``construct_prompt`` function returns.
    """
    print(task)

    # Keyword arguments every task-specific builder receives.
    shared_kwargs = {
        "instruction": instruction,
        "test_example": test_example,
        "zero_shot": zero_shot,
        "num_examples": num_examples,
        "lang": str(selected_language).lower(),
        "config": config,
    }

    if task == SUMMARIZATION:
        return summarization.construct_prompt(dataset=dataset, **shared_kwargs)
    if task == NER:
        return ner.construct_prompt(dataset=dataset, **shared_kwargs)
    if task == QA:
        # The QA builder is deliberately called without the dataset.
        return qa.construct_prompt(**shared_kwargs)
    # Fallback: every remaining task value is treated as NLI.
    return nli.construct_prompt(**shared_kwargs)
def _get_language_type(language: str):
    """Classify *language* as low- or high-resource from its word count.

    Reads ``utils/languages_by_word_count.csv`` and looks up the
    ``"number of words"`` value of the first row whose ``"Language"`` column
    equals *language*. The value is printed before the result is returned.

    Args:
        language: Language name, matched exactly against the ``"Language"``
            column.

    Returns:
        ``LanguageType.Low`` when the word count is below 150276400,
        otherwise ``LanguageType.High``.

    Raises:
        IndexError: If no row of the CSV matches *language*.
    """
    csv_path = "utils/languages_by_word_count.csv"
    # Word counts strictly below this value are classified as low-resource.
    high_resource_threshold = 150276400

    df = pd.read_csv(csv_path)
    matches = df.loc[df["Language"] == language, "number of words"]
    if matches.empty:
        # ``.iloc[0]`` on an empty selection raises an IndexError with an
        # opaque pandas message; keep the exception type but say what failed.
        raise IndexError(f"Language {language!r} not found in {csv_path}")

    number_of_words = matches.iloc[0]
    print(number_of_words)
    if number_of_words < high_resource_threshold:
        return LanguageType.Low
    return LanguageType.High
class Config:
    """Holds one setting for each of four prompt parts.

    The parts are ``prefix``, ``context``, ``examples`` and ``output``; each
    defaults to the string ``"source"``.
    """

    def __init__(
        self, prefix="source", context="source", examples="source", output="source"
    ):
        """Store the initial value of each prompt part."""
        self.prefix, self.context = prefix, context
        self.examples, self.output = examples, output

    def set(self, prefix=None, context=None, examples=None, output=None):
        """Overwrite the parts for which a truthy value is supplied.

        Falsy arguments (``None``, ``""``, ...) leave the current value as is.
        """
        overrides = {
            "prefix": prefix,
            "context": context,
            "examples": examples,
            "output": output,
        }
        for attribute, value in overrides.items():
            if value:
                setattr(self, attribute, value)

    def to_dict(self):
        """Return the settings as a dict; ``prefix`` is exposed as ``"instruction"``."""
        return dict(
            instruction=self.prefix,
            context=self.context,
            examples=self.examples,
            output=self.output,
        )
def recommend_config(task, lang, model_type):
    """Recommend which language each prompt part should use.

    Every part starts as *lang*; selected parts are then switched to
    ``"English"`` depending on the task, the model type and the resource tier
    returned by ``_get_language_type(lang)``. The resulting dict is printed
    before it is returned.

    Args:
        task: Task identifier, compared against ``QA``, ``NER``, ``NLI`` and
            ``SUMMARIZATION``. Other values leave every part set to *lang*.
        lang: Language name; also used for the resource-tier lookup.
        model_type: Compared against ``ModelType.English.value``.

    Returns:
        The dict produced by ``Config.to_dict()``.
    """
    language_type = _get_language_type(lang)
    config = Config(lang, lang, lang, lang)

    english_model = model_type == ModelType.English.value
    high_resource = language_type == LanguageType.High

    # For English models on QA/NER/NLI every part stays in ``lang``, which is
    # already the initial state, so only the other cases need an update.
    if task == QA:
        if not english_model:
            config.set(prefix="English")
    elif task == NER:
        if not english_model:
            # Low-resource languages additionally get English output.
            config.set(prefix="English", output=lang if high_resource else "English")
    elif task == NLI:
        if not english_model:
            # Low-resource languages additionally get an English context.
            config.set(
                prefix="English",
                context=lang if high_resource else "English",
                examples="English",
            )
    elif task == SUMMARIZATION:
        config.set(context="English")

    recommendation = config.to_dict()
    print(recommendation)
    return recommendation
|