Spaces:
Sleeping
Sleeping
A-Duss
committed on
Commit
·
92a0f65
1
Parent(s):
81c40b2
pydantic
Browse files- app.py +92 -11
- requirements.txt +2 -1
app.py
CHANGED
@@ -50,10 +50,79 @@ def get_score(user_text, language):
|
|
50 |
score = optimized_scorer.score(user_text, language)
|
51 |
formatted_score = f"{score:.4g}"
|
52 |
loaded_languages = optimized_scorer.get_loaded_languages()
|
53 |
-
|
|
|
|
|
|
|
|
|
54 |
|
55 |
language_options = ['am', 'ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'gu', 'ha', 'hi', 'hu', 'id', 'it', 'ja', 'jv', 'kn', 'ko', 'lt', 'mr', 'nl', 'no', 'yo', 'zh']
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
css = '''
|
58 |
#gen_btn{height: 100%}
|
59 |
#title{text-align: center}
|
@@ -95,13 +164,6 @@ with gr.Blocks(theme=theme, css=css) as demo:
|
|
95 |
)
|
96 |
with gr.Row():
|
97 |
user_text = gr.Textbox(label='Input text', placeholder='Type something here...')
|
98 |
-
language_choice = gr.Dropdown(
|
99 |
-
choices=language_options,
|
100 |
-
label="Choose a language",
|
101 |
-
info="Type to search",
|
102 |
-
value="en",
|
103 |
-
allow_custom_value=True,
|
104 |
-
)
|
105 |
with gr.Column(scale=0):
|
106 |
submit_btn = gr.Button("Submit")
|
107 |
score = gr.HTML(
|
@@ -109,17 +171,36 @@ with gr.Blocks(theme=theme, css=css) as demo:
|
|
109 |
label="Output"
|
110 |
)
|
111 |
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
gr.Examples(examples=example_inputs, inputs=user_text)
|
115 |
|
116 |
gr.Markdown(
|
117 |
"""
|
|
|
|
|
|
|
|
|
|
|
118 |
This model is based on fasttext embeddings, meaning that it can be used on large amounts of data with limited compute quickly.
|
119 |
|
120 |
This scorer can be used to filter useful information from large text corpora in many languages.
|
121 |
-
|
122 |
-
This model can also be found on [Github](https://github.com/lightblue-tech/shitsu) and has its own pip installable package.
|
123 |
"""
|
124 |
)
|
125 |
|
|
|
50 |
score = optimized_scorer.score(user_text, language)
|
51 |
formatted_score = f"{score:.4g}"
|
52 |
loaded_languages = optimized_scorer.get_loaded_languages()
|
53 |
+
display_loaded_languages = [('Currently loaded languages:', None)]
|
54 |
+
for language in loaded_languages:
|
55 |
+
display_loaded_languages.append((language_map[language], language))
|
56 |
+
display_loaded_languages.append((' ', None))
|
57 |
+
return f'<div class="nice-box"> Score: {formatted_score}</div>', display_loaded_languages
|
58 |
|
59 |
language_options = ['am', 'ar', 'bg', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fi', 'fr', 'gu', 'ha', 'hi', 'hu', 'id', 'it', 'ja', 'jv', 'kn', 'ko', 'lt', 'mr', 'nl', 'no', 'yo', 'zh']
|
60 |
|
61 |
+
language_map = {
|
62 |
+
'am': 'Amharic',
|
63 |
+
'ar': 'Arabic',
|
64 |
+
'bg': 'Bulgarian',
|
65 |
+
'bn': 'Bengali',
|
66 |
+
'cs': 'Czech',
|
67 |
+
'da': 'Danish',
|
68 |
+
'de': 'German',
|
69 |
+
'el': 'Greek',
|
70 |
+
'en': 'English',
|
71 |
+
'es': 'Spanish',
|
72 |
+
'fa': 'Persian',
|
73 |
+
'fi': 'Finnish',
|
74 |
+
'fr': 'French',
|
75 |
+
'gu': 'Gujarati',
|
76 |
+
'ha': 'Hausa',
|
77 |
+
'hi': 'Hindi',
|
78 |
+
'hu': 'Hungarian',
|
79 |
+
'id': 'Indonesian',
|
80 |
+
'it': 'Italian',
|
81 |
+
'ja': 'Japanese',
|
82 |
+
'jv': 'Javanese',
|
83 |
+
'kn': 'Kannada',
|
84 |
+
'ko': 'Korean',
|
85 |
+
'lt': 'Lithuanian',
|
86 |
+
'mr': 'Marathi',
|
87 |
+
'nl': 'Dutch',
|
88 |
+
'no': 'Norwegian',
|
89 |
+
'yo': 'Yoruba',
|
90 |
+
'zh': 'Chinese'
|
91 |
+
}
|
92 |
+
|
93 |
+
color_map = {
|
94 |
+
"am": "green", # Ethiopia's flag has green
|
95 |
+
"ar": "black", # Many Arab flags feature black
|
96 |
+
"bg": "white", # Bulgaria's flag has white
|
97 |
+
"bn": "green", # Bangladesh's flag is green and red
|
98 |
+
"cs": "blue", # Czech Republic's flag has blue
|
99 |
+
"da": "red", # Denmark's flag is red and white
|
100 |
+
"de": "black", # Germany's flag has black
|
101 |
+
"el": "blue", # Greece's flag has blue
|
102 |
+
"en": "red", # UK/US flags have red
|
103 |
+
"es": "yellow", # Spain's flag has yellow
|
104 |
+
"fa": "green", # Iran's flag has green
|
105 |
+
"fi": "blue", # Finland's flag is blue and white
|
106 |
+
"fr": "blue", # France's flag has blue
|
107 |
+
"gu": "saffron", # India (Gujarat) flag's color
|
108 |
+
"ha": "green", # Nigeria's flag has green
|
109 |
+
"hi": "orange", # India's flag has orange
|
110 |
+
"hu": "red", # Hungary's flag has red
|
111 |
+
"id": "red", # Indonesia's flag is red and white
|
112 |
+
"it": "green", # Italy's flag has green
|
113 |
+
"ja": "red", # Japan's flag has a red sun
|
114 |
+
"jv": "brown", # Associated with traditional Javanese culture
|
115 |
+
"kn": "yellow", # Karnataka (Indian state) flag has yellow
|
116 |
+
"ko": "blue", # South Korea's flag has blue
|
117 |
+
"lt": "yellow", # Lithuania's flag has yellow
|
118 |
+
"mr": "saffron", # Marathi culture often uses saffron
|
119 |
+
"nl": "orange", # The Netherlands is often associated with orange
|
120 |
+
"no": "red", # Norway's flag is red, white, and blue
|
121 |
+
"yo": "green", # Nigeria's flag for Yoruba-speaking people
|
122 |
+
"zh": "red" # China's flag is red
|
123 |
+
}
|
124 |
+
|
125 |
+
|
126 |
css = '''
|
127 |
#gen_btn{height: 100%}
|
128 |
#title{text-align: center}
|
|
|
164 |
)
|
165 |
with gr.Row():
|
166 |
user_text = gr.Textbox(label='Input text', placeholder='Type something here...')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
with gr.Column(scale=0):
|
168 |
submit_btn = gr.Button("Submit")
|
169 |
score = gr.HTML(
|
|
|
171 |
label="Output"
|
172 |
)
|
173 |
|
174 |
+
with gr.Row():
|
175 |
+
language_choice = gr.Dropdown(
|
176 |
+
choices=language_options,
|
177 |
+
label="Choose a language",
|
178 |
+
info="Type to search",
|
179 |
+
value="en",
|
180 |
+
allow_custom_value=True,
|
181 |
+
)
|
182 |
+
|
183 |
+
loaded_languages = gr.HighlightedText(
|
184 |
+
value = [('Currently loaded languages:', None), ('English', 'en')],
|
185 |
+
label="",
|
186 |
+
combine_adjacent=True,
|
187 |
+
show_legend=False, #True,
|
188 |
+
color_map=color_map)
|
189 |
+
|
190 |
+
#loaded_languages = gr.Markdown("Currently loaded languages: en")
|
191 |
|
192 |
gr.Examples(examples=example_inputs, inputs=user_text)
|
193 |
|
194 |
gr.Markdown(
|
195 |
"""
|
196 |
+
---
|
197 |
+
|
198 |
+
## 🛈 **Additional Information**
|
199 |
+
This model can also be found on [Github](https://github.com/lightblue-tech/shitsu) and has its own pip installable package.
|
200 |
+
|
201 |
This model is based on fasttext embeddings, meaning that it can be used on large amounts of data with limited compute quickly.
|
202 |
|
203 |
This scorer can be used to filter useful information from large text corpora in many languages.
|
|
|
|
|
204 |
"""
|
205 |
)
|
206 |
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
git+https://github.com/lightblue-tech/shitsu.git
|
2 |
hf-transfer
|
3 |
-
huggingface_hub[hf_transfer]
|
|
|
|
1 |
git+https://github.com/lightblue-tech/shitsu.git
|
2 |
hf-transfer
|
3 |
+
huggingface_hub[hf_transfer]
|
4 |
+
pydantic
|