versae committed on
Commit
fe1893c
·
1 Parent(s): 4106df7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -3
app.py CHANGED
@@ -1,12 +1,60 @@
1
- import gradio as gr
2
  from huggingface_hub import hf_hub_download
 
3
  import fasttext
4
 
5
  model = fasttext.load_model(hf_hub_download("NbAiLab/nb-nordic-lid", "model.bin"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
 
8
  def identify(text):
9
- return model.predict(text)
10
 
11
- iface = gr.Interface(fn=identify, inputs="text", outputs="text")
12
  iface.launch()
 
from typing import List, Optional, Set, Tuple, Union

import fasttext
import gradio as gr
from huggingface_hub import hf_hub_download
5
 
# Fetch "model.bin" from the "NbAiLab/nb-nordic-lid" repository and load it
# into fastText once, when the module is imported.
_model_path = hf_hub_download("NbAiLab/nb-nordic-lid", "model.bin")
model = fasttext.load_model(_model_path)
# Language codes the model can emit: the last three characters of each label.
model_labels = {label[-3:] for label in model.get_labels()}
8
+
9
+
def detect_lang(
    text: str,
    langs: Optional[Union[List, Set]] = None,
    threshold: float = -1.0,
    return_proba: bool = False,
) -> Union[str, List[Tuple[str, float]]]:
    """Detect the language of ``text`` using the module-level fastText model.

    The model is asked for all of its labels (``k=-1``). Each label is reduced
    to its last three characters (the ISO-639-3 language code) and predictions
    whose code is not in ``langs`` are discarded. Probabilities are capped at
    ``1.0``.

    Args:
        text: The text to detect the language of. Line breaks are replaced by
            spaces before prediction.
        langs: Language codes to restrict the result to. ``None`` or an empty
            collection means every code in the model's labels.
        threshold: Minimum probability, passed straight to ``model.predict``.
            Defaults to ``-1.0``.
        return_proba: When ``True``, return every retained prediction with its
            probability instead of a single language code.

    Returns:
        With ``return_proba=False``: the code of the first retained prediction,
        in the order returned by the model (presumably highest probability
        first -- confirm against fastText), or ``"und"`` when nothing is
        retained.
        With ``return_proba=True``: a list of ``(code, probability)`` tuples in
        the model's order, or ``[("und", 1.0)]`` when nothing is retained.
    """
    allowed = set(langs) if langs else model_labels
    # fastText's predict() processes one line at a time and raises ValueError
    # if the text contains "\n", so fold line breaks into spaces first.
    single_line = text.replace("\n", " ")
    labels, probabilities = model.predict(single_line, threshold=threshold, k=-1)
    predictions = [
        (label[-3:], min(probability, 1.0))
        for label, probability in zip(labels, probabilities)
        if label[-3:] in allowed
    ]
    if not predictions:
        return [("und", 1.0)] if return_proba else "und"
    return predictions if return_proba else predictions[0][0]
54
 
55
 
def identify(text):
    """Return a ``{language code: probability}`` mapping for ``text``.

    Calls ``detect_lang`` with ``return_proba=True`` and turns the resulting
    ``(code, probability)`` pairs into a dict.
    """
    scored = detect_lang(text, return_proba=True)
    return dict(scored)


# Wire ``identify`` into a Gradio interface with a "text" input and a "label"
# output, then start it.
iface = gr.Interface(fn=identify, inputs="text", outputs="label")
iface.launch()