sophicist committed on
Commit
2d553a1
·
1 Parent(s): 42b3cd3

added new changes

Browse files
Files changed (1) hide show
  1. app.py +75 -21
app.py CHANGED
@@ -1,22 +1,76 @@
1
  import gradio as gr
2
- from transformers import MBart50TokenizerFast, MBartForConditionalGeneration
3
- from dotenv import load_dotenv
4
- import os
5
- from huggingface_hub import login
6
-
7
- # load_dotenv() # Load environment variables from .env file
8
- # huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
9
-
10
- # # Log in using the token
11
- # login(token=huggingface_token)
12
- # Load the model and tokenizer from Hugging Face Hub
13
- model_name = "Aesopskenya/translator"
14
- tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
15
- model = MBartForConditionalGeneration.from_pretrained(model_name)
16
-
17
- def translate_gikuyu(sentence):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  # Tokenize input
19
- inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True, max_length=128)
20
 
21
  # Generate translation
22
  outputs = model.generate(inputs.input_ids, max_length=128)
@@ -27,13 +81,13 @@ def translate_gikuyu(sentence):
27
 
28
  # Define Gradio interface
29
  iface = gr.Interface(
30
- fn=translate_gikuyu,
31
  inputs="text",
32
  outputs="text",
33
- title="Gikuyu-English Translator",
34
- description="Enter a Gikuyu sentence, and the model will translate it into English."
35
  )
36
 
37
- # Launch the interface
38
  if __name__ == "__main__":
39
  iface.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
+ from transformers import (
3
+ MBart50TokenizerFast,
4
+ MBartForConditionalGeneration,
5
+ AutoTokenizer,
6
+ AutoModelForSequenceClassification,
7
+ )
8
+ import torch
9
+
10
+ # Load the language detection model
11
+ lang_detector_name = "Aesopskenya/LanguageDetector"
12
+ lang_tokenizer = AutoTokenizer.from_pretrained(lang_detector_name)
13
+ lang_model = AutoModelForSequenceClassification.from_pretrained(lang_detector_name)
14
+
15
+ # Define the language mapping to models
16
+ lang_to_model = {
17
+ "Gikuyu": "Aesopskenya/translator",
18
+ "Kalenjin": "Aesopskenya/KalenjinTranslator",
19
+ "Kamba": "Aesopskenya/KambaTranslation",
20
+ "Luo": "Aesopskenya/LuoTranslator",
21
+ "Sheng": "Aesopskenya/ShengTranslation",
22
+ }
23
+
24
+ # Reverse mapper for language detection
25
+ reverse_mapper = {
26
+ 0: "English",
27
+ 1: "Sheng",
28
+ 2: "Other",
29
+ 3: "Luhya",
30
+ 4: "Kamba",
31
+ 5: "Gikuyu",
32
+ 6: "Kalenjin",
33
+ 7: "Luo",
34
+ }
35
+
36
+ # Function to detect language
37
def detect_language(text):
    """Classify *text* and return the name of the predicted language.

    The text is tokenized with ``lang_tokenizer`` (truncated to at most 128
    tokens), scored by ``lang_model`` with gradient tracking disabled, and
    the index of the highest logit is looked up in ``reverse_mapper``.

    Args:
        text: The sentence whose language should be identified.

    Returns:
        The language name stored in ``reverse_mapper`` for the predicted
        class index.

    Raises:
        KeyError: If the predicted class index has no entry in
            ``reverse_mapper``.
    """
    encoded = lang_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )

    # Inference only: no autograd graph is needed for classification.
    with torch.no_grad():
        class_scores = lang_model(**encoded).logits

    # ``.item()` only works on a one-element tensor, i.e. a single input text.
    best_class = class_scores.argmax(dim=-1).item()
    return reverse_mapper[best_class]
50
+
51
+ # Function to load the appropriate model and tokenizer
52
# Holds at most one (tokenizer, model) pair, keyed by hub repository name.
# Keeping a single entry avoids reloading the same checkpoint on every
# request without ever holding more than one translation model in this cache.
_translation_cache = {}


def load_model_and_tokenizer(language):
    """Return the tokenizer and model used to translate *language*.

    The repository name is looked up in ``lang_to_model``. The most recently
    loaded pair is cached, so consecutive requests for the same language do
    not instantiate the checkpoint again.

    Args:
        language: Language name as produced by the language detector
            (a key of ``lang_to_model``).

    Returns:
        A ``(tokenizer, model)`` tuple, or ``(None, None)`` when *language*
        has no translation model configured.
    """
    model_name = lang_to_model.get(language)
    if not model_name:
        return None, None

    cached_pair = _translation_cache.get(model_name)
    if cached_pair is not None:
        return cached_pair

    # Release the previously cached pair before loading a different one so
    # the cache itself never pins two translation models at once.
    _translation_cache.clear()
    tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
    model = MBartForConditionalGeneration.from_pretrained(model_name)
    _translation_cache[model_name] = (tokenizer, model)
    return tokenizer, model
59
+
60
+ # Function to translate text
61
+ def translate_text(text):
62
+ # Detect the language
63
+ detected_language = detect_language(text)
64
+ if detected_language not in lang_to_model:
65
+ return f"Language '{detected_language}' is not supported for translation."
66
+
67
+ # Load the appropriate model and tokenizer
68
+ tokenizer, model = load_model_and_tokenizer(detected_language)
69
+ if not tokenizer or not model:
70
+ return "Error loading the translation model."
71
+
72
  # Tokenize input
73
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
74
 
75
  # Generate translation
76
  outputs = model.generate(inputs.input_ids, max_length=128)
 
81
 
82
  # Define Gradio interface
83
  iface = gr.Interface(
84
+ fn=translate_text,
85
  inputs="text",
86
  outputs="text",
87
+ title="Multi-Language Translator",
88
+ description="Enter a sentence, and the model will detect its language and translate it into English.",
89
  )
90
 
91
+ # Launch the app
92
  if __name__ == "__main__":
93
  iface.launch(server_name="0.0.0.0", server_port=7860)