alexneakameni committed on
Commit
231f99f
·
verified ·
1 Parent(s): bb6f2a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -12
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
4
 
5
  # Load model and tokenizer
6
  model_name = "alexneakameni/language_detection"
@@ -8,38 +9,91 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
8
  model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
9
  tokenizer = AutoTokenizer.from_pretrained(model_name)
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # Label mapping from the model config; looked up by predicted class index when formatting results.
12
  id2label = model.config.id2label
13
 
 
 
 
 
 
 
 
 
 
14
def predict_language(text, top_k=5):
    """Predicts the top-k languages for the given text.

    Args:
        text: Input text to classify. Blank or missing text yields an empty
            result instead of scoring an empty sequence.
        top_k: How many of the most probable labels to report. The value is
            cast to ``int`` and clamped to ``[1, number of labels]`` because
            ``torch.topk`` needs an integer ``k`` no larger than the size of
            the dimension it searches.

    Returns:
        One ``"<label>: <probability>"`` line per prediction, joined with
        newlines, most probable first.
    """
    if not text or not text.strip():
        return ""

    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, padding=True, max_length=512
    ).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits

    # A single text is classified per call, so take the only row explicitly
    # rather than relying on squeeze() to drop the batch dimension.
    probs = torch.nn.functional.softmax(logits, dim=-1)[0]
    k = max(1, min(int(top_k), probs.numel()))
    top_probs, top_indices = torch.topk(probs, k)

    results = [
        f"{id2label[idx]}: {prob:.4f}"
        for prob, idx in zip(top_probs.tolist(), top_indices.tolist())
    ]
    return "\n".join(results)
25
 
 
26
# Create Gradio interface
# Input components: the text to classify and the number of labels to report.
text_input = gr.Textbox(label="Enter text", placeholder="Type a sentence here...")
top_k_slider = gr.Slider(1, 10, value=5, step=1, label="Top-k Languages")

# Sample texts offered as examples; each one is paired with top_k = 5.
example_texts = [
    "Hello, how are you?",
    "Bonjour, comment ça va?",
    "Hola, ¿cómo estás?",
    "Hallo, wie geht es dir?",
    "Привет, как дела?",
]

demo = gr.Interface(
    fn=predict_language,
    inputs=[text_input, top_k_slider],
    outputs=gr.Textbox(label="Predicted Languages"),
    title="🌍 Language Detection",
    description="Detects the language of a given text using a fine-tuned BERT model. Returns the top-k most probable languages.",
    examples=[[sample, 5] for sample in example_texts],
)

demo.launch()
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
+ import pycountry
5
 
6
  # Load model and tokenizer
7
  model_name = "alexneakameni/language_detection"
 
9
  model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
 
12
# Example inputs for the demo: one sentence per language, keyed by the
# language it is written in.
_EXAMPLE_SENTENCE_BY_LANGUAGE = {
    "English": "Although artificial intelligence has made significant progress in recent years, there are still many challenges to overcome before it can truly replicate human intelligence.",
    "French": "Bien que l'intelligence artificielle ait fait des progrès considérables ces dernières années, de nombreux défis restent à relever avant qu'elle ne puisse véritablement imiter l'intelligence humaine.",
    "Spanish": "A pesar de los importantes avances en inteligencia artificial en los últimos años, aún existen muchos desafíos por superar antes de que pueda replicar verdaderamente la inteligencia humana.",
    "German": "Obwohl künstliche Intelligenz in den letzten Jahren erhebliche Fortschritte gemacht hat, gibt es noch viele Herausforderungen, die überwunden werden müssen, bevor sie die menschliche Intelligenz wirklich nachbilden kann.",
    "Italian": "Sebbene l'intelligenza artificiale abbia fatto progressi significativi negli ultimi anni, ci sono ancora molte sfide da affrontare prima che possa davvero replicare l'intelligenza umana.",
    "Portuguese": "Embora a inteligência artificial tenha avançado significativamente nos últimos anos, ainda há muitos desafios a superar antes que ela possa realmente imitar a inteligência humana.",
    "Dutch": "Hoewel kunstmatige intelligentie de afgelopen jaren aanzienlijke vooruitgang heeft geboekt, zijn er nog veel uitdagingen te overwinnen voordat het echt menselijke intelligentie kan nabootsen.",
    "Russian": "Несмотря на значительный прогресс в области искусственного интеллекта в последние годы, все еще остается много проблем, которые необходимо решить, прежде чем он сможет действительно имитировать человеческий интеллект.",
    "Arabic": "على الرغم من التقدم الكبير الذي أحرزته الذكاء الاصطناعي في السنوات الأخيرة، لا تزال هناك العديد من التحديات التي يجب التغلب عليها قبل أن يتمكن من محاكاة الذكاء البشري حقًا.",
    "Hindi": "हालांकि कृत्रिम बुद्धिमत्ता ने हाल के वर्षों में उल्लेखनीय प्रगति की है, फिर भी कई चुनौतियाँ बनी हुई हैं जिन्हें पार किए बिना यह वास्तव में मानव बुद्धिमत्ता की नकल नहीं कर सकती।",
    "Chinese (Simplified)": "尽管近年来人工智能取得了重大进展,但仍然存在许多挑战,需要克服这些挑战才能真正复制人类智能。",
    "Japanese": "近年、人工知能は大きな進歩を遂げましたが、人間の知能を本当に再現するにはまだ多くの課題を克服する必要があります。",
    "Korean": "인공지능이 최근 몇 년 동안 상당한 발전을 이루었음에도 불구하고, 인간의 지능을 진정으로 재현하기 위해서는 아직 극복해야 할 많은 도전 과제가 남아 있습니다.",
    "Turkish": "Yapay zeka son yıllarda önemli ilerlemeler kaydetmiş olsa da, insan zekasını gerçekten taklit edebilmesi için hala birçok zorluk aşılmalıdır.",
    "Polish": "Chociaż sztuczna inteligencja poczyniła w ostatnich latach znaczne postępy, nadal istnieje wiele wyzwań do pokonania, zanim będzie mogła naprawdę naśladować ludzką inteligencję.",
    "Greek": "Αν και η τεχνητή νοημοσύνη έχει σημειώσει σημαντική πρόοδο τα τελευταία χρόνια, εξακολουθούν να υπάρχουν πολλές προκλήσεις που πρέπει να ξεπεραστούν πριν μπορέσει πραγματικά να αναπαραγάγει την ανθρώπινη νοημοσύνη.",
    "Hebrew": "למרות שהבינה המלאכותית התקדמה באופן משמעותי בשנים האחרונות, עדיין ישנם אתגרים רבים שיש להתגבר עליהם לפני שתוכל באמת לשחזר את האינטליגנציה האנושית.",
    "Swahili": "Ingawa akili bandia imepiga hatua kubwa katika miaka ya hivi karibuni, bado kuna changamoto nyingi zinazopaswa kushindwa kabla ya kuweza kuiga akili ya binadamu kwa kweli.",
    "Vietnamese": "Mặc dù trí tuệ nhân tạo đã đạt được những tiến bộ đáng kể trong những năm gần đây, nhưng vẫn còn nhiều thách thức cần vượt qua trước khi nó có thể thực sự tái tạo trí thông minh của con người.",
    "Thai": "แม้ว่าปัญญาประดิษฐ์จะมีความก้าวหน้าอย่างมากในช่วงไม่กี่ปีที่ผ่านมา แต่ยังคงมีความท้าทายอีกมากที่ต้องเอาชนะก่อนที่มันจะสามารถเลียนแบบสติปัญญาของมนุษย์ได้อย่างแท้จริง.",
}

# Flat list of the example sentences, in the order declared above
# (dicts preserve insertion order).
sentences = list(_EXAMPLE_SENTENCE_BY_LANGUAGE.values())
54
+
55
+
56
  # Label mapping from the model config; looked up by predicted class index when formatting results.
57
  id2label = model.config.id2label
58
 
59
+
60
def get_iso1_code(code: str):
    """Return a human-readable language name for a model label.

    Despite its name, this helper does not produce an ISO 639-1 code: it
    looks the label's language part up as an alpha-3 code in ``pycountry``
    and returns the matching entry's ``name``.

    Args:
        code: A model label. Only the part before the first ``"_"`` is used
            for the lookup.

    Returns:
        The ``name`` of the matching ``pycountry`` language entry, or the
        extracted prefix unchanged when nothing matches.
    """
    lang = code.split("_", 1)[0]  # Keep only the part before the first '_'
    try:
        match = pycountry.languages.get(alpha_3=lang)
    except LookupError:
        # NOTE(review): older pycountry releases are believed to raise
        # KeyError on a miss instead of returning None; treat both alike.
        match = None
    # Falls back to the raw prefix when there is no match (None) or the
    # entry has no ``name`` attribute.
    return getattr(match, "name", lang)
66
+
67
+
68
def predict_language(text, top_k=5):
    """Predicts the top-k languages for the given text.

    Args:
        text: Input text to classify. Blank or missing text yields an empty
            result instead of scoring an empty sequence.
        top_k: How many of the most probable labels to report. The value is
            cast to ``int`` and clamped to ``[1, number of labels]`` because
            ``torch.topk`` needs an integer ``k`` no larger than the size of
            the dimension it searches.

    Returns:
        One ``"<language name> - <label>: <probability>"`` line per
        prediction, joined with newlines, most probable first.
    """
    if not text or not text.strip():
        return ""

    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, padding=True, max_length=512
    ).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits

    # A single text is classified per call, so take the only row explicitly
    # rather than relying on squeeze() to drop the batch dimension.
    probs = torch.nn.functional.softmax(logits, dim=-1)[0]
    k = max(1, min(int(top_k), probs.numel()))
    top_probs, top_indices = torch.topk(probs, k)

    results = [
        f"{get_iso1_code(id2label[idx])} - {id2label[idx]}: {prob:.4f}"
        for prob, idx in zip(top_probs.tolist(), top_indices.tolist())
    ]
    return "\n".join(results)
84
 
85
+
86
# Create Gradio interface
# Input components: the text to classify and the number of labels to report.
text_input = gr.Textbox(label="Enter text", placeholder="Type a sentence here...")
top_k_slider = gr.Slider(1, 10, value=5, step=1, label="Top-k Languages")

# Each sample sentence is offered as an example row paired with top_k = 5.
example_rows = [[sentence, 5] for sentence in sentences]

demo = gr.Interface(
    fn=predict_language,
    inputs=[text_input, top_k_slider],
    outputs=gr.Textbox(label="Predicted Languages"),
    title="🌍 Language Detection",
    description="Detects the language of a given text using a fine-tuned BERT model. Returns the top-k most probable languages.",
    examples=example_rows,
)

demo.launch()