Spaces:

rasyosef
/

Llama-3.2-400M-Amharic-Instruct

Running

App Files Files Community

rasyosef committed on Mar 19

Commit

d15c04c

verified ·

1 Parent(s): cb638de

Create app.py

Browse files

Files changed (1) hide show

app.py +91 -0

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, pipeline
+from threading import Thread
+model_id = "rasyosef/Llama-3.2-400M-Amharic-Instruct-Poems-Stories-Wikipedia"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.float32,
+    device_map="cuda" if torch.cuda.is_available() else "cpu"
+  )
+llama3_am = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    eos_token_id=tokenizer.eos_token_id,
+    device_map="cuda" if torch.cuda.is_available() else "cpu"
+  )
+# Function that accepts a prompt and generates text
+def generate(message, chat_history, max_new_tokens=64):
+  history = []
+  for sent, received in chat_history:
+    history.append({"role": "user", "content": sent})
+    history.append({"role": "assistant", "content": received})
+  history.append({"role": "user", "content": message})
+  if len(tokenizer.apply_chat_template(history)) > 512:
+    yield "chat history is too long"
+  else:
+    # Streamer
+    streamer = TextIteratorStreamer(tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0)
+    thread = Thread(
+        target=llama3_am,
+        kwargs={
+            "text_inputs":history,
+            "max_new_tokens":max_new_tokens,
+            "repetition_penalty":1.1,
+            "streamer":streamer
+            }
+        )
+    thread.start()
+    generated_text = ""
+    for word in streamer:
+      generated_text += word
+      response = generated_text.strip()
+      yield response
+# Chat interface with gradio
+with gr.Blocks() as demo:
+  gr.Markdown("""
+  # Llama 3.2 400M Amharic Chatbot Demo
+  """)
+  tokens_slider = gr.Slider(8, 256, value=64, label="Maximum new tokens", info="A larger `max_new_tokens` parameter value gives you longer text responses but at the cost of a slower response time.")
+  chatbot = gr.ChatInterface(
+    chatbot=gr.Chatbot(height=400),
+    fn=generate,
+    additional_inputs=[tokens_slider],
+    stop_btn=None,
+    examples=[
+        ["ሰላም"],
+        ["ሰላም፣ እንዴት ነህ?"],
+        ["አንተ ማነህ?"],
+        ["ግጥም ፃፍልኝ"],
+        ["ስለ ይቅርታ ግጥም ጻፍልኝ"],
+        ["አንድ ተረት አጫውተኝ"],
+        ["ስለ ጅብና አንበሳ ተረት ንገረኝ"],
+        ["ቀልድ ንገረኝ"],
+        ["ስለ ስራ አጥነት አንድ ቀልድ ንገረኝ"],
+        ["ዳግማዊ ቴዎድሮስ ማን ነው?"],
+        ["ዳግማዊ ምንሊክ ማን ነው?"],
+        ["ስለ አዲስ አበባ ዩኒቨርስቲ ጥቂት እውነታዎችን አጫውተኝ"],
+        ["ስለ ጃፓን ጥቂት እውነታዎችን ንገረኝ"],
+        ["ስለ ማይክሮሶፍት ጥቂት እውነታዎችን ንገረኝ"],
+        ["ጉግል ምንድን ነው?"],
+        ["ቢትኮይን ምንድን ነው?"],
+      ]
+  )
+demo.queue().launch(debug=True)