Spaces:

large-traversaal
/

Mantra-14B-Demo

Sleeping

App Files Files Community

Mantra-14B-Demo / app.py

1024m

Update app.py

f309c7a verified 3 months ago

raw

history blame contribute delete

5.51 kB

	import gradio as gr
	from theme import fast_rtc_theme
	import torch
	import json
	import uuid
	import os
	import time
	import pytz
	from datetime import datetime
	from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
	from threading import Thread
	from huggingface_hub import CommitScheduler
	from pathlib import Path
	import spaces
	# --- Process-wide setup: runs once at import time, in this order. ---
	# Install the C++ runtime through the shell; the command's exit status is discarded.
	os.system("apt-get update && apt-get install -y libstdc++6")
	# Hub access token; a missing HF_TOKEN environment variable raises KeyError here.
	token = os.environ["HF_TOKEN"]
	model_id = "large-traversaal/Mantra-14B"
	# Load the weights in bfloat16. trust_remote_code=True lets code shipped with the model repo run.
	model = AutoModelForCausalLM.from_pretrained(model_id, token=token, trust_remote_code=True, torch_dtype=torch.bfloat16)
	tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
	# Token ids passed to generate() as eos_token_id (see chat()).
	terminators = [tokenizer.eos_token_id]
	# Use CUDA when available, otherwise fall back to CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	model = model.to(device)
	# Interaction logs go to ./logs; the uuid4 suffix gives each process start its own file.
	# Despite the .json extension, chat() appends one JSON object per line.
	log_folder = Path("logs")
	log_folder.mkdir(parents=True, exist_ok=True)
	log_file = log_folder / f"chat_log_{uuid.uuid4()}.json"
	# Background uploader for log_folder into the dataset repo under "data".
	# NOTE(review): huggingface_hub documents `every` in minutes, so 0.01 would be ~0.6 s between commits - confirm this is intended.
	scheduler = CommitScheduler(repo_id="large-traversaal/mantra-14b-user-interaction-log", repo_type="dataset", folder_path=log_folder, path_in_repo="data", every=0.01, token=token)
	# Timezone used for the timestamps written to the log.
	timezone = pytz.timezone("UTC")
	@spaces.GPU(duration=60)
	def chat(message, history, temperature, do_sample, max_tokens, top_p):
	    """Stream the model's reply to ``message`` and log the finished exchange.

	    This is a generator: after every chunk produced by the streamer it yields
	    the reply text accumulated so far. Once the streamer is exhausted, one
	    JSON line describing the exchange is appended to ``log_file`` while
	    holding ``scheduler.lock``.

	    Args:
	        message: The new user message.
	        history: Earlier exchanges, each indexable as ``[0]`` (user text) and
	            ``[1]`` (assistant text, skipped when ``None``).
	        temperature: Sampling temperature; a value of 0 forces greedy decoding.
	        do_sample: Whether to sample (overridden to ``False`` when
	            ``temperature == 0``; the logged value is the one passed in).
	        max_tokens: Passed to ``generate`` as ``max_new_tokens``.
	        top_p: Passed to ``generate`` as ``top_p``.

	    Yields:
	        The reply text accumulated so far.
	    """
	    began = time.time()
	    stamp = datetime.now(timezone).strftime("%Y-%m-%d %H:%M:%S %Z")

	    # Rebuild the conversation as role/content turns, ending with the new message.
	    turns = []
	    for exchange in history:
	        turns.append({"role": "user", "content": exchange[0]})
	        if exchange[1] is not None:
	            turns.append({"role": "assistant", "content": exchange[1]})
	    turns.append({"role": "user", "content": message})

	    prompt = tokenizer.apply_chat_template(turns, tokenize=False, add_generation_prompt=True)
	    encoded = tokenizer([prompt], return_tensors="pt").to(device)
	    streamer = TextIteratorStreamer(tokenizer, timeout=70.0, skip_prompt=True, skip_special_tokens=True)

	    # Tokenized inputs first, then the generation settings.
	    gen_kwargs = dict(encoded)
	    gen_kwargs.update(
	        streamer=streamer,
	        max_new_tokens=max_tokens,
	        do_sample=False if temperature == 0 else do_sample,
	        temperature=temperature,
	        top_p=top_p,
	        eos_token_id=terminators,
	    )

	    # generate() runs in a worker thread so this generator can consume the streamer.
	    Thread(target=model.generate, kwargs=gen_kwargs).start()

	    reply = ""
	    for piece in streamer:
	        reply = reply + piece
	        yield reply

	    elapsed = round(time.time() - began, 2)
	    record = {
	        "timestamp": stamp,
	        "input": message,
	        "output": reply,
	        "response_time": elapsed,
	        "temperature": temperature,
	        "do_sample": do_sample,
	        "max_tokens": max_tokens,
	        "top_p": top_p,
	    }
	    # Hold the scheduler's lock while appending to the log file.
	    with scheduler.lock, log_file.open("a", encoding="utf-8") as sink:
	        sink.write(json.dumps(record, ensure_ascii=False) + "\n")
	def clear_chat():
	    """Return two fresh, independent empty lists (one per output wired to the clear button)."""
	    cleared_display = []
	    cleared_value = []
	    return cleared_display, cleared_value
	def export_chat(history):
	    """Write the chat history to a text file and return the file's path.

	    Each exchange is written as a ``User: <msg[0]>`` line followed by a
	    ``Bot: <msg[1]>`` line. The file is always named ``chat_history.txt`` but
	    is created inside a fresh temporary directory on every call, so
	    concurrent exports can no longer overwrite (or expose) one another's
	    file the way a single shared relative path did. The temporary
	    directory is not removed by this function.

	    Args:
	        history: Sequence of exchanges, each indexable as ``msg[0]`` (user
	            text) and ``msg[1]`` (bot text).

	    Returns:
	        The path of the written file as a string, or ``None`` when
	        ``history`` is empty or ``None``.
	    """
	    import tempfile

	    if not history:
	        return None  # No chat history to export
	    # One private directory per export keeps the download name stable
	    # while isolating callers from each other.
	    export_dir = tempfile.mkdtemp(prefix="chat_export_")
	    file_path = os.path.join(export_dir, "chat_history.txt")
	    with open(file_path, "w", encoding="utf-8") as f:
	        f.writelines(f"User: {msg[0]}\nBot: {msg[1]}\n" for msg in history)
	    return file_path
	# Build the Gradio UI (generation settings, chat interface, clear/export buttons) and launch it.
	# NOTE(review): indentation is flattened in this listing, so the nesting of the
	# `with` layout blocks below must be confirmed against the real file.
	with gr.Blocks(theme=fast_rtc_theme) as demo:
	with gr.Row():
	with gr.Column(scale=1):
	gr.Markdown("#### ⚙️🛠 Configure Settings")
	# These four controls are handed to chat() through additional_inputs, in the
	# same order as chat()'s trailing parameters.
	temperature = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.1, label="Temperature", interactive=True)
	do_sample = gr.Checkbox(label="Sampling", value=True, interactive=True)
	max_tokens = gr.Slider(minimum=128, maximum=4096, step=1, value=1024, label="max_new_tokens", interactive=True)
	# NOTE(review): the default 0.1 is not on the 0.2 step grid starting at 0.0 - confirm intended.
	top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.2, label="top_p", interactive=True)
	with gr.Column(scale=3):
	gr.Markdown("# Chat With Mantra-14B 💬 ")
	# Chat widget driven by chat(); each example is a single-message prompt.
	chat_interface = gr.ChatInterface(fn=chat,
	examples=[["What is the English translation of: 'इस मॉडल को हिंदी और अंग्रेजी डेटा पर प्रशिक्षित किया गया था'?"],
	["टिम अपने 3 बच्चों को ट्रिक या ट्रीटिंग के लिए ले जाता है। वे 4 घंटे बाहर रहते हैं। हर घंटे वे x घरों में जाते हैं। हर घर में हर बच्चे को 3 ट्रीट मिलते हैं। उसके बच्चों को कुल 180 ट्रीट मिलते हैं। अज्ञात चर x का मान क्या है?"],
	["How do you play fetch? A) Throw the object for the dog to bring back to you. B) Get the object and bring it back to the dog."]],
	additional_inputs=[temperature, do_sample, max_tokens, top_p],
	stop_btn="⏹ Stop",
	description="Mantra-14B is a bilingual instruction-tuned LLM for Hindi and English, trained on a mixed datasets composed of 485K Hindi-English samples.",)
	with gr.Row():
	clear_btn = gr.Button("🧹 Clear Chat", variant="primary")
	export_btn = gr.Button("📥 Export Chat", variant="primary")
	# clear_chat() returns two empty lists, written to the chatbot and to chatbot_value.
	clear_btn.click(fn=clear_chat, outputs=[chat_interface.chatbot, chat_interface.chatbot_value])
	# export_chat() receives the chatbot's value; its result goes to a File component created inline here.
	export_btn.click(fn=export_chat, inputs=[chat_interface.chatbot], outputs=[gr.File()])
	# Start serving the app.
	demo.launch()