# Hosting-page residue captured with this file (not part of the program):
# Spaces: Sleeping | File size: 3,137 Bytes | 664df97
import time
from threading import Thread

import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextIteratorStreamer,
)
# Load the tokenizer and the 4-bit quantized model once, at import time, so
# every request handled by generate_response() reuses the same objects.
print("Loading model and tokenizer...")
model_name = "large-traversaal/Phi-4-Hindi"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# The 4-bit request goes through `quantization_config` rather than the bare
# `load_in_4bit=True` keyword, which Transformers flags as deprecated. All
# other quantization settings are left at their library defaults.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)
print("Model and tokenizer loaded successfully!")
def generate_response(message, temperature, max_new_tokens, top_p):
    """Stream a completion for ``message`` as progressively longer strings.

    This is a generator: each yielded value is the whole text received from
    the streamer so far, so a caller can overwrite its display on every
    update instead of appending.

    Args:
        message: Prompt text. It is tokenized as-is.
        temperature: Sampling temperature. Values of 0 or below switch to
            greedy decoding (``do_sample=False``).
        max_new_tokens: Upper bound on newly generated tokens; coerced to
            ``int`` because UI sliders may deliver a float.
        top_p: Nucleus-sampling threshold. Only forwarded when sampling.

    Yields:
        str: The accumulated output after each streamed chunk. For an empty
        or whitespace-only ``message`` a single empty string is yielded and
        the model is not called.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here once streaming has stopped.
    """
    print(f"Input: {message}")

    # Nothing to complete: skip tokenization/generation entirely.
    if not message or not message.strip():
        yield ""
        return

    start_time = time.time()
    inputs = tokenizer(message, return_tensors="pt").to(model.device)
    # NOTE(review): skip_prompt is left at its default, so the prompt text is
    # presumably part of the streamed output — confirm that is intended.
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    do_sample = temperature > 0
    # Forward every tokenizer output (not just input_ids) so the attention
    # mask reaches generate() as well.
    gen_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=int(max_new_tokens),
        do_sample=do_sample,
    )
    if do_sample:
        # Sampling-only knobs; passing them with greedy decoding is pointless.
        gen_kwargs["temperature"] = temperature
        gen_kwargs["top_p"] = top_p

    errors = []

    def _run_generation():
        """Run generation; on failure record the error and unblock the reader."""
        try:
            model.generate(**gen_kwargs)
        except Exception as exc:  # thread boundary: re-raised by the consumer
            errors.append(exc)
            # Signal end-of-stream so the loop below does not wait forever.
            streamer.end()

    thread = Thread(target=_run_generation)
    thread.start()

    chunks = []
    for text in streamer:
        chunks.append(text)
        yield "".join(chunks)

    thread.join()
    if errors:
        raise errors[0]

    time_taken = time.time() - start_time
    output_text = "".join(chunks)
    print(f"Output: {output_text}")
    print(f"Time taken: {time_taken:.2f} seconds")
# UI definition: a prompt box with generation controls and Clear/Send
# buttons, plus an output box that receives the streamed text.
# NOTE(review): the nesting below is reconstructed from the order of the
# context managers (controls and buttons under the input column, output in a
# sibling column) — confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Phi-4-Hindi Demo")
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Input",
                placeholder="Enter your text here...",
                lines=5,
            )
            with gr.Row():
                with gr.Column():
                    temperature = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        value=0.1,
                        step=0.01,
                        label="Temperature",
                    )
                with gr.Column():
                    max_new_tokens = gr.Slider(
                        minimum=50,
                        maximum=1000,
                        value=400,
                        step=10,
                        label="Max New Tokens",
                    )
                with gr.Column():
                    top_p = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        value=0.1,
                        step=0.01,
                        label="Top P",
                    )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                send_btn = gr.Button("Send", variant="primary")
        with gr.Column():
            output_text = gr.Textbox(
                label="Output",
                lines=15,
            )
    # Send: pass the prompt and the three slider values to the generator and
    # route what it yields into the output box.
    send_btn.click(
        fn=generate_response,
        inputs=[input_text, temperature, max_new_tokens, top_p],
        outputs=output_text,
    )
    # Clear: one empty string per output component (input box, output box).
    # The handler must return exactly as many values as there are outputs.
    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[input_text, output_text],
    )
if __name__ == "__main__":
    # Only start the app when run as a script: enable the request queue,
    # then launch the server.
    demo.queue().launch()