1024m committed on
Commit
664df97
·
verified ·
1 Parent(s): dc5caa8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import time
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
5
+ from threading import Thread
6
print("Loading model and tokenizer...")
model_name = "large-traversaal/Phi-4-Hindi"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Options forwarded to ``from_pretrained``: half-precision dtype, 4-bit
# loading, and automatic device placement.
_load_options = {
    "torch_dtype": torch.float16,
    "load_in_4bit": True,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(model_name, **_load_options)
print("Model and tokenizer loaded successfully!")
16
def generate_response(message, temperature, max_new_tokens, top_p):
    """Stream a completion for ``message`` from the module-level model.

    ``model.generate`` runs in a background thread and feeds decoded text
    into a ``TextIteratorStreamer``. This generator yields the text
    accumulated so far after every streamed chunk, so a UI bound to it can
    update incrementally.

    Args:
        message: Prompt text; tokenized and moved to ``model.device``.
        temperature: Sampling temperature. Sampling is enabled only when
            this is greater than 0.
        max_new_tokens: Upper bound on generated tokens (coerced to ``int``).
        top_p: Nucleus-sampling threshold; forwarded only when sampling.

    Yields:
        The concatenation of all chunks streamed so far.

    Raises:
        Exception: whatever ``model.generate`` raised in the worker thread,
            re-raised here once the stream has been drained.
    """
    print(f"Input: {message}")
    start_time = time.time()

    inputs = tokenizer(message, return_tensors="pt").to(model.device)
    # NOTE(review): the streamer is created without ``skip_prompt``, so the
    # prompt text is presumably part of the streamed output -- confirm that
    # this echo is intended.
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    do_sample = temperature > 0
    gen_kwargs = {
        "input_ids": inputs["input_ids"],
        # Forward the mask the tokenizer produced instead of dropping it.
        "attention_mask": inputs.get("attention_mask"),
        "streamer": streamer,
        # UI sliders may deliver floats; generate expects an integer count.
        "max_new_tokens": int(max_new_tokens),
        "do_sample": do_sample,
    }
    if do_sample:
        # Sampling-only knobs are omitted when sampling is disabled.
        gen_kwargs["temperature"] = temperature
        gen_kwargs["top_p"] = top_p

    failures = []

    def _run_generation():
        # Runs in the worker thread. On failure, close the stream so the
        # consumer loop below cannot block forever, and keep the error so
        # it can be re-raised in the caller's thread.
        try:
            model.generate(**gen_kwargs)
        except Exception as exc:
            failures.append(exc)
            streamer.end()

    thread = Thread(target=_run_generation)
    thread.start()

    result = []
    for text in streamer:
        result.append(text)
        yield "".join(result)

    thread.join()
    if failures:
        raise failures[0]

    end_time = time.time()
    time_taken = end_time - start_time
    output_text = "".join(result)
    print(f"Output: {output_text}")
    print(f"Time taken: {time_taken:.2f} seconds")
40
# Gradio UI: prompt and sampling controls feed ``generate_response`` and its
# streamed text is shown in the output box.
# NOTE(review): the nesting below is reconstructed from the order of the
# calls (input and controls in one column, output in a second column) --
# confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Phi-4-Hindi Demo")
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Input",
                placeholder="Enter your text here...",
                lines=5,
            )
            with gr.Row():
                with gr.Column():
                    temperature = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        value=0.1,
                        step=0.01,
                        label="Temperature",
                    )
                with gr.Column():
                    max_new_tokens = gr.Slider(
                        minimum=50,
                        maximum=1000,
                        value=400,
                        step=10,
                        label="Max New Tokens",
                    )
                with gr.Column():
                    top_p = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        value=0.1,
                        step=0.01,
                        label="Top P",
                    )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                send_btn = gr.Button("Send", variant="primary")
        with gr.Column():
            output_text = gr.Textbox(
                label="Output",
                lines=15,
            )

    send_btn.click(
        fn=generate_response,
        inputs=[input_text, temperature, max_new_tokens, top_p],
        outputs=output_text,
    )
    # One return value per output component: the handler previously returned
    # four values although only two components are wired as outputs.
    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[input_text, output_text],
    )
92
# Script entry point: enable the request queue on the app, then start it.
if __name__ == "__main__":
    queued_demo = demo.queue()
    queued_demo.launch()