druvx13 committed on
Commit
f0d2cc4
·
verified ·
1 Parent(s): 1e80bae

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import os
4
+ from huggingface_hub import hf_hub_download
5
+
6
# Model configuration
# Hugging Face Hub repository id; passed to hf_hub_download() and to
# llama-server's --hf-repo option.
MODEL_REPO = "druvx13/gpt2-Q4_K_M-GGUF"
# File name of the model inside MODEL_REPO; also the local path that
# ensure_model() checks for and downloads to.
MODEL_FILE = "gpt2-q4_k_m.gguf"
# Port that generate_text() uses when calling the local llama.cpp server.
SERVER_PORT = 8080
10
+
11
def ensure_model():
    """Make sure the model file exists locally and return its name.

    If MODEL_FILE is not already present in the working directory it is
    fetched from MODEL_REPO into the current directory first.

    Returns:
        MODEL_FILE, unchanged, whether or not a download was needed.
    """
    already_present = os.path.exists(MODEL_FILE)
    if already_present:
        return MODEL_FILE

    print("Downloading model...")
    hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=".")
    return MODEL_FILE
17
+
18
# Start the llama.cpp server in the background. This must happen before
# Gradio launches, because generate_text() sends its requests to this server.
import subprocess  # imported here so this block brings its own dependency

# Resolve the model first so a download failure surfaces exactly as before.
_model_file = ensure_model()

try:
    # An argument list (no shell) avoids quoting issues and keeps a handle to
    # the child process. --port is passed explicitly so the server listens on
    # the port generate_text() targets instead of relying on the server's
    # built-in default happening to match SERVER_PORT.
    llama_server = subprocess.Popen(
        [
            "./llama-server",
            "--hf-repo", MODEL_REPO,
            "--hf-file", _model_file,
            "-c", "2048",
            "--port", str(SERVER_PORT),
        ]
    )
except OSError as exc:
    # Best effort, like the original shell launch: a missing or
    # non-executable binary must not stop the UI from starting.
    # generate_text() reports the unreachable server to the user.
    llama_server = None
    print(f"Could not start llama-server: {exc}")
20
+
21
def generate_text(prompt, max_tokens=100, temp=0.7, timeout=120):
    """Request a completion for *prompt* from the local llama.cpp server.

    Args:
        prompt: Text sent as the "prompt" field of the request.
        max_tokens: Sent as "n_predict". Coerced to int because UI sliders
            may deliver the value as a float.
        temp: Sent as "temperature".
        timeout: Seconds to wait for the server before giving up, so a
            stalled server cannot block the caller indefinitely.

    Returns:
        The "content" field of the server's JSON response, or a string
        starting with "Error:" if the request or response handling failed.
        This function never raises.
    """
    try:
        payload = {
            "prompt": prompt,
            "stream": False,
            "temperature": temp,
            "n_predict": int(max_tokens),
        }
        response = requests.post(
            f"http://localhost:{SERVER_PORT}/completion",
            json=payload,
            timeout=timeout,
        )
        # Surface HTTP error statuses as a clear HTTPError message rather
        # than a confusing KeyError on the missing "content" field.
        response.raise_for_status()
        return response.json()["content"]
    except Exception as e:
        # UI callback boundary: every failure is reported as text in the
        # output box rather than raised.
        return f"Error: {str(e)}. Ensure server is running."
35
+
36
# UI configuration
# NOTE(review): the original indentation was lost in extraction. The output
# box is assumed to sit inside the Row, beside the input column -- confirm
# against the real app.py.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# GPT-2 Text Generation (GGUF Version)\nPowered by llama.cpp and HuggingFace Spaces")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your prompt here...",
                lines=5,
            )
            # step=1 keeps the token budget a whole number; it is forwarded
            # to generate_text() as max_tokens.
            max_tokens = gr.Slider(10, 500, value=100, step=1, label="Max Output Tokens")
            temp = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
            submit = gr.Button("Generate", variant="primary")

        output = gr.Textbox(label="Generated Text", lines=10)

    # Wire the button: the three inputs map positionally onto
    # generate_text(prompt, max_tokens, temp); its return value fills output.
    submit.click(
        fn=generate_text,
        inputs=[prompt, max_tokens, temp],
        outputs=output,
    )

# Only start the web server when run as a script, so importing this module
# does not block inside launch().
if __name__ == "__main__":
    demo.launch(server_port=7860)