Spaces:

druvx13
/

Ztar

Sleeping

App Files Files Community

druvx13 committed on May 28

Commit

f0d2cc4

verified ·

1 Parent(s): 1e80bae

Create app.py

Browse files

Files changed (1) hide show

app.py +59 -0

app.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import gradio as gr
+import requests
+import os
+from huggingface_hub import hf_hub_download
+# Model configuration
+MODEL_REPO = "druvx13/gpt2-Q4_K_M-GGUF"
+MODEL_FILE = "gpt2-q4_k_m.gguf"
+SERVER_PORT = 8080
+# Download model if not exists
+def ensure_model():
+    if not os.path.exists(MODEL_FILE):
+        print("Downloading model...")
+        hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=".")
+    return MODEL_FILE
+# Start llama.cpp server (must be done before launching Gradio)
+os.system(f"./llama-server --hf-repo {MODEL_REPO} --hf-file {ensure_model()} -c 2048 &")
+def generate_text(prompt, max_tokens=100, temp=0.7):
+    try:
+        response = requests.post(
+            f"http://localhost:{SERVER_PORT}/completion",
+            json={
+                "prompt": prompt,
+                "stream": False,
+                "temperature": temp,
+                "n_predict": max_tokens
+            }
+        )
+        return response.json()["content"]
+    except Exception as e:
+        return f"Error: {str(e)}. Ensure server is running."
+# UI Configuration
+with gr.Blocks(theme="soft") as demo:
+    gr.Markdown("# GPT-2 Text Generation (GGUF Version)\nPowered by llama.cpp and HuggingFace Spaces")
+    with gr.Row():
+        with gr.Column():
+            prompt = gr.Textbox(
+                label="Input Prompt",
+                placeholder="Enter your prompt here...",
+                lines=5
+            )
+            max_tokens = gr.Slider(10, 500, value=100, label="Max Output Tokens")
+            temp = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
+            submit = gr.Button("Generate", variant="primary")
+        output = gr.Textbox(label="Generated Text", lines=10)
+    submit.click(
+        fn=generate_text,
+        inputs=[prompt, max_tokens, temp],
+        outputs=output
+    )
+demo.launch(server_port=7860)