Spaces:

ShikharLLM
/

science

Runtime error

App Files Files Community

ShikharLLM committed on Feb 24

Commit

ad29ed2

verified ·

1 Parent(s): ed88ebf

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -24

app.py CHANGED Viewed

@@ -1,31 +1,57 @@
-import os
 import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 # Retrieve the Hugging Face token from environment variables
 hf_token = os.getenv("HUGGINGFACE_TOKEN")
-# Load the tokenizer and model with the token
-model_id = "meta-llama/Llama-3.2-3B-Instruct"
-tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto", use_auth_token=hf_token)
-# Define the prediction function
-def generate_text(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=131072)
-    with torch.no_grad():
-        outputs = model.generate(**inputs, max_length=131072)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
-# Create the Gradio interface
-interface = gr.Interface(
-    fn=generate_text,
-    inputs=gr.Textbox(lines=10, label="Input Prompt"),
-    outputs=gr.Textbox(lines=10, label="Generated Text"),
-    title="Meta Llama 3.2 3B Instruct Model",
-    description="Generate text using the Meta Llama 3.2 3B Instruct model with a context length of up to 128,000 tokens."
-)
 if __name__ == "__main__":
-    interface.launch()

 import gradio as gr
+import requests
+import os
 # Retrieve the Hugging Face token from environment variables
 hf_token = os.getenv("HUGGINGFACE_TOKEN")
+if not HF_TOKEN:
+    raise ValueError("Please set your Hugging Face API token as HF_API_TOKEN in the Secrets settings.")
+# Model details
+MODEL_ID = "meta-llama/llama-3-3b-instruct"  # Change to the exact model ID
+API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"
+# Headers for API requests
+HEADERS = {
+    "Authorization": f"Bearer {HF_TOKEN}",
+    "Content-Type": "application/json"
+}
+def chat_with_llama(prompt, temperature=0.7, max_tokens=256):
+    """Sends a request to Hugging Face Inference API and returns the response."""
+    payload = {
+        "inputs": prompt,
+        "parameters": {
+            "temperature": temperature,
+            "max_new_tokens": max_tokens,
+            "top_p": 0.95
+        }
+    }
+    response = requests.post(API_URL, headers=HEADERS, json=payload)
+    if response.status_code == 200:
+        return response.json()[0]["generated_text"]
+    else:
+        return f"Error {response.status_code}: {response.text}"
+# Gradio UI
+# Builds the page inside a Blocks context bound to `demo`: a centered heading,
+# then one row holding two columns (input controls first, response box second).
+with gr.Blocks() as demo:
+    gr.Markdown("<h2 align='center'>🚀 Llama 3.2 3B Instruct Chatbot</h2>")
+    with gr.Row():
+        with gr.Column():
+            # Input controls; the slider defaults mirror chat_with_llama's
+            # defaults (temperature 0.7, max_tokens 256).
+            prompt = gr.Textbox(label="Enter your prompt:", placeholder="Ask me anything...", lines=3)
+            temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
+            max_tokens = gr.Slider(50, 1024, value=256, label="Max Tokens")
+            submit = gr.Button("Generate Response")
+        with gr.Column():
+            # Read-only box that shows whatever string chat_with_llama returns.
+            output = gr.Textbox(label="AI Response", interactive=False, lines=10)
+    # On click, the three input components are passed positionally as
+    # (prompt, temperature, max_tokens) and the return value is written to `output`.
+    submit.click(chat_with_llama, inputs=[prompt, temperature, max_tokens], outputs=output)
+# Launch app
+# Start the server only when this file is executed as a script.
 if __name__ == "__main__":
+    demo.launch()