import gradio as gr
import requests
import os
# Retrieve the Hugging Face token from environment variables
HF_TOKEN = os.getenv("HF_API_TOKEN")
if not HF_TOKEN:
    raise ValueError("Please set your Hugging Face API token as HF_API_TOKEN in the Secrets settings.")
# Model details (must match the exact Hub model ID; the UI below targets the 3B Instruct model)
MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"
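# Note: the serverless Inference API may answer 503 with an "estimated_time"
# field while the model is still loading; retrying after a short wait usually works.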
# Headers for API requests
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json",
}
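# Note: Llama weights on the Hub are gated, so the token must belong to an
# account that has accepted the Llama 3.2 license; otherwise requests are rejected.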
def chat_with_llama(prompt, temperature=0.7, max_tokens=256):
    """Sends a request to the Hugging Face Inference API and returns the response."""
    payload = {
        "inputs": prompt,
        "parameters": {
            "temperature": temperature,
            "max_new_tokens": max_tokens,
            "top_p": 0.95,
            "return_full_text": False,  # return only the completion, not the echoed prompt
        },
    }
    response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=120)
    if response.status_code == 200:
        return response.json()[0]["generated_text"]
    return f"Error {response.status_code}: {response.text}"
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("<h2 align='center'>🚀 Llama 3.2 3B Instruct Chatbot</h2>")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Enter your prompt:", placeholder="Ask me anything...", lines=3)
            temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
            max_tokens = gr.Slider(50, 1024, value=256, label="Max Tokens")
            submit = gr.Button("Generate Response")
        with gr.Column():
            output = gr.Textbox(label="AI Response", interactive=False, lines=10)
    submit.click(chat_with_llama, inputs=[prompt, temperature, max_tokens], outputs=output)
# Launch app
if __name__ == "__main__":
    demo.launch()