ShikharLLM committed
Commit ad29ed2 · verified · 1 parent: ed88ebf

Update app.py

Files changed (1)
1. app.py (+50 -24)
app.py CHANGED
@@ -1,31 +1,57 @@
-import os
 import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Retrieve the Hugging Face token from environment variables
 hf_token = os.getenv("HUGGINGFACE_TOKEN")
+import requests
+import os
+if not hf_token:
+    raise ValueError("Please set your Hugging Face API token as HUGGINGFACE_TOKEN in the Secrets settings.")
 
-# Load the tokenizer and model with the token
-model_id = "meta-llama/Llama-3.2-3B-Instruct"
-tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto", use_auth_token=hf_token)
-
-# Define the prediction function
-def generate_text(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=131072)
-    with torch.no_grad():
-        outputs = model.generate(**inputs, max_length=131072)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-# Create the Gradio interface
-interface = gr.Interface(
-    fn=generate_text,
-    inputs=gr.Textbox(lines=10, label="Input Prompt"),
-    outputs=gr.Textbox(lines=10, label="Generated Text"),
-    title="Meta Llama 3.2 3B Instruct Model",
-    description="Generate text using the Meta Llama 3.2 3B Instruct model with a context length of up to 128,000 tokens."
-)
+# Model details
+MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"  # Exact model ID on the Hub
+API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"
+
+# Headers for API requests
+HEADERS = {
+    "Authorization": f"Bearer {hf_token}",
+    "Content-Type": "application/json"
+}
+
+def chat_with_llama(prompt, temperature=0.7, max_tokens=256):
+    """Sends a request to the Hugging Face Inference API and returns the response."""
+    payload = {
+        "inputs": prompt,
+        "parameters": {
+            "temperature": temperature,
+            "max_new_tokens": max_tokens,
+            "top_p": 0.95
+        }
+    }
+
+    response = requests.post(API_URL, headers=HEADERS, json=payload)
+
+    if response.status_code == 200:
+        return response.json()[0]["generated_text"]
+    else:
+        return f"Error {response.status_code}: {response.text}"
+
+# Gradio UI
+with gr.Blocks() as demo:
+    gr.Markdown("<h2 align='center'>🚀 Llama 3.2 3B Instruct Chatbot</h2>")
+
+    with gr.Row():
+        with gr.Column():
+            prompt = gr.Textbox(label="Enter your prompt:", placeholder="Ask me anything...", lines=3)
+            temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
+            max_tokens = gr.Slider(50, 1024, value=256, label="Max Tokens")
+            submit = gr.Button("Generate Response")
+
+        with gr.Column():
+            output = gr.Textbox(label="AI Response", interactive=False, lines=10)
+
+    submit.click(chat_with_llama, inputs=[prompt, temperature, max_tokens], outputs=output)
 
+# Launch app
 if __name__ == "__main__":
-    interface.launch()
+    demo.launch()
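The new code path can also be exercised without the Gradio UI. Below is a minimal sketch that posts the same payload shape to the same serverless Inference API endpoint the committed app.py uses; it assumes HUGGINGFACE_TOKEN is set in the environment and that the model is enabled on the API. The prompt text and timeout value are illustrative, not part of the commit.

import os
import requests

# Same endpoint and payload shape as the committed chat_with_llama().
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-3B-Instruct"
headers = {"Authorization": f"Bearer {os.environ['HUGGINGFACE_TOKEN']}"}

payload = {
    "inputs": "Explain what a context window is in one sentence.",  # illustrative prompt
    "parameters": {"max_new_tokens": 64, "temperature": 0.7, "top_p": 0.95},
}
resp = requests.post(API_URL, headers=headers, json=payload, timeout=60)
resp.raise_for_status()
# The text-generation task returns a list of {"generated_text": ...} objects,
# which is why app.py indexes response.json()[0]["generated_text"].
print(resp.json()[0]["generated_text"])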