Mubbashir Ahmed committed on
Commit a730ddc · 1 Parent(s): 7e42f7f

llama 4 model

Files changed (2)
  1. app.py +31 -39
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,53 +1,45 @@
+import os
 import gradio as gr
-from transformers import AutoProcessor, Llama4ForConditionalGeneration
-import torch
+from huggingface_hub import InferenceClient
 
-model_id = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
-
-processor = AutoProcessor.from_pretrained(model_id)
-model = Llama4ForConditionalGeneration.from_pretrained(
-    model_id,
-    attn_implementation="flex_attention",
-    device_map="auto",
-    torch_dtype=torch.bfloat16,
+# Read your HF token from secret
+client = InferenceClient(
+    provider="sambanova",
+    api_key=os.environ["HF_TOKEN"],
 )
 
-def analyze_images(image1, image2, question):
+def llama4_image_chat(image_url, question):
     messages = [
         {
             "role": "user",
             "content": [
-                {"type": "image", "image": image1},
-                {"type": "image", "image": image2},
-                {"type": "text", "text": question}
+                {"type": "text", "text": question},
+                {
+                    "type": "image_url",
+                    "image_url": {"url": image_url}
+                }
             ]
         }
     ]
 
-    inputs = processor.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        tokenize=True,
-        return_dict=True,
-        return_tensors="pt",
-    ).to(model.device)
-
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=256,
+    completion = client.chat.completions.create(
+        model="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
+        messages=messages
     )
 
-    response = processor.batch_decode(outputs[:, inputs["input_ids"].shape[-1]:])[0]
-    return response.strip()
-
-gr.Interface(
-    fn=analyze_images,
-    inputs=[
-        gr.Image(type="pil", label="Image 1"),
-        gr.Image(type="pil", label="Image 2"),
-        gr.Textbox(lines=2, label="Your Question"),
-    ],
-    outputs="text",
-    title="LLaMA 4 Multimodal Visual Q&A",
-    description="Upload two images and ask a question — powered by LLaMA 4"
-).launch()
+    return completion.choices[0].message.content
+
+with gr.Blocks() as demo:
+    gr.Markdown("## 🦙 LLaMA 4 Visual Chat")
+    gr.Markdown("Upload an image URL and ask a question.")
+
+    with gr.Row():
+        image_url_input = gr.Textbox(label="Image URL", placeholder="Paste image URL here...")
+        question_input = gr.Textbox(label="Question", placeholder="e.g., Describe this image in one sentence.")
+
+    submit_btn = gr.Button("Ask LLaMA 4")
+    output_box = gr.Textbox(label="Response", lines=6)
+
+    submit_btn.click(fn=llama4_image_chat, inputs=[image_url_input, question_input], outputs=output_box)
+
+demo.launch()
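
Note: the rewritten app assumes the HF_TOKEN secret is already configured; os.environ["HF_TOKEN"] raises a bare KeyError at startup if it is missing, and any provider failure reaches the UI as a traceback. A minimal, more defensive sketch of the same handler follows. The startup check and the try/except wrapper are illustrative additions, not part of this commit:

import os

from huggingface_hub import InferenceClient

# Fail fast with a readable message if the Space secret is not configured.
token = os.environ.get("HF_TOKEN")
if not token:
    raise RuntimeError("HF_TOKEN is not set; add it under the Space's Settings > Variables and secrets.")

client = InferenceClient(provider="sambanova", api_key=token)

def llama4_image_chat(image_url, question):
    # Same message shape as in app.py: one user turn with text plus an image URL.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image_url", "image_url": {"url": image_url}},
            ],
        }
    ]
    try:
        completion = client.chat.completions.create(
            model="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
            messages=messages,
        )
    except Exception as exc:
        # Surface provider/network failures in the output box instead of a stack trace.
        return f"Inference request failed: {exc}"
    return completion.choices[0].message.content

Returning the error string keeps the Gradio output box usable even when the SambaNova endpoint is unreachable or the token lacks access to the model.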
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 transformers>=4.41.0
 torch>=2.2.0
 gradio>=4.24.0
+huggingface_hub>=0.22.2
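
After this commit, app.py no longer imports transformers or torch, so those heavy pins look like leftovers from the local-inference version. If nothing else in the Space uses them (an untested assumption, worth verifying before removing anything), the file could presumably be trimmed to:

gradio>=4.24.0
huggingface_hub>=0.22.2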