Spaces: Running on Zero
File size: 876 Bytes
import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Authenticate with the Hub; the Llama model repo is gated
login(token=os.getenv("HF_TOKEN"))
# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-11B-Vision-Instruct")
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
    device_map="auto",
    torch_dtype="auto",
)
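# device_map="auto" places the weights on whatever accelerator is available
# (falling back to CPU), and torch_dtype="auto" keeps the checkpoint's own
# dtype. Note: Llama 3.2 11B Vision is a multimodal (mllama) checkpoint; to
# actually feed images, the usual pairing in transformers is
# MllamaForConditionalGeneration with AutoProcessor. This app only exercises
# the text path.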
def generate_response(message, history):
    # With multimodal=True, `message` is a dict with "text" and "files";
    # only the text is used here, so uploaded files are ignored.
    inputs = tokenizer(message["text"], return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
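# Since this is an Instruct checkpoint, formatting the raw text with the
# model's chat template usually produces better replies. A minimal sketch of
# that variant of the tokenization step above (same tokenizer, standard
# transformers API):
#
#   prompt = tokenizer.apply_chat_template(
#       [{"role": "user", "content": message["text"]}],
#       tokenize=False,
#       add_generation_prompt=True,
#   )
#   inputs = tokenizer(prompt, return_tensors="pt").to(model.device)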
demo = gr.ChatInterface(
    fn=generate_response,
    examples=[{"text": "Hello", "files": []}],
    title="LLAMA 3.2 Chat",
    multimodal=True,
)
demo.launch(debug=True)
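# This Space reports "Running on Zero" (ZeroGPU). On ZeroGPU hardware,
# GPU-bound functions are normally wrapped with the `spaces` package so a GPU
# is attached only while they run; a minimal sketch, assuming the `spaces`
# package is installed:
#
#   import spaces
#
#   @spaces.GPU
#   def generate_response(message, history):
#       ...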