Spaces:

adilkh26
/

InternVideo2_5_Chat_8B

Build error

File size: 1,235 Bytes

8ba8ba9
2372d69
bb1f5be
8ba8ba9
2372d69
 
8ba8ba9
4f95d01
 
2372d69
 
8ba8ba9
2272006
 
bb1f5be
2272006
cde97aa
2272006
2372d69
2272006
 
8ba8ba9
 
2272006
 
bb1f5be
2372d69
 
2272006
2372d69
 
 
 
 
 
 
 
 
 
 
 
8ba8ba9
2372d69
8ba8ba9

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub repository that provides both the tokenizer and the weights.
model_name = "OpenGVLab/InternVideo2_5_Chat_8B"

# Load tokenizer. trust_remote_code=True allows transformers to run the custom
# Python code shipped inside the model repository.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Detect device
device = "cuda" if torch.cuda.is_available() else "cpu"
use_gpu = device == "cuda"

# Load model: float16 with automatic device placement on GPU, float32 with the
# default placement on CPU.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16 if use_gpu else torch.float32,
    device_map="auto" if use_gpu else None,
)

# NOTE: there is deliberately no `model.to(device)` here. On GPU the weights
# were already placed by device_map="auto", and moving a dispatched model is
# redundant at best and raises if any module was offloaded to CPU/disk. On CPU
# no device_map is passed, so the model is loaded on the CPU already.

# Define inference function
def chat_with_model(prompt):
    """Generate a text reply for ``prompt`` using the module-level model.

    Args:
        prompt: Text entered by the user in the Gradio textbox.

    Returns:
        The first generated sequence decoded to text with special tokens
        removed, or an empty string when ``prompt`` is empty or contains
        only whitespace.
    """
    # Nothing to generate from; skip the model call entirely.
    if not prompt or not prompt.strip():
        return ""

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # max_new_tokens limits only the generated continuation. max_length would
    # also count the prompt tokens, leaving no room for a reply once the
    # prompt itself approaches 200 tokens.
    output = model.generate(**inputs, max_new_tokens=200)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Create Gradio UI: a single text box in, the generated text out.
demo = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(placeholder="Type your prompt here..."),
    outputs="text",
    title="InternVideo2.5 Chatbot",
    description="A chatbot powered by InternVideo2_5_Chat_8B.",
    # No `theme` argument: "compact" is not a built-in theme name in current
    # Gradio releases, so passing it only triggers a failed theme lookup and a
    # fallback to the default theme. Omitting it selects the default directly.
)

# Run the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()