# main.py
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
from huggingface_hub import HfApi
# Replace '<your_api_token>' with your actual Hugging Face API token
api_token = '<your_api_token>'
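# Note: on Hugging Face Spaces the token is typically stored as a repository
# secret and read from the environment (e.g. os.environ.get("HF_TOKEN"))
# rather than hardcoded in the source file.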
# Initialize the HfApi with the API token
api = HfApi(token=api_token)
print('logged in')
# Verify that you're logged in
user = api.whoami()
print(user)
# Load model and tokenizer
model_name = "meta-llama/Llama-2-7b-chat-hf"
print("started loading model")
api_token = "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" # Replace with your actual API token
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    revision="main",  # or the desired revision
    token=api_token,
    # return_dict=True,
    # torch_dtype=torch.float16,
)
print("loaded model")

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    revision="main",  # or the desired revision
    token=api_token,
)
print("loaded tokenizer")
chat_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
print("built pipeline")
# Define the generate_response function
def generate_response(prompt):
    # max_length counts the prompt tokens as well, so replies stay short
    response = chat_pipeline(prompt, max_length=50)[0]['generated_text']
    return response
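# Note: with the default pipeline settings, 'generated_text' includes the
# prompt itself followed by the model's continuation.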
# Create Gradio interface
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="LLAMA-2-7B Chatbot",
    description="Enter a prompt and get a chatbot response.",
    examples=[["Tell me a joke."]],
)

if __name__ == "__main__":
    interface.launch()