# main.py
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
from huggingface_hub import HfApi
# Replace '<your_api_token>' with your actual Hugging Face API token
api_token = '<your_api_token>'
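# Note: on Hugging Face Spaces the token is typically stored as a repository
# secret and read from the environment (e.g. os.environ.get("HF_TOKEN"))
# rather than hardcoded in the source file.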
# Initialize the HfApi with the API token
api = HfApi(token=api_token)
print('logged in')
# Verify that you're logged in
user = api.whoami()
print(user)
# Load model and tokenizer
model_name = "meta-llama/Llama-2-7b-chat-hf"
print("started loading model")
api_token = "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" # Replace with your actual API token
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    revision="main",  # or the desired revision
    token=api_token,
    # return_dict=True,
    # torch_dtype=torch.float16,
)
print("loaded model")

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    revision="main",  # or the desired revision
    token=api_token,
)
print("loaded tokenizer")
chat_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
print("built pipeline")
# Define the generate_response function
def generate_response(prompt):
    # max_length counts the prompt tokens as well, so replies stay short
    response = chat_pipeline(prompt, max_length=50)[0]['generated_text']
    return response
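# Note: with the default pipeline settings, 'generated_text' includes the
# prompt itself followed by the model's continuation.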
# Create Gradio interface
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="LLAMA-2-7B Chatbot",
    description="Enter a prompt and get a chatbot response.",
    examples=[["Tell me a joke."]],
)

if __name__ == "__main__":
    interface.launch()