# Gradio Space app serving the yodayo-ai/nephra_v1.0 chat model.
# (Web-scrape residue — Space status lines, file size, and git-blame hashes —
# removed: it was not Python and broke the file.)
import gradio as gr
import transformers
import torch
# Model and pipeline setup
# NOTE(review): module-level side effect — this downloads/loads the model the
# moment the file is imported, which can take minutes and significant memory.
model_id = "yodayo-ai/nephra_v1.0"
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # bfloat16 halves memory vs. float32
    device_map="auto",           # let accelerate shard/place layers across devices
    offload_folder="offload" # Ensure this folder is available or adjust the path
)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
# NOTE(review): device_map here looks redundant — the model above was already
# dispatched with device_map="auto"; transformers typically warns when both are
# given. Confirm against the installed transformers version before removing.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
)
# Function to generate a response
def generate_response(user_input):
    """Generate a single chat completion for *user_input*.

    Builds a two-message chat (fixed "cheerful assistant" system prompt plus
    the user's text), renders it with the tokenizer's chat template, samples a
    completion, and returns only the newly generated text (the prompt prefix
    is stripped off).

    Parameters
    ----------
    user_input : str
        The user's message.

    Returns
    -------
    str
        The model's generated reply (prompt removed).
    """
    messages = [
        {"role": "system", "content": "You are to play the role of a cheerful assistant."},
        {"role": "user", "content": user_input},
    ]
    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,  # append the assistant-turn header so the model replies
    )
    # BUG FIX: the original passed pipeline.tokenizer.convert_tokens_to_ids("")
    # — an empty string — as a stop token. That yields the unknown-token id (or
    # None, depending on the tokenizer), not a real terminator; most likely a
    # special token such as "<|eot_id|>" was lost when this file was copied.
    # Stop only on the tokenizer's genuine EOS token instead.
    outputs = pipeline(
        prompt,
        max_new_tokens=512,
        eos_token_id=pipeline.tokenizer.eos_token_id,
        do_sample=True,
        temperature=1.12,  # slightly > 1: more varied, role-play-friendly output
        min_p=0.075,       # min-p sampling floor; prunes very unlikely tokens
    )
    # The pipeline returns prompt + completion; slice off the prompt prefix.
    return outputs[0]["generated_text"][len(prompt):]
# Gradio Interface: a single text box in, generated reply out.
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Chat with Nephra",
    description="Interact with the Nephra model, a roleplaying and instruction-based AI.",
)

# Launch the Gradio app.
# BUG FIX: the original line ended with a stray " |" (a copy/paste gutter
# artifact), which is a syntax error. The __main__ guard keeps the module
# importable; HF Spaces executes app.py as __main__, so launch still runs.
if __name__ == "__main__":
    interface.launch()