# Gradio Space app serving the yodayo-ai/nephra_v1.0 chat model.
# (Web-scrape residue — Space status lines, file size, and git-blame hashes —
# removed: it was not Python and broke the file.)
import gradio as gr
import transformers
import torch
# Model and pipeline setup
# NOTE(review): module-level side effect — this downloads/loads the model the
# moment the file is imported, which can take minutes and significant memory.
model_id = "yodayo-ai/nephra_v1.0"
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # bfloat16 halves memory vs. float32
    device_map="auto",           # let accelerate shard/place layers across devices
    offload_folder="offload" # Ensure this folder is available or adjust the path
)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
# NOTE(review): device_map here looks redundant — the model above was already
# dispatched with device_map="auto"; transformers typically warns when both are
# given. Confirm against the installed transformers version before removing.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
)
# Function to generate a response
def generate_response(user_input):
    """Generate a single chat completion for *user_input*.

    Builds a two-message chat (fixed "cheerful assistant" system prompt plus
    the user's text), renders it with the tokenizer's chat template, samples a
    completion, and returns only the newly generated text (the prompt prefix
    is stripped off).

    Parameters
    ----------
    user_input : str
        The user's message.

    Returns
    -------
    str
        The model's generated reply (prompt removed).
    """
    messages = [
        {"role": "system", "content": "You are to play the role of a cheerful assistant."},
        {"role": "user", "content": user_input},
    ]
    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,  # append the assistant-turn header so the model replies
    )
    # BUG FIX: the original passed pipeline.tokenizer.convert_tokens_to_ids("")
    # — an empty string — as a stop token. That yields the unknown-token id (or
    # None, depending on the tokenizer), not a real terminator; most likely a
    # special token such as "<|eot_id|>" was lost when this file was copied.
    # Stop only on the tokenizer's genuine EOS token instead.
    outputs = pipeline(
        prompt,
        max_new_tokens=512,
        eos_token_id=pipeline.tokenizer.eos_token_id,
        do_sample=True,
        temperature=1.12,  # slightly > 1: more varied, role-play-friendly output
        min_p=0.075,       # min-p sampling floor; prunes very unlikely tokens
    )
    # The pipeline returns prompt + completion; slice off the prompt prefix.
    return outputs[0]["generated_text"][len(prompt):]
# Gradio Interface: a single text box in, generated reply out.
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Chat with Nephra",
    description="Interact with the Nephra model, a roleplaying and instruction-based AI.",
)

# Launch the Gradio app.
# BUG FIX: the original line ended with a stray " |" (a copy/paste gutter
# artifact), which is a syntax error. The __main__ guard keeps the module
# importable; HF Spaces executes app.py as __main__, so launch still runs.
if __name__ == "__main__":
    interface.launch()