import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import time
import gradio as gr
# from gradio import deploy  # unused while the deploy() call at the bottom stays commented out
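# Minimal RWKV-6 World chatbot: loads a local copy of the 1.6B model, runs it
# on CPU, and streams the reply to a Gradio text box one token at a time.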
def generate_prompt(instruction, input=""):
    instruction = instruction.strip().replace('\r\n', '\n').replace('\n\n', '\n')
    input = input.strip().replace('\r\n', '\n').replace('\n\n', '\n')
    if input:
        return f"""Instruction: {instruction}
Input: {input}
Response:"""
    else:
        return f"""User: hi
Assistant: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.
User: {instruction}
Assistant:"""
model_path = "models/rwkv-6-world-1b6/" # Path to your local model directory
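# Load the model with its custom modeling code (trust_remote_code=True) and
# cast the weights to float32 for CPU inference.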
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    # use_flash_attention_2=False
).to(torch.float32)
model = model.quantize(8) # Quantize to int8 (experiment with different values)
model = model.to("cpu")
# Create a custom tokenizer (make sure to download vocab.json)
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    bos_token="</s>",
    eos_token="</s>",
    unk_token="<unk>",
    pad_token="<pad>",
    trust_remote_code=True,
    padding_side='left',
    clean_up_tokenization_spaces=False  # Or set to True if you prefer
)
# Function to handle text generation with word-by-word output and stop sequence
def generate_text(input_text):
    prompt = generate_prompt(input_text)
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    generated_text = ""
    for i in range(333):
        output = model.generate(input_ids, max_new_tokens=1, do_sample=True, temperature=1.0, top_p=0.3, top_k=0)
        new_word = tokenizer.decode(output[0][-1:], skip_special_tokens=True)
        print(new_word, end="", flush=True)  # Print to console for monitoring
        generated_text += new_word
        input_ids = output
        # Stop sequence: a blank line marks the end of an assistant turn in this prompt format
        if "\n\n" in generated_text:
            yield generated_text.split("\n\n")[0]
            break
        yield generated_text  # Yield the updated text after each word
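# Because generate_text is a generator, Gradio streams each yielded string to
# the output textbox as it arrives instead of waiting for the full reply.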
# Create the Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="RWKV Chatbot",
    description="Enter your prompt below:",
    # flagging_callback=None
    flagging_dir="gradio_flagged/"
)
# Launch the interface; this works for local testing, and Hugging Face Spaces
# serves the same app automatically when it runs this script.
iface.launch(share=False)
# deploy()