# cpu-casuallm/app.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def generate_prompt(instruction, input=""):
    # Normalize line endings and collapse doubled newlines so the prompt
    # template stays single-spaced.
    instruction = instruction.strip().replace('\r\n', '\n').replace('\n\n', '\n')
    input = input.strip().replace('\r\n', '\n').replace('\n\n', '\n')
    if input:
        return f"""Instruction: {instruction}
Input: {input}
Response:"""
    else:
        return f"""User: hi
Lover: Hi. I am your assistant and I will provide expert, fully detailed responses. Please feel free to ask any question and I will always answer it.
User: {instruction}
Lover:"""
model_path = "models/rwkv-6-world-1b6/"  # Path to your local model directory

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    use_flash_attention_2=False,  # Explicitly disable Flash Attention
).to(torch.float32)  # Cast to float32: CPU inference is the target here
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    bos_token="</s>",
    eos_token="</s>",
    unk_token="<unk>",
    pad_token="<pad>",
    trust_remote_code=True,
    padding_side='left',
    clean_up_tokenization_spaces=False,  # Or set to True if you prefer
)
print(tokenizer.special_tokens_map)  # Sanity-check which special tokens registered
text = "Hi"
prompt = generate_prompt(text)
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
# Generate text one token at a time so each token can be streamed to stdout.
generated_text = ""
for i in range(333):  # Generate up to 333 tokens
    output = model.generate(input_ids, max_new_tokens=1, do_sample=True,
                            temperature=1.0, top_p=0.3, top_k=0)  # top_k=0 disables top-k filtering
    new_token = tokenizer.decode(output[0][-1:], skip_special_tokens=True)
    print(new_token, end="", flush=True)  # Print each token as it is decoded
    generated_text += new_token
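    # Minimal stop-sequence check (a sketch; the "\nUser" marker is an
    # assumption based on the dialogue template above): stop once the model
    # begins a new "User:" turn rather than always emitting all 333 tokens.
    if "\nUser" in generated_text:
        break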
    input_ids = output  # Feed the grown sequence back in for the next step
print() # Add a newline at the end
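
# The loop above calls generate() once per token, recomputing the whole prefix
# on every step. A sketch of the more idiomatic alternative, using
# transformers' built-in TextStreamer to stream from a single generate() call
# (same sampling settings; this would replace the loop above):
#
#     from transformers import TextStreamer
#
#     streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
#     model.generate(input_ids, max_new_tokens=333, do_sample=True,
#                    temperature=1.0, top_p=0.3, top_k=0, streamer=streamer)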