Spaces:

mimireyburn
/

theOnion

Paused

theOnion / app.py

Remove 8bit

8450044 over 1 year ago

1.35 kB

	import gradio as gr
	import transformers as t
	import torch
	import peft

	# Load the base model and tokenizer for the "NousResearch/Llama-2-7b-hf"
	# checkpoint; the fine-tuned LoRA adapter weights are applied further below.
	tokenizer = t.AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-hf")
	model = t.AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-hf")
	# Use token id 0 as the padding token.
	tokenizer.pad_token_id = 0

	# LoRA adapter layout applied to the q_proj / v_proj modules.
	# NOTE(review): presumably these values mirror the training-time adapter
	# configuration -- confirm against the training script.
	config = peft.LoraConfig(
	    r=8,
	    lora_alpha=16,
	    target_modules=["q_proj", "v_proj"],
	    lora_dropout=0.005,
	    bias="none",
	    task_type="CAUSAL_LM",
	)
	# Prefer the GPU when one is available, otherwise fall back to the CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	model = peft.get_peft_model(model, config).to(device)

	# map_location remaps the saved tensors onto the device chosen above, so an
	# adapter file written on a GPU machine still loads on a CPU-only host.
	# NOTE: torch.load unpickles the file; only load adapter files you trust.
	peft.set_peft_model_state_dict(
	    model,
	    torch.load(".weights/adapter_model.bin", map_location=device),
	)

	# Inference only: switch to eval mode so the LoRA dropout layers are disabled.
	model.eval()

	# Text-generation pipeline shared by all requests; built on first use.
	_article_pipeline = None


	def _get_article_pipeline():
	    """Return the shared text-generation pipeline, creating it on first call."""
	    global _article_pipeline
	    if _article_pipeline is None:
	        # Building the pipeline once avoids repeating the same setup work
	        # for every request; its arguments never change between calls.
	        _article_pipeline = t.pipeline(
	            task="text-generation",
	            model=model,
	            tokenizer=tokenizer,
	            max_length=1000,
	        )
	    return _article_pipeline


	# Define a prediction function
	def generate_article(title):
	    """Generate an article for the given title.

	    The title is inserted into a fixed instruction prompt, which is passed
	    to the text-generation pipeline built from the module-level ``model``
	    and ``tokenizer``.

	    Args:
	        title: Article title entered by the user.

	    Returns:
	        The ``"generated_text"`` field of the first result for the prompt.
	        NOTE(review): with the pipeline's default settings this text
	        presumably still begins with the prompt itself -- confirm whether
	        the prompt should be stripped before display.
	    """
	    prompt = f"Below is a title for an article. Write an article that appropriately suits the title: \n\n### Title:\n{title}\n\n### Article:\n"
	    # A one-element batch goes in, so the result is a list (per prompt) of
	    # lists (per generated sequence); take the first sequence of the first prompt.
	    output = _get_article_pipeline()([prompt])
	    return output[0][0]["generated_text"]

	# Create a Gradio interface: one two-line text box for the title, plain
	# text output produced by generate_article.
	iface = gr.Interface(
	    fn=generate_article,
	    # gr.Textbox is the current component class; the legacy
	    # gr.inputs.Textbox namespace is deprecated and absent from newer
	    # Gradio releases.
	    inputs=gr.Textbox(lines=2, placeholder="Enter Article Title Here"),
	    outputs="text",
	    title="Article Generator",
	    description="Enter a title to generate an article.",
	)

	# Launch the app
	iface.launch()