File size: 1,148 Bytes
4375924 88e8a85 4375924 88e8a85 4375924 cc7c86a 88e8a85 cc7c86a 88e8a85 4375924 88e8a85 4375924 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import bitsandbytes as bnb
import torch
# Load the model and tokenizer with 4-bit quantization
@st.cache_resource
def load_model():
    """Load the Qwen2-7B tokenizer and model with 4-bit NF4 quantization.

    The function is wrapped in ``st.cache_resource``; the tokenizer and
    model it returns are the objects handed to the rest of the app.

    Returns:
        tuple: ``(tokenizer, model)`` as returned by
        ``AutoTokenizer.from_pretrained`` and
        ``AutoModelForCausalLM.from_pretrained``.
    """
    # The quantization config class is provided by transformers, not by the
    # bitsandbytes package itself (bitsandbytes has no ``QuantizationConfig``).
    # Imported locally so this block does not depend on a file-level change.
    from transformers import BitsAndBytesConfig

    model_id = "Qwen/Qwen2-7B"

    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # ``load_in_4bit`` is set only on the config object: passing it as a
    # separate keyword together with ``quantization_config`` is rejected by
    # ``from_pretrained``.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        quantization_config=quantization_config,
    )
    return tokenizer, model
tokenizer, model = load_model()

# Streamlit app UI
st.title("Qwen-7B Text Generation with 4-bit Quantization")

# Text input
user_input = st.text_area("Enter your text:")

# Generate text on button click
if st.button("Generate"):
    if not user_input.strip():
        # Nothing to condition on; avoid calling the model with an empty prompt.
        st.warning("Please enter some text before generating.")
    else:
        # The model is loaded with device_map="auto", so it may not live on the
        # CPU; the tokenized inputs must be moved to the model's device.
        inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
        # max_new_tokens bounds only the generated continuation, whereas
        # max_length would also count the prompt tokens and could leave no
        # room for output on long prompts.
        outputs = model.generate(**inputs, max_new_tokens=100)
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write("Generated Text:")
        st.write(generated_text)
|