math_code / app.py
iman37's picture
Update app.py
cc7c86a verified
raw
history blame
1.15 kB
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import bitsandbytes as bnb
import torch
# Load the model and tokenizer with 4-bit quantization
@st.cache_resource
def load_model():
    """Load the Qwen2-7B tokenizer and its 4-bit quantized causal LM.

    Decorated with ``st.cache_resource`` so the (expensive) load is not
    repeated on every Streamlit script rerun.

    Returns:
        tuple: ``(tokenizer, model)`` for the ``Qwen/Qwen2-7B`` checkpoint.
    """
    # Local import: the file-level import only brings in AutoTokenizer and
    # AutoModelForCausalLM from transformers.
    from transformers import BitsAndBytesConfig

    model_id = "Qwen/Qwen2-7B"

    # bitsandbytes exposes no ``QuantizationConfig``; the 4-bit options are
    # carried by transformers' ``BitsAndBytesConfig``.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # ``load_in_4bit`` is deliberately NOT passed as a separate kwarg:
    # from_pretrained rejects it when ``quantization_config`` is also given.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        quantization_config=quantization_config,
    )
    return tokenizer, model
tokenizer, model = load_model()

# Streamlit app UI
st.title("Qwen-7B Text Generation with 4-bit Quantization")

# Text input
user_input = st.text_area("Enter your text:")

# Generate text on button click
if st.button("Generate"):
    if not user_input.strip():
        # A blank prompt gives generate() nothing to continue from; ask for
        # input instead of surfacing a stack trace in the UI.
        st.warning("Please enter some text before generating.")
    else:
        # The model is loaded with device_map="auto", so it may live on an
        # accelerator; the input tensors must be moved to the same device.
        inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
        # Inference only: skip autograd bookkeeping to save memory.
        with torch.no_grad():
            # max_new_tokens bounds only the generated continuation, whereas
            # max_length also counts prompt tokens and would leave no room to
            # generate for prompts of ~100 tokens or more.
            outputs = model.generate(**inputs, max_new_tokens=100)
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write("Generated Text:")
        st.write(generated_text)