File size: 1,148 Bytes
4375924
 
88e8a85
 
4375924
88e8a85
4375924
 
cc7c86a
88e8a85
cc7c86a
88e8a85
 
 
 
 
 
 
 
 
4375924
 
 
 
 
88e8a85
4375924
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import bitsandbytes as bnb
import torch

# Load the model and tokenizer with 4-bit quantization.
# Wrapped in st.cache_resource so the (expensive) load is shared across
# Streamlit reruns instead of being repeated on every interaction.
@st.cache_resource
def load_model():
    """Load the Qwen2-7B tokenizer and its 4-bit (NF4) quantized causal LM.

    Returns:
        tuple: ``(tokenizer, model)`` as returned by
        ``AutoTokenizer.from_pretrained`` and
        ``AutoModelForCausalLM.from_pretrained``.
    """
    # The quantization config class lives in transformers, not in the
    # bitsandbytes package itself (bitsandbytes is only the backend).
    # Imported locally so this block stays self-contained.
    from transformers import BitsAndBytesConfig

    model_id = "Qwen/Qwen2-7B"

    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Do NOT also pass load_in_4bit=True here: from_pretrained rejects the
    # kwarg when a quantization_config is supplied at the same time.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        quantization_config=quantization_config,
    )
    return tokenizer, model

tokenizer, model = load_model()

# Streamlit app UI
st.title("Qwen-7B Text Generation with 4-bit Quantization")

# Text input
user_input = st.text_area("Enter your text:")

# Generate text on button click
if st.button("Generate"):
    if not user_input.strip():
        # Nothing to condition on; avoid calling generate() with an empty
        # (or whitespace-only) prompt.
        st.warning("Please enter some text before generating.")
    else:
        # The model is loaded with device_map="auto", so it may not live on
        # the CPU; the input tensors must be moved to the model's device.
        inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
        # max_new_tokens bounds only the generated continuation. The previous
        # max_length=100 also counted prompt tokens, so long prompts left no
        # room for any output.
        outputs = model.generate(**inputs, max_new_tokens=100)
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write("Generated Text:")
        st.write(generated_text)