Update app.py
app.py
CHANGED
@@ -1,17 +1,28 @@
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import BitsAndBytesConfig
+import torch

-# Load the model and tokenizer
+# Load the model and tokenizer with 4-bit quantization
 @st.cache_resource
 def load_model():
-    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B")
-    model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B")
+    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B")
+    model = AutoModelForCausalLM.from_pretrained(
+        "Qwen/Qwen-7B",
+        device_map="auto",
+        quantization_config=BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16
+        )
+    )
     return tokenizer, model

 tokenizer, model = load_model()

 # Streamlit app UI
-st.title("Qwen-7B Text Generation")
+st.title("Qwen-7B Text Generation with 4-bit Quantization")

 # Text input
 user_input = st.text_area("Enter your text:")
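Note: loading in 4-bit through transformers requires the bitsandbytes and accelerate packages to be installed alongside the app, and the original Qwen/Qwen-7B repository ships custom modeling code, so the from_pretrained calls typically also need trust_remote_code=True. The diff ends at the text input, so the generation step of app.py is not shown; a minimal sketch of how it might continue, assuming a "Generate" button, default decoding settings, and an arbitrary max_new_tokens value (none of which are part of the commit above), is:

# Sketch of the generation step; button label, max_new_tokens, and decoding
# details are assumptions, not taken from the commit shown above.
if st.button("Generate"):
    if user_input:
        # Tokenize the prompt and move it to the device the sharded model starts on
        inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
        with torch.no_grad():
            output_ids = model.generate(**inputs, max_new_tokens=128)
        # Decode only the tokens generated after the prompt
        completion = tokenizer.decode(
            output_ids[0][inputs["input_ids"].shape[-1]:],
            skip_special_tokens=True,
        )
        st.write(completion)
    else:
        st.warning("Please enter some text first.")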