Update app.py
app.py CHANGED
@@ -161,27 +161,51 @@ def load_model(model_choice: str, progress=gr.Progress()):
     print(f"Loading model {model_id}...")

     # Load model with appropriate settings for Colab
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="auto",
-        trust_remote_code=True,
-        load_in_4bit=True,  # Use 4-bit quantization for memory efficiency
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-        low_cpu_mem_usage=True
-    )
-
+    # model = AutoModelForCausalLM.from_pretrained(
+    #     model_id,
+    #     device_map="auto",
+    #     trust_remote_code=True,
+    #     load_in_4bit=True,  # Use 4-bit quantization for memory efficiency
+    #     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    #     low_cpu_mem_usage=True
+    # )
+    if torch.cuda.is_available():
+        # On GPU: use 4-bit quantization
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map="auto",
+            trust_remote_code=True,
+            load_in_4bit=True,
+            torch_dtype=torch.float16,
+            low_cpu_mem_usage=True
+        )
+    else:
+        # On CPU: do NOT use 4-bit quantization
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map="cpu",
+            trust_remote_code=True,
+            torch_dtype=torch.float32,
+            low_cpu_mem_usage=True
+        )
     progress(0.8, desc="Creating pipeline...")
     print("Creating text generation pipeline...")

     # Create pipeline
+    # pipe = pipeline(
+    #     "text-generation",
+    #     model=model,
+    #     tokenizer=tokenizer,
+    #     device_map="auto",
+    #     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    # )
     pipe = pipeline(
         "text-generation",
         model=model,
         tokenizer=tokenizer,
-        device_map="auto",
+        device_map="auto" if torch.cuda.is_available() else "cpu",
         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
     )
-
     # Store globally
     current_model = model
     current_tokenizer = tokenizer
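A note on the GPU branch above: newer transformers releases deprecate passing load_in_4bit directly to from_pretrained in favor of a BitsAndBytesConfig object. A minimal equivalent sketch, assuming a recent transformers with bitsandbytes installed and model_id as in load_model():

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit weight quantization; matmuls still run in fp16 on the GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",  # NormalFloat4, as popularized by QLoRA
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
    low_cpu_mem_usage=True,
)

The CPU branch avoids 4-bit loading entirely because bitsandbytes quantization requires a CUDA GPU, hence the plain float32 load there.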
@@ -733,7 +757,7 @@ def demo():
     vector_db = gr.State()
     qa_chain = gr.State()

-    gr.HTML("<center><h1>π Enhanced RAG CSV Chatbot
+    gr.HTML("<center><h1>π Enhanced RAG CSV Chatbot</h1></center>")
     gr.HTML("<center><p>Upload CSV files and chat with your data using powerful local language models</p></center>")

     with gr.Row():
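The diff stops short of showing how pipe reaches the qa_chain state. One common wiring, shown here purely as an assumption (this commit does not include it), wraps the transformers pipeline in LangChain's HuggingFacePipeline and builds a RetrievalQA chain over the CSV vector store:

from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

# Wrap the transformers pipeline created in load_model() as a LangChain LLM.
llm = HuggingFacePipeline(pipeline=pipe)

# vector_db stands for the vector-store value held in the gr.State() above;
# its concrete backend (FAISS, Chroma, ...) is not shown in this diff.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vector_db.as_retriever(),
    return_source_documents=True,
)

result = qa_chain.invoke({"query": "What columns does the CSV contain?"})
print(result["result"])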