danishjameel003 commited on
Commit
74d69bb
·
verified ·
1 Parent(s): 4e15e87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -18
app.py CHANGED
@@ -7,17 +7,9 @@ from langchain.vectorstores import FAISS
7
  from langchain.memory import ConversationBufferMemory
8
  from langchain import PromptTemplate, LLMChain
9
  from langchain.llms import HuggingFacePipeline
10
- from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
11
  from dotenv import load_dotenv
12
  from htmlTemplates import css
13
- import warnings
14
-
15
- # Suppress GPTNeoXSdpaAttention deprecation warnings
16
- warnings.filterwarnings(
17
- "ignore",
18
- message="The `GPTNeoXSdpaAttention` class is deprecated",
19
- category=UserWarning
20
- )
21
 
22
  # Load environment variables
23
  load_dotenv()
@@ -25,19 +17,25 @@ load_dotenv()
25
  # Dolly-v2-3b model pipeline
26
  @st.cache_resource
27
  def load_pipeline():
 
28
  model_name = "databricks/dolly-v2-3b"
29
- model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
30
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
31
 
32
- # Update _attn_implementation
33
- model.config._attn_implementation = "triton" # Or another supported implementation
 
 
 
 
 
34
 
 
35
  return pipeline(
36
- model=model,
37
- tokenizer=tokenizer,
38
- torch_dtype=torch.float32, # Use float32 for CPU
39
- device_map="cpu", # Force CPU usage
40
- return_full_text=True
 
41
  )
42
 
43
  # Initialize Dolly pipeline
 
7
  from langchain.memory import ConversationBufferMemory
8
  from langchain import PromptTemplate, LLMChain
9
  from langchain.llms import HuggingFacePipeline
10
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
11
  from dotenv import load_dotenv
12
  from htmlTemplates import css
 
 
 
 
 
 
 
 
13
 
14
  # Load environment variables
15
  load_dotenv()
 
17
  # Dolly-v2-3b model pipeline
18
@st.cache_resource
def load_pipeline():
    """Build and cache the Dolly-v2-3b text-generation pipeline.

    Decorated with st.cache_resource so Streamlit loads the model only
    once per server process instead of on every rerun.

    Returns:
        A transformers text-generation pipeline wrapping Dolly-v2-3b,
        configured with return_full_text=True for LangChain compatibility.
    """
    # Use recommended settings for Dolly-v2-3b
    model_name = "databricks/dolly-v2-3b"

    # Left padding is the recommended setting for this decoder-only model.
    dolly_tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        padding_side="left",
        trust_remote_code=True,
    )
    dolly_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,  # bfloat16 to reduce memory usage
        device_map="auto",           # map to GPU automatically when one is available
        trust_remote_code=True,
    )

    # Assemble the pipeline with the required configuration.
    return pipeline(
        task="text-generation",
        model=dolly_model,
        tokenizer=dolly_tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        return_full_text=True,  # required for LangChain compatibility
    )
40
 
41
  # Initialize Dolly pipeline