Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,17 +7,9 @@ from langchain.vectorstores import FAISS
|
|
7 |
from langchain.memory import ConversationBufferMemory
|
8 |
from langchain import PromptTemplate, LLMChain
|
9 |
from langchain.llms import HuggingFacePipeline
|
10 |
-
from transformers import pipeline,
|
11 |
from dotenv import load_dotenv
|
12 |
from htmlTemplates import css
|
13 |
-
import warnings
|
14 |
-
|
15 |
-
# Suppress GPTNeoXSdpaAttention deprecation warnings
|
16 |
-
warnings.filterwarnings(
|
17 |
-
"ignore",
|
18 |
-
message="The `GPTNeoXSdpaAttention` class is deprecated",
|
19 |
-
category=UserWarning
|
20 |
-
)
|
21 |
|
22 |
# Load environment variables
|
23 |
load_dotenv()
|
@@ -25,19 +17,25 @@ load_dotenv()
|
|
25 |
# Dolly-v2-3b model pipeline
|
26 |
@st.cache_resource
|
27 |
def load_pipeline():
|
|
|
28 |
model_name = "databricks/dolly-v2-3b"
|
29 |
-
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
|
30 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
31 |
|
32 |
-
|
33 |
-
model
|
|
|
|
|
|
|
|
|
|
|
34 |
|
|
|
35 |
return pipeline(
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
41 |
)
|
42 |
|
43 |
# Initialize Dolly pipeline
|
|
|
7 |
from langchain.memory import ConversationBufferMemory
|
8 |
from langchain import PromptTemplate, LLMChain
|
9 |
from langchain.llms import HuggingFacePipeline
|
10 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
|
11 |
from dotenv import load_dotenv
|
12 |
from htmlTemplates import css
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
# Load environment variables
|
15 |
load_dotenv()
|
|
|
17 |
# Dolly-v2-3b model pipeline
|
18 |
@st.cache_resource
|
19 |
def load_pipeline():
|
20 |
+
# Use recommended settings for Dolly-v2-3b
|
21 |
model_name = "databricks/dolly-v2-3b"
|
|
|
|
|
22 |
|
23 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", trust_remote_code=True)
|
24 |
+
model = AutoModelForCausalLM.from_pretrained(
|
25 |
+
model_name,
|
26 |
+
torch_dtype=torch.bfloat16, # Use bfloat16 to reduce memory usage
|
27 |
+
device_map="auto", # Automatically map model to available devices (e.g., GPU if available)
|
28 |
+
trust_remote_code=True
|
29 |
+
)
|
30 |
|
31 |
+
# Load the pipeline with required configurations
|
32 |
return pipeline(
|
33 |
+
task="text-generation",
|
34 |
+
model=model,
|
35 |
+
tokenizer=tokenizer,
|
36 |
+
torch_dtype=torch.bfloat16,
|
37 |
+
device_map="auto",
|
38 |
+
return_full_text=True # Required for LangChain compatibility
|
39 |
)
|
40 |
|
41 |
# Initialize Dolly pipeline
|