danishjameel003 commited on
Commit
74d69bb
·
verified ·
1 Parent(s): 4e15e87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -18
app.py CHANGED
@@ -7,17 +7,9 @@ from langchain.vectorstores import FAISS
7
  from langchain.memory import ConversationBufferMemory
8
  from langchain import PromptTemplate, LLMChain
9
  from langchain.llms import HuggingFacePipeline
10
- from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
11
  from dotenv import load_dotenv
12
  from htmlTemplates import css
13
- import warnings
14
-
15
- # Suppress GPTNeoXSdpaAttention deprecation warnings
16
- warnings.filterwarnings(
17
- "ignore",
18
- message="The `GPTNeoXSdpaAttention` class is deprecated",
19
- category=UserWarning
20
- )
21
 
22
  # Load environment variables
23
  load_dotenv()
@@ -25,19 +17,25 @@ load_dotenv()
25
  # Dolly-v2-3b model pipeline
26
  @st.cache_resource
27
  def load_pipeline():
 
28
  model_name = "databricks/dolly-v2-3b"
29
- model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
30
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
31
 
32
- # Update _attn_implementation
33
- model.config._attn_implementation = "triton" # Or another supported implementation
 
 
 
 
 
34
 
 
35
  return pipeline(
36
- model=model,
37
- tokenizer=tokenizer,
38
- torch_dtype=torch.float32, # Use float32 for CPU
39
- device_map="cpu", # Force CPU usage
40
- return_full_text=True
 
41
  )
42
 
43
  # Initialize Dolly pipeline
 
7
  from langchain.memory import ConversationBufferMemory
8
  from langchain import PromptTemplate, LLMChain
9
  from langchain.llms import HuggingFacePipeline
10
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
11
  from dotenv import load_dotenv
12
  from htmlTemplates import css
 
 
 
 
 
 
 
 
13
 
14
  # Load environment variables
15
  load_dotenv()
 
17
  # Dolly-v2-3b model pipeline
18
@st.cache_resource
def load_pipeline():
    """Build and cache the Dolly-v2-3b text-generation pipeline.

    Decorated with st.cache_resource so Streamlit loads the model only
    once per server process instead of on every rerun.

    Returns:
        A transformers text-generation pipeline wrapping Dolly-v2-3b,
        configured with return_full_text=True for LangChain compatibility.
    """
    # Use recommended settings for Dolly-v2-3b
    model_name = "databricks/dolly-v2-3b"

    # Left padding is the recommended setting for this decoder-only model.
    dolly_tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        padding_side="left",
        trust_remote_code=True,
    )
    dolly_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,  # bfloat16 to reduce memory usage
        device_map="auto",           # map to GPU automatically when one is available
        trust_remote_code=True,
    )

    # Assemble the pipeline with the required configuration.
    return pipeline(
        task="text-generation",
        model=dolly_model,
        tokenizer=dolly_tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        return_full_text=True,  # required for LangChain compatibility
    )
40
 
41
  # Initialize Dolly pipeline