danishjameel003 committed
Commit c85425f · verified · 1 Parent(s): bbd6502

Update app.py

Files changed (1):
  1. app.py +15 -9
app.py CHANGED
@@ -10,7 +10,6 @@ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 from dotenv import load_dotenv
 from htmlTemplates import css
 
-
 # Set Streamlit page configuration
 st.set_page_config(page_title="Chat with Notes and AI", page_icon=":books:", layout="wide")
 
@@ -27,7 +26,7 @@ def load_pipeline():
 
     # Load model with offload folder for disk storage of weights
     model = AutoModelForCausalLM.from_pretrained(
-        model_name,
+        model_name,
         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,  # Use bfloat16 on GPU, float32 on CPU
         device_map="auto",  # Automatically map model to available devices (e.g., GPU if available)
         trust_remote_code=True,
@@ -36,11 +35,11 @@ def load_pipeline():
 
     # Return text-generation pipeline
     return pipeline(
-        task="text-generation",
-        model=model,
-        tokenizer=tokenizer,
+        task="text-generation",
+        model=model,
+        tokenizer=tokenizer,
         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-        device_map="auto",
+        device_map="auto",
         return_full_text=True
     )
 
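Between the hunks, for orientation: a minimal sketch of how `load_pipeline()` plausibly reads after this commit, assembled from the context lines in the two hunks above. The checkpoint name, the `offload_folder` argument, and the `st.cache_resource` decorator are assumptions not visible in the diff.

```python
import torch
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

@st.cache_resource  # assumed: cache the heavy pipeline across Streamlit reruns
def load_pipeline():
    # Hypothetical placeholder; the actual checkpoint is set elsewhere in app.py
    model_name = "some-org/some-causal-lm"
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Load model with offload folder for disk storage of weights
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
        trust_remote_code=True,
        offload_folder="offload",  # assumed from the comment about disk offload
    )

    # Return text-generation pipeline
    return pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
        return_full_text=True,
    )
```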
@@ -80,8 +79,8 @@ def get_chunks(raw_text):
     from langchain.text_splitter import CharacterTextSplitter
     text_splitter = CharacterTextSplitter(
         separator="\n",
-        chunk_size=2000,
-        chunk_overlap=500,
+        chunk_size=1000,  # Reduced chunk size for faster processing
+        chunk_overlap=200,  # Smaller overlap for efficiency
         length_function=len
     )
     chunks = text_splitter.split_text(raw_text)
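The splitter change halves the chunk size and more than halves the overlap. A quick standalone sketch of the new settings, using the same `CharacterTextSplitter` call as the hunk above; the sample text is synthetic.

```python
from langchain.text_splitter import CharacterTextSplitter

splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,    # new value (was 2000)
    chunk_overlap=200,  # new value (was 500)
    length_function=len,
)

# Synthetic stand-in for the extracted notes text
raw_text = "\n".join(f"Paragraph {i}: " + "x" * 120 for i in range(40))
chunks = splitter.split_text(raw_text)
print(len(chunks), max(len(c) for c in chunks))
```

With k=2 retrieval (next hunk), the joined context can still reach roughly 2000 characters, so the 1000-character cap introduced there effectively keeps one full chunk plus part of the second.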
@@ -99,11 +98,18 @@ def get_vectorstore(chunks):
 # Generating response from user queries
 def handle_question(question, vectorstore=None):
     if vectorstore:
-        documents = vectorstore.similarity_search(question, k=3)
+        # Reduce the number of retrieved chunks for faster processing
+        documents = vectorstore.similarity_search(question, k=2)
         context = "\n".join([doc.page_content for doc in documents])
+
+        # Limit context to 1000 characters to speed up model inference
+        context = context[:1000]
+
         if context:
             result_with_context = llm_context_chain.invoke({"instruction": question, "context": context})
             return result_with_context
+
+    # Fallback to instruction-only chain if no context is found
     return llm_chain.invoke({"instruction": question})
 
 def main():
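Putting the final hunk together, `handle_question()` after this commit reads roughly as below. The two chains are stubbed with a hypothetical `_EchoChain` so the control flow runs standalone; in app.py, `llm_chain` and `llm_context_chain` are built elsewhere from the pipeline above.

```python
class _EchoChain:
    """Hypothetical stand-in for the real LangChain chains in app.py."""
    def invoke(self, inputs: dict) -> dict:
        return inputs  # echo the prompt inputs instead of running the model

llm_chain = _EchoChain()
llm_context_chain = _EchoChain()

def handle_question(question, vectorstore=None):
    if vectorstore:
        # Retrieve only the two closest chunks (was k=3 before this commit)
        documents = vectorstore.similarity_search(question, k=2)
        context = "\n".join([doc.page_content for doc in documents])

        # Hard cap at 1000 characters for faster inference; note this can
        # cut the second retrieved chunk mid-sentence
        context = context[:1000]

        if context:
            result_with_context = llm_context_chain.invoke({"instruction": question, "context": context})
            return result_with_context

    # Fallback to instruction-only chain if no context is found
    return llm_chain.invoke({"instruction": question})

print(handle_question("What is in my notes?"))  # no vectorstore -> instruction-only path
```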