danishjameel003 committed
Commit c85425f · verified · 1 Parent(s): bbd6502

Update app.py

Files changed (1):
  1. app.py +15 -9
app.py CHANGED
@@ -10,7 +10,6 @@ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 from dotenv import load_dotenv
 from htmlTemplates import css
 
-
 # Set Streamlit page configuration
 st.set_page_config(page_title="Chat with Notes and AI", page_icon=":books:", layout="wide")
 
@@ -27,7 +26,7 @@ def load_pipeline():
 
     # Load model with offload folder for disk storage of weights
     model = AutoModelForCausalLM.from_pretrained(
-        model_name,
+        model_name,
         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,  # Use bfloat16 on GPU, float32 on CPU
         device_map="auto",  # Automatically map model to available devices (e.g., GPU if available)
         trust_remote_code=True,
@@ -36,11 +35,11 @@ def load_pipeline():
 
     # Return text-generation pipeline
     return pipeline(
-        task="text-generation",
-        model=model,
-        tokenizer=tokenizer,
+        task="text-generation",
+        model=model,
+        tokenizer=tokenizer,
         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-        device_map="auto",
+        device_map="auto",
         return_full_text=True
     )
 
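Between the hunks, for orientation: a minimal sketch of how `load_pipeline()` plausibly reads after this commit, assembled from the context lines in the two hunks above. The checkpoint name, the `offload_folder` argument, and the `st.cache_resource` decorator are assumptions not visible in the diff.

```python
import torch
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

@st.cache_resource  # assumed: cache the heavy pipeline across Streamlit reruns
def load_pipeline():
    # Hypothetical placeholder; the actual checkpoint is set elsewhere in app.py
    model_name = "some-org/some-causal-lm"
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Load model with offload folder for disk storage of weights
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
        trust_remote_code=True,
        offload_folder="offload",  # assumed from the comment about disk offload
    )

    # Return text-generation pipeline
    return pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
        return_full_text=True,
    )
```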
@@ -80,8 +79,8 @@ def get_chunks(raw_text):
     from langchain.text_splitter import CharacterTextSplitter
     text_splitter = CharacterTextSplitter(
         separator="\n",
-        chunk_size=2000,
-        chunk_overlap=500,
+        chunk_size=1000,  # Reduced chunk size for faster processing
+        chunk_overlap=200,  # Smaller overlap for efficiency
         length_function=len
     )
     chunks = text_splitter.split_text(raw_text)
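The splitter change halves the chunk size and more than halves the overlap. A quick standalone sketch of the new settings, using the same `CharacterTextSplitter` call as the hunk above; the sample text is synthetic.

```python
from langchain.text_splitter import CharacterTextSplitter

splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,    # new value (was 2000)
    chunk_overlap=200,  # new value (was 500)
    length_function=len,
)

# Synthetic stand-in for the extracted notes text
raw_text = "\n".join(f"Paragraph {i}: " + "x" * 120 for i in range(40))
chunks = splitter.split_text(raw_text)
print(len(chunks), max(len(c) for c in chunks))
```

With k=2 retrieval (next hunk), the joined context can still reach roughly 2000 characters, so the 1000-character cap introduced there effectively keeps one full chunk plus part of the second.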
@@ -99,11 +98,18 @@ def get_vectorstore(chunks):
 # Generating response from user queries
 def handle_question(question, vectorstore=None):
     if vectorstore:
-        documents = vectorstore.similarity_search(question, k=3)
+        # Reduce the number of retrieved chunks for faster processing
+        documents = vectorstore.similarity_search(question, k=2)
         context = "\n".join([doc.page_content for doc in documents])
+
+        # Limit context to 1000 characters to speed up model inference
+        context = context[:1000]
+
         if context:
             result_with_context = llm_context_chain.invoke({"instruction": question, "context": context})
             return result_with_context
+
+    # Fallback to instruction-only chain if no context is found
     return llm_chain.invoke({"instruction": question})
 
 def main():
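Putting the final hunk together, `handle_question()` after this commit reads roughly as below. The two chains are stubbed with a hypothetical `_EchoChain` so the control flow runs standalone; in app.py, `llm_chain` and `llm_context_chain` are built elsewhere from the pipeline above.

```python
class _EchoChain:
    """Hypothetical stand-in for the real LangChain chains in app.py."""
    def invoke(self, inputs: dict) -> dict:
        return inputs  # echo the prompt inputs instead of running the model

llm_chain = _EchoChain()
llm_context_chain = _EchoChain()

def handle_question(question, vectorstore=None):
    if vectorstore:
        # Retrieve only the two closest chunks (was k=3 before this commit)
        documents = vectorstore.similarity_search(question, k=2)
        context = "\n".join([doc.page_content for doc in documents])

        # Hard cap at 1000 characters for faster inference; note this can
        # cut the second retrieved chunk mid-sentence
        context = context[:1000]

        if context:
            result_with_context = llm_context_chain.invoke({"instruction": question, "context": context})
            return result_with_context

    # Fallback to instruction-only chain if no context is found
    return llm_chain.invoke({"instruction": question})

print(handle_question("What is in my notes?"))  # no vectorstore -> instruction-only path
```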