danishjameel003 committed · verified
Commit 160fbe1 · 1 Parent(s): 3455401

Update app.py

Files changed (1)
  1. app.py +34 -73
app.py CHANGED
@@ -1,12 +1,9 @@
+import openai
 import os
-import torch
 import streamlit as st
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS
-from langchain_core.prompts import PromptTemplate
-from langchain.chains import LLMChain
-from langchain_community.llms import HuggingFacePipeline
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain.embeddings import OpenAIEmbeddings
 from dotenv import load_dotenv
 
 # Set Streamlit page configuration
@@ -15,54 +12,28 @@ st.set_page_config(page_title="Chat with Notes and AI", page_icon=":books:", lay
 # Load environment variables
 load_dotenv()
 
-# Dolly-v2-3b model pipeline
-@st.cache_resource
-def load_pipeline():
-    model_name = "databricks/dolly-v2-3b"
-
-    # Load tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", trust_remote_code=True)
-
-    # Load model with offload folder for disk storage of weights
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,  # Use bfloat16 on GPU, float32 on CPU
-        device_map="auto",  # Automatically map model to available devices (e.g., GPU if available)
-        trust_remote_code=True,
-        offload_folder="./offload_weights"  # Folder to store offloaded weights
-    )
-
-    # Return text-generation pipeline
-    return pipeline(
-        task="text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-        device_map="auto",
-        return_full_text=True
-    )
-
-# Initialize Dolly pipeline
-generate_text = load_pipeline()
-
-# Create a HuggingFace pipeline wrapper for LangChain
-hf_pipeline = HuggingFacePipeline(pipeline=generate_text)
+# OpenAI API key (set OPENAI_API_KEY in .env or directly in your environment)
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "sk-proj-...")
+openai.api_key = OPENAI_API_KEY
 
 # Template for instruction-only prompts
-prompt = PromptTemplate(
-    input_variables=["instruction"],
-    template="{instruction}"
-)
-
-# Template for prompts with context
-prompt_with_context = PromptTemplate(
-    input_variables=["instruction", "context"],
-    template="{instruction}\n\nInput:\n{context}"
-)
-
-# Create LLM chains
-llm_chain = LLMChain(llm=hf_pipeline, prompt=prompt)
-llm_context_chain = LLMChain(llm=hf_pipeline, prompt=prompt_with_context)
+def generate_openai_response(instruction, context=None):
+    try:
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": instruction},
+        ]
+        if context:
+            messages.append({"role": "user", "content": f"Context: {context}"})
+        response = openai.ChatCompletion.create(
+            model="gpt-4",
+            messages=messages,
+            max_tokens=1200,
+            temperature=0.7
+        )
+        return response["choices"][0]["message"]["content"]
+    except Exception as e:
+        return f"Error: {str(e)}"
 
 # Extracting text from .txt files
 def get_text_files_content(folder):
@@ -75,41 +46,31 @@ def get_text_files_content(folder):
 
 # Converting text to chunks
 def get_chunks(raw_text):
-    from langchain.text_splitter import CharacterTextSplitter
     text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,  # Reduced chunk size for faster processing
        chunk_overlap=200,  # Smaller overlap for efficiency
        length_function=len
    )
-    chunks = text_splitter.split_text(raw_text)
-    return chunks
+    return text_splitter.split_text(raw_text)
 
-# Using Hugging Face embeddings model and FAISS to create vectorstore
+# Using OpenAI embeddings model and FAISS to create vectorstore
 def get_vectorstore(chunks):
-    embeddings = HuggingFaceEmbeddings(
-        model_name="sentence-transformers/all-MiniLM-L6-v2",
-        model_kwargs={'device': 'cpu'}  # Ensure embeddings use CPU
-    )
+    embeddings = OpenAIEmbeddings()  # Uses OpenAI Embeddings
     vectorstore = FAISS.from_texts(texts=chunks, embedding=embeddings)
     return vectorstore
 
 # Generating response from user queries
 def handle_question(question, vectorstore=None):
     if vectorstore:
-        # Reduce the number of retrieved chunks for faster processing
+        # Retrieve relevant chunks using similarity search
         documents = vectorstore.similarity_search(question, k=2)
         context = "\n".join([doc.page_content for doc in documents])
-
-        # Limit context to 1000 characters to speed up model inference
-        context = context[:1000]
-
-        if context:
-            result_with_context = llm_context_chain.invoke({"instruction": question, "context": context})
-            return result_with_context
-
-    # Fallback to instruction-only chain if no context is found
-    return llm_chain.invoke({"instruction": question})
+        context = context[:1000]  # Limit context size for faster processing
+        return generate_openai_response(question, context)
+    else:
+        # Fallback to instruction-only prompt if no context is found
+        return generate_openai_response(question)
 
 def main():
     st.title("Chat with Notes :books:")
@@ -171,7 +132,7 @@ def main():
         if st.session_state.vectorstore:
             response = handle_question(question, st.session_state.vectorstore)
             st.subheader("Answer:")
-            st.write(response.get("text", "No response found."))
+            st.write(response)
         else:
             st.warning("Please load the content for the selected subject before asking a question.")
 
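The new `generate_openai_response` helper calls `openai.ChatCompletion.create`, which is the legacy interface from the pre-1.0 `openai` SDK; on `openai>=1.0` that attribute no longer exists and the call fails at runtime. Below is a minimal sketch of the same helper against the 1.x client, assuming the Space pins `openai>=1.0` and exposes the key only through the `OPENAI_API_KEY` environment variable (it is not part of the commit):

```python
# Sketch only: openai>=1.0 equivalent of generate_openai_response.
# Assumes OPENAI_API_KEY is set in the environment; model and parameters mirror the committed code.
from openai import OpenAI

client = OpenAI()  # picks up OPENAI_API_KEY from the environment


def generate_openai_response(instruction, context=None):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": instruction},
    ]
    if context:
        messages.append({"role": "user", "content": f"Context: {context}"})
    try:
        response = client.chat.completions.create(
            model="gpt-4",
            messages=messages,
            max_tokens=1200,
            temperature=0.7,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error: {e}"
```

Either way, dropping any hardcoded fallback key and relying on `load_dotenv()` plus the environment variable keeps the credential out of the repository.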
 
 
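The added imports also use the old monolithic LangChain paths (`langchain.vectorstores`, `langchain.embeddings`), even though the removed code already imported FAISS from `langchain_community`; recent LangChain releases emit deprecation warnings for those paths. A short sketch of the same chunk-to-FAISS flow with the split packages, assuming `langchain-community`, `langchain-openai`, and `faiss-cpu` are installed (the embedding model name is an illustrative choice, not taken from the commit):

```python
# Sketch only: chunking + FAISS vectorstore with the split LangChain packages.
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings


def get_vectorstore(chunks):
    # "text-embedding-3-small" is an assumed model; the package default also works.
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    return FAISS.from_texts(texts=chunks, embedding=embeddings)


splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
chunks = splitter.split_text("raw notes text loaded from the .txt files")
vectorstore = get_vectorstore(chunks)
docs = vectorstore.similarity_search("example question", k=2)
```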