Update app.py
app.py CHANGED
@@ -44,17 +44,18 @@ def load_spacy_model():
 nlp = load_spacy_model()
 
 class EnhancedContextDrivenChatbot:
-    def __init__(self, history_size=10,
+    def __init__(self, history_size: int = 10, max_history_chars: int = 5000):
         self.history = []
         self.history_size = history_size
+        self.max_history_chars = max_history_chars
         self.entity_tracker = {}
         self.conversation_context = ""
-        self.model =
+        self.model = None
         self.last_instructions = None
 
-    def add_to_history(self, text):
+    def add_to_history(self, text: str):
         self.history.append(text)
-        if len(self.history) > self.history_size:
+        while len(' '.join(self.history)) > self.max_history_chars or len(self.history) > self.history_size:
             self.history.pop(0)
 
         # Update entity tracker
@@ -221,6 +222,28 @@ def get_model(temperature, top_p, repetition_penalty):
         huggingfacehub_api_token=huggingface_token
     )
 
+MAX_PROMPT_CHARS = 24000  # Adjust based on your model's limitations
+
+def chunk_text(text: str, max_chunk_size: int = 1000) -> List[str]:
+    chunks = []
+    current_chunk = ""
+    for sentence in re.split(r'(?<=[.!?])\s+', text):
+        if len(current_chunk) + len(sentence) > max_chunk_size:
+            chunks.append(current_chunk.strip())
+            current_chunk = sentence
+        else:
+            current_chunk += " " + sentence
+    if current_chunk:
+        chunks.append(current_chunk.strip())
+    return chunks
+
+def get_most_relevant_chunks(question: str, chunks: List[str], top_k: int = 3) -> List[str]:
+    question_embedding = sentence_model.encode([question])[0]
+    chunk_embeddings = sentence_model.encode(chunks)
+    similarities = cosine_similarity([question_embedding], chunk_embeddings)[0]
+    top_indices = np.argsort(similarities)[-top_k:]
+    return [chunks[i] for i in top_indices]
+
 def generate_chunked_response(model, prompt, max_tokens=1000, max_chunks=5):
     full_response = ""
     for i in range(max_chunks):
@@ -329,115 +352,51 @@ def estimate_tokens(text):
     # Rough estimate: 1 token ~= 4 characters
     return len(text) // 4
 
-def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot):
-    if not question:
-        return "Please enter a question."
-
+def ask_question(question: str, temperature: float, top_p: float, repetition_penalty: float, web_search: bool, chatbot: EnhancedContextDrivenChatbot) -> str:
     model = get_model(temperature, top_p, repetition_penalty)
-
-    # Update the chatbot's model
     chatbot.model = model
 
-    embed = get_embeddings()
-
-    if os.path.exists("faiss_database"):
-        database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-    else:
-        database = None
-
-    max_attempts = 5
-    context_reduction_factor = 0.7
-    max_tokens = 32000  # Maximum tokens allowed by the model
-
     if web_search:
         contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
-        serializable_entity_tracker = {k: list(v) for k, v in entity_tracker.items()}
-
         search_results = google_search(contextualized_question, num_results=3)
-
-        for
-        Answer the question based on the following web search results, conversation context, entity information, and user instructions:
-        Web Search Results:
-        {{context}}
-        Conversation Context: {{conv_context}}
-        Current Question: {{question}}
-        Topics: {{topics}}
-        Entity Information: {{entities}}
-        {instruction_prompt}
-        Provide a concise and relevant answer to the question.
-        """
-
-        prompt_val = ChatPromptTemplate.from_template(prompt_template)
-
-        # Start with full context and progressively reduce if necessary
-        current_context = context_str
-        current_conv_context = chatbot.get_context()
-        current_topics = topics
-        current_entities = serializable_entity_tracker
-
-        while True:
-            formatted_prompt = prompt_val.format(
-                context=current_context,
-                conv_context=current_conv_context,
-                question=question,
-                topics=", ".join(current_topics),
-                entities=json.dumps(current_entities)
-            )
-
-            # Estimate token count
-            estimated_tokens = estimate_tokens(formatted_prompt)
-
-            if estimated_tokens <= max_tokens - 1000:  # Leave 1000 tokens for the model's response
-                break
-
-            # Reduce context if estimated token count is too high
-            current_context = current_context[:int(len(current_context) * context_reduction_factor)]
-            current_conv_context = current_conv_context[:int(len(current_conv_context) * context_reduction_factor)]
-            current_topics = current_topics[:max(1, int(len(current_topics) * context_reduction_factor))]
-            current_entities = {k: v[:max(1, int(len(v) * context_reduction_factor))] for k, v in current_entities.items()}
-
-            if len(current_context) + len(current_conv_context) + len(str(current_topics)) + len(str(current_entities)) < 100:
-                raise ValueError("Context reduced too much. Unable to process the query.")
-
-            full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
-            answer = extract_answer(full_response, instructions)
-            all_answers.append(answer)
+
+        context_chunks = []
+        for result in search_results:
+            if result["text"]:
+                context_chunks.extend(chunk_text(result["text"]))
+
+        relevant_chunks = get_most_relevant_chunks(question, context_chunks)
+
+        prompt_parts = [
+            f"Question: {question}",
+            f"Conversation Context: {chatbot.get_context()[-1000:]}",  # Last 1000 characters
+            "Relevant Web Search Results:"
+        ]
+
+        for chunk in relevant_chunks:
+            if len(' '.join(prompt_parts)) + len(chunk) < MAX_PROMPT_CHARS:
+                prompt_parts.append(chunk)
+            else:
                 break
-
-        answer
-
+
+        if instructions:
+            prompt_parts.append(f"User Instructions: {instructions}")
+
+        prompt_template = """
+        Answer the question based on the following information:
+        {context}
+        Provide a concise and relevant answer to the question.
+        """
+
+        formatted_prompt = prompt_template.format(context='\n'.join(prompt_parts))
+
+        # Generate response using the model
+        full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
+        answer = extract_answer(full_response, instructions)
+
         # Update chatbot context with the answer
         chatbot.add_to_history(answer)
 
         return answer
 
     else:  # PDF document chat
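The retrieval step this commit adds is self-contained enough to try outside the Space. Below is a minimal standalone sketch of the same chunk-and-rank idea; it assumes sentence-transformers, scikit-learn, and numpy are installed, and the model name "all-MiniLM-L6-v2" plus the sample text are illustrative stand-ins, not necessarily what app.py loads as sentence_model.

# Minimal sketch of the chunk-and-rank retrieval helpers from this commit.
import re
from typing import List

import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Illustrative model choice; app.py presumably builds its own sentence_model.
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")

def chunk_text(text: str, max_chunk_size: int = 1000) -> List[str]:
    # Split on sentence boundaries and pack sentences into ~max_chunk_size chunks.
    chunks, current_chunk = [], ""
    for sentence in re.split(r'(?<=[.!?])\s+', text):
        if len(current_chunk) + len(sentence) > max_chunk_size:
            chunks.append(current_chunk.strip())
            current_chunk = sentence
        else:
            current_chunk += " " + sentence
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks

def get_most_relevant_chunks(question: str, chunks: List[str], top_k: int = 3) -> List[str]:
    # Embed the question and every chunk, then keep the top_k most similar chunks.
    question_embedding = sentence_model.encode([question])[0]
    chunk_embeddings = sentence_model.encode(chunks)
    similarities = cosine_similarity([question_embedding], chunk_embeddings)[0]
    top_indices = np.argsort(similarities)[-top_k:]
    return [chunks[i] for i in top_indices]

if __name__ == "__main__":
    document = ("Python was created by Guido van Rossum. It was first released in 1991. "
                "Python emphasizes readability. The language is widely used for scripting and data work.")
    chunks = chunk_text(document, max_chunk_size=80)
    print(get_most_relevant_chunks("Who created Python?", chunks, top_k=2))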
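The new ask_question path also swaps the old progressive context-reduction loop for a simple character budget. The sketch below shows that assembly step in isolation; build_prompt is a hypothetical helper introduced only for illustration, and MAX_PROMPT_CHARS mirrors the constant added in the diff.

# Sketch of the character-budget prompt assembly used by the new web-search branch.
from typing import List, Optional

MAX_PROMPT_CHARS = 24000  # rough cap on total prompt size, in characters

def build_prompt(question: str, conv_context: str, relevant_chunks: List[str],
                 instructions: Optional[str] = None) -> str:
    prompt_parts = [
        f"Question: {question}",
        f"Conversation Context: {conv_context[-1000:]}",  # keep only the last 1000 characters
        "Relevant Web Search Results:",
    ]
    for chunk in relevant_chunks:
        # Stop adding chunks once the running prompt would exceed the budget.
        if len(' '.join(prompt_parts)) + len(chunk) < MAX_PROMPT_CHARS:
            prompt_parts.append(chunk)
        else:
            break
    if instructions:
        prompt_parts.append(f"User Instructions: {instructions}")
    template = (
        "Answer the question based on the following information:\n"
        "{context}\n"
        "Provide a concise and relevant answer to the question."
    )
    return template.format(context='\n'.join(prompt_parts))

# Example call with toy inputs:
print(build_prompt("Who won the 2022 World Cup?",
                   "earlier turns discussed football",
                   ["Argentina won the 2022 FIFA World Cup, beating France on penalties."]))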