Shreyas094 committed
Commit 806791d · verified · 1 parent: 5bab160

Update app.py

Files changed (1): app.py (+92 -86)
app.py CHANGED
@@ -116,38 +116,28 @@ def update_vectors(files, parser):
         label="Select documents to query"
     )
 
-def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=3, temperature=0.2, should_stop=False):
+def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=1, temperature=0.2, should_stop=False, continuation=False):
     print(f"Starting generate_chunked_response with {num_calls} calls")
     full_response = ""
-    continuation_prompt = prompt
-
-    for i in range(num_calls):
-        print(f"Starting API call {i+1}")
-        if should_stop:
-            print("Stop clicked, breaking loop")
-            break
-
-        if i > 0:
-            continuation_prompt = f"""
-            Previous response: {full_response}
-
-            Original query: {prompt}
-
-            Please continue the response from where you left off, maintaining coherence and avoiding repetition.
-            """
-
-        try:
-            if model == "@cf/meta/llama-3.1-8b-instruct":
-                # Cloudflare API logic
+    messages = [{"role": "user", "content": prompt}]
+
+    if continuation:
+        messages.insert(0, {"role": "system", "content": "This is a continuation of a previous response. Please continue from where you left off, maintaining coherence and avoiding repetition."})
+
+    if model == "@cf/meta/llama-3.1-8b-instruct":
+        # Cloudflare API logic
+        for i in range(num_calls):
+            print(f"Starting Cloudflare API call {i+1}")
+            if should_stop:
+                print("Stop clicked, breaking loop")
+                break
+            try:
                 response = requests.post(
                     f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/meta/llama-3.1-8b-instruct",
                     headers={"Authorization": f"Bearer {API_TOKEN}"},
                     json={
                         "stream": True,
-                        "messages": [
-                            {"role": "system", "content": "You are a friendly assistant"},
-                            {"role": "user", "content": continuation_prompt}
-                        ],
+                        "messages": messages,
                         "max_tokens": max_tokens,
                         "temperature": temperature
                     },
@@ -163,13 +153,22 @@ def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=3, temperature=0.2, should_stop=False):
                                 json_data = json.loads(line.decode('utf-8').split('data: ')[1])
                                 chunk = json_data['response']
                                 full_response += chunk
+                                yield full_response
                             except json.JSONDecodeError:
                                 continue
-            else:
-                # Hugging Face API logic
-                client = InferenceClient(model, token=huggingface_token)
-                messages = [{"role": "user", "content": continuation_prompt}]
-
+                print(f"Cloudflare API call {i+1} completed")
+            except Exception as e:
+                print(f"Error in generating response from Cloudflare: {str(e)}")
+    else:
+        # Original Hugging Face API logic
+        client = InferenceClient(model, token=huggingface_token)
+
+        for i in range(num_calls):
+            print(f"Starting Hugging Face API call {i+1}")
+            if should_stop:
+                print("Stop clicked, breaking loop")
+                break
+            try:
                 for message in client.chat_completion(
                     messages=messages,
                     max_tokens=max_tokens,
@@ -182,17 +181,17 @@ def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=3, temperature=0.2, should_stop=False):
                     if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                         chunk = message.choices[0].delta.content
                         full_response += chunk
-
-            print(f"API call {i+1} completed")
-        except Exception as e:
-            print(f"Error in generating response: {str(e)}")
-
-    # Clean up the response (existing code)
+                        yield full_response
+                print(f"Hugging Face API call {i+1} completed")
+            except Exception as e:
+                print(f"Error in generating response from Hugging Face: {str(e)}")
+
+    # Clean up the response
     clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
     clean_response = clean_response.replace("Using the following context:", "").strip()
     clean_response = clean_response.replace("Using the following context from the PDF documents:", "").strip()
-
-    # Remove duplicate paragraphs and sentences (existing code)
+
+    # Remove duplicate paragraphs and sentences
     paragraphs = clean_response.split('\n\n')
     unique_paragraphs = []
     for paragraph in paragraphs:
@@ -207,7 +206,7 @@ def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=3, temperature=0.2, should_stop=False):
     final_response = '\n\n'.join(unique_paragraphs)
 
     print(f"Final clean response: {final_response[:100]}...")
-    return final_response
+    yield final_response
 
 def duckduckgo_search(query):
     with DDGS() as ddgs:
@@ -245,60 +244,28 @@ def retry_last_response(history, use_web_search, model, temperature, num_calls):
 
     return chatbot_interface(last_user_msg, history, use_web_search, model, temperature, num_calls)
 
-def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
-    logging.info(f"User Query: {message}")
-    logging.info(f"Model Used: {model}")
-    logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
+def respond(message, history, use_web_search, model, temperature, num_calls, selected_docs, continuation=False):
+    if not message.strip():
+        return "", history
 
-    logging.info(f"Selected Documents: {selected_docs}")
+    history = history + [(message, "")]
 
     try:
         if use_web_search:
             for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
                 response = f"{main_content}\n\n{sources}"
-                first_line = response.split('\n')[0] if response else ''
-                logging.info(f"Generated Response (first line): {first_line}")
-                yield response
+                history[-1] = (message, response)
+                yield history, gr.update(visible=True)  # Make Continue Generation button visible
         else:
-            embed = get_embeddings()
-            if os.path.exists("faiss_database"):
-                database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-                retriever = database.as_retriever()
-
-                # Filter relevant documents based on user selection
-                all_relevant_docs = retriever.get_relevant_documents(message)
-                relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
-
-                if not relevant_docs:
-                    yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
-                    return
-
-                context_str = "\n".join([doc.page_content for doc in relevant_docs])
-            else:
-                context_str = "No documents available."
-                yield "No documents available. Please upload PDF documents to answer questions."
-                return
-
-            if model == "@cf/meta/llama-3.1-8b-instruct":
-                # Use Cloudflare API
-                for partial_response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="pdf"):
-                    first_line = partial_response.split('\n')[0] if partial_response else ''
-                    logging.info(f"Generated Response (first line): {first_line}")
-                    yield partial_response
-            else:
-                # Use Hugging Face API
-                for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
-                    first_line = partial_response.split('\n')[0] if partial_response else ''
-                    logging.info(f"Generated Response (first line): {first_line}")
-                    yield partial_response
+            for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature, continuation=continuation):
+                history[-1] = (message, partial_response)
+                yield history, gr.update(visible=True)  # Make Continue Generation button visible
+    except gr.CancelledError:
+        yield history, gr.update(visible=False)
     except Exception as e:
-        logging.error(f"Error with {model}: {str(e)}")
-        if "microsoft/Phi-3-mini-4k-instruct" in model:
-            logging.info("Falling back to Mistral model due to Phi-3 error")
-            fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
-            yield from respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
-        else:
-            yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
+        logging.error(f"Unexpected error in respond: {str(e)}")
+        history[-1] = (message, f"An unexpected error occurred: {str(e)}")
+        yield history, gr.update(visible=False)
 
 logging.basicConfig(level=logging.DEBUG)
 
@@ -456,6 +423,31 @@ def vote(data: gr.LikeData):
     else:
         print(f"You downvoted this response: {data.value}")
 
+def continue_generation(history, model, temperature, num_calls, use_web_search, selected_docs):
+    if not history:
+        return history
+
+    last_user_msg = history[-1][0]
+    last_ai_response = history[-1][1]
+
+    continuation_prompt = f"""
+    Previous response: {last_ai_response}
+
+    Original query: {last_user_msg}
+
+    Please continue the response from where you left off, maintaining coherence and avoiding repetition.
+    """
+
+    try:
+        for response in respond(continuation_prompt, history[:-1], use_web_search, model, temperature, num_calls, selected_docs, continuation=True):
+            new_response = f"{last_ai_response}\n\n{response[-1][1]}"
+            history[-1] = (last_user_msg, new_response)
+            yield history
+    except Exception as e:
+        logging.error(f"Error in continue_generation: {str(e)}")
+        history[-1] = (last_user_msg, f"{last_ai_response}\n\nError continuing generation: {str(e)}")
+        yield history
+
 css = """
 /* Add your custom CSS here */
 """
@@ -481,7 +473,7 @@ demo = gr.ChatInterface(
         gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
         gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of API Calls"),
         use_web_search,
-        document_selector  # Add the document selector to the chat interface
+        document_selector
     ],
     title="AI-powered Web Search and PDF Chat Assistant",
     description="Chat with your PDFs or use web search to answer questions.",
@@ -529,6 +521,19 @@ with demo:
                         inputs=[file_input, parser_dropdown],
                         outputs=[update_output, document_selector])
 
+    # Add the Continue Generation button
+    continue_btn = gr.Button("Continue Generation", visible=False)
+
+    # Add the click event for the Continue Generation button
+    continue_btn.click(continue_generation,
+                       inputs=[demo.chatbot,
+                               gr.Dropdown(choices=MODELS, label="Select Model"),
+                               gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
+                               gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of API Calls"),
+                               use_web_search,
+                               document_selector],
+                       outputs=[demo.chatbot])
+
     gr.Markdown(
         """
        ## How to use
@@ -539,6 +544,7 @@ with demo:
        5. Toggle "Use Web Search" to switch between PDF chat and web search.
        6. Adjust Temperature and Number of API Calls to fine-tune the response generation.
        7. Use the provided examples or ask your own questions.
+       8. If a response is incomplete, click "Continue Generation" for more information.
        """
     )
 
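
The central change is that generate_chunked_response no longer rebuilds a continuation prompt between calls and returns a single final string; it sends one message list (optionally prefixed with a continuation system message) and yields the accumulated text after every streamed chunk, ending with the cleaned, de-duplicated text as its last yield. A minimal consumption sketch, assuming the function as committed above; stream_to_console is an illustrative helper, not part of app.py:

    def stream_to_console(prompt: str, model: str) -> str:
        # Drive the generator; each yield carries the full response so far,
        # so print only the suffix that is new since the previous yield.
        last = ""
        for partial in generate_chunked_response(prompt, model, num_calls=1):
            print(partial[len(last):], end="", flush=True)
            last = partial
        print()
        return last  # the final yield is the cleaned, de-duplicated response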
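
Likewise, respond now yields (history, gr.update(...)) pairs, so one streaming callback both refreshes the chatbot and reveals the hidden "Continue Generation" button. A stripped-down sketch of that Gradio pattern with tuple-style chat history; the handler and component names here are illustrative, not the app's own:

    import gradio as gr

    def stream_handler(message, history):
        history = history + [(message, "")]      # open a new chat turn
        for partial in ("Thinking", "Thinking...", "Done."):
            history[-1] = (message, partial)     # overwrite the bot side
            # The second output slot receives a component update,
            # which reveals the hidden button while streaming.
            yield history, gr.update(visible=True)

    with gr.Blocks() as app:
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        continue_btn = gr.Button("Continue Generation", visible=False)
        msg.submit(stream_handler, inputs=[msg, chatbot],
                   outputs=[chatbot, continue_btn])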
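
Finally, continue_generation re-drives respond with continuation=True, feeding it a prompt built from the last exchange and splicing whatever comes back onto the previous answer. The prompt-and-splice core restated as standalone helpers; the function names are hypothetical, but the templates mirror the diff:

    def build_continuation_prompt(last_user_msg: str, last_ai_response: str) -> str:
        # Same template as continue_generation above.
        return (
            f"Previous response: {last_ai_response}\n\n"
            f"Original query: {last_user_msg}\n\n"
            "Please continue the response from where you left off, "
            "maintaining coherence and avoiding repetition."
        )

    def splice_continuation(last_ai_response: str, continuation_text: str) -> str:
        # The continued text is appended after a blank line, as in the commit.
        return f"{last_ai_response}\n\n{continuation_text}"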
550