Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -15,81 +15,116 @@ client = openai.OpenAI(api_key=st.secrets["OPENAI_API_KEY"])
|
|
15 |
ASSISTANT_ID = "asst_jNEWFnROZxSI8ZnL9WDI2yCp"
|
16 |
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
try:
|
25 |
-
# Create a Thread with an initial user message
|
26 |
thread = client.beta.threads.create(
|
27 |
messages=[{"role": "user", "content": query}]
|
28 |
)
|
29 |
-
|
30 |
-
# Start the Assistant
|
31 |
run = client.beta.threads.runs.create(
|
32 |
thread_id=thread.id, assistant_id=ASSISTANT_ID
|
33 |
)
|
34 |
-
|
35 |
-
# Wait for the run to complete
|
36 |
while run.status != "completed":
|
37 |
run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
|
38 |
-
await asyncio.sleep(5) #
|
39 |
-
|
40 |
-
# Retrieve the Messages added by the Assistant to the Thread
|
41 |
-
thread_messages = client.beta.threads.messages.list(thread.id)
|
42 |
-
|
43 |
-
# Initialize an empty string to collect the cleaned report
|
44 |
report = []
|
45 |
-
for message in
|
46 |
if message.role == "assistant":
|
47 |
for content_block in message.content:
|
48 |
if "text" in dir(content_block) and "value" in dir(
|
49 |
content_block.text
|
50 |
):
|
51 |
-
# Remove source citations
|
52 |
cleaned_text = re.sub(
|
53 |
r"【\d+:\d+†source】", "", content_block.text.value
|
54 |
)
|
55 |
report.append(cleaned_text)
|
56 |
return "\n".join(report)
|
57 |
except Exception as e:
|
58 |
-
return f"Error during
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
|
61 |
-
def
|
62 |
-
"""
|
63 |
-
Helper function to run async fetch_report function.
|
64 |
-
"""
|
65 |
loop = asyncio.get_event_loop()
|
66 |
-
report = loop.run_until_complete(
|
67 |
return report
|
68 |
|
69 |
|
70 |
# Streamlit interface
|
71 |
st.title("Google Leak Reporting Tool")
|
72 |
|
73 |
-
# User input for the query using a text area
|
74 |
query = st.text_area(
|
75 |
"Enter your research query:",
|
76 |
"Extract all the information about how the ranking for internal links works.",
|
77 |
-
height=150,
|
78 |
)
|
79 |
|
80 |
-
# Start the report generation process
|
81 |
if st.button("Generate Report"):
|
82 |
if not query.strip():
|
83 |
st.warning("Please enter a query to generate a report.")
|
84 |
else:
|
85 |
with st.spinner("Generating report..."):
|
86 |
-
report =
|
87 |
if report:
|
88 |
st.success("Report generated successfully!")
|
89 |
-
st.write(report)
|
90 |
-
# Create a download button for the report
|
91 |
st.download_button(
|
92 |
-
|
93 |
data=report,
|
94 |
file_name="research_report.txt",
|
95 |
mime="text/plain",
|
|
|
15 |
# ID of the pre-configured OpenAI Assistant (with the leak docs attached) used for every run.
ASSISTANT_ID = "asst_jNEWFnROZxSI8ZnL9WDI2yCp"
|
16 |
|
17 |
|
18 |
+
def analyze_query(input_query: str) -> list:
    """Break the user's research question into three focused sub-queries.

    Sends *input_query* to a chat model and returns the model's
    suggestions split on newlines (the reply is expected to list one
    sub-query per line).
    """
    system_message = {
        "role": "system",
        "content": "You are a helpful SEO assistant willing to understand Google's massive data leak documentation.",
    }
    user_message = {
        "role": "user",
        "content": f"Analyze this query and suggest three specific sub-queries: {input_query}",
    }
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
    )
    # One suggested sub-query per line of the model's reply.
    suggestions = completion.choices[0].message.content.strip()
    return suggestions.split("\n")
34 |
+
|
35 |
+
|
36 |
+
async def fetch_query_result(query: str) -> str:
    """Fetch the Assistant's answer for a single sub-query.

    Creates a thread seeded with *query*, starts a run against
    ASSISTANT_ID, polls until the run reaches a terminal state, then
    joins the assistant's text replies with inline source citations
    stripped.

    Returns the cleaned report text, or an ``"Error during query
    execution: ..."`` string on failure (callers treat errors as text).
    """
    try:
        thread = client.beta.threads.create(
            messages=[{"role": "user", "content": query}]
        )
        run = client.beta.threads.runs.create(
            thread_id=thread.id, assistant_id=ASSISTANT_ID
        )
        # Poll until the run reaches ANY terminal state.  The original
        # loop only tested for "completed", which spins forever when the
        # run ends as failed / cancelled / expired / requires_action.
        terminal_states = {
            "completed",
            "failed",
            "cancelled",
            "expired",
            "requires_action",
        }
        while run.status not in terminal_states:
            await asyncio.sleep(5)  # yield to the event loop between polls
            run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
        if run.status != "completed":
            return f"Error during query execution: run ended with status '{run.status}'"

        messages = client.beta.threads.messages.list(thread.id)
        report = []
        for message in messages.data:
            if message.role == "assistant":
                for content_block in message.content:
                    # Only text content blocks carry a .text.value payload.
                    if "text" in dir(content_block) and "value" in dir(
                        content_block.text
                    ):
                        # Strip inline citations like 【12:3†source】 from the answer.
                        cleaned_text = re.sub(
                            r"【\d+:\d+†source】", "", content_block.text.value
                        )
                        report.append(cleaned_text)
        return "\n".join(report)
    except Exception as e:
        return f"Error during query execution: {str(e)}"
|
63 |
+
|
64 |
+
|
65 |
+
def generate_final_response(results):
    """Merge the per-sub-query answers into one synthesized report.

    *results* is an iterable of result strings; they are concatenated
    and handed to a chat model for synthesis.  Returns the model's
    report text, stripped of surrounding whitespace.
    """
    combined_text = " ".join(results)
    chat_messages = [
        {
            "role": "system",
            "content": "You are a helpful SEO assistant analyzing the leaked 2,500 internal Google Search documents on Search Engine Optimization.",
        },
        {
            "role": "user",
            "content": f"Synthesize the following information into a comprehensive report: {combined_text}",
        },
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
    )
    return completion.choices[0].message.content.strip()
|
82 |
+
|
83 |
+
|
84 |
+
async def handle_query(input_query: str):
    """Run the full pipeline: split the query, fan out, synthesize.

    Shows the generated sub-queries in a Streamlit expander, fetches
    each one concurrently, then synthesizes a final report.  Returns
    the report text, or an error string if anything raises.
    """
    try:
        sub_queries = analyze_query(input_query)

        # Surface the model's reasoning (the generated sub-queries) to the user.
        with st.expander("Reasoning > Generated Sub-Queries"):
            for sub_query in sub_queries:
                st.write(f"{sub_query}")

        # Fetch every sub-query concurrently.
        results = await asyncio.gather(
            *(fetch_query_result(sub_query) for sub_query in sub_queries)
        )

        # Synthesis is a blocking API call; run it off the event loop.
        return await asyncio.to_thread(generate_final_response, results)
    except Exception as e:
        return f"Error during report generation: {str(e)}"
|
100 |
|
101 |
|
102 |
+
def run_async_query(input_query):
    """Bridge Streamlit's synchronous script thread to the async pipeline.

    ``asyncio.get_event_loop()`` is deprecated when no loop is running
    and raises ``RuntimeError`` in non-main threads — which is where
    Streamlit executes the script — so fall back to creating and
    installing a fresh loop when needed.  Returns handle_query's result.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No event loop in this (worker) thread yet: create one.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    report = loop.run_until_complete(handle_query(input_query))
    return report
|
106 |
|
107 |
|
108 |
# Streamlit interface
st.title("Google Leak Reporting Tool")

# Research question input (pre-filled with an example query).
query = st.text_area(
    "Enter your research query:",
    "Extract all the information about how the ranking for internal links works.",
    height=150,
)

if st.button("Generate Report"):
    if query.strip():
        with st.spinner("Generating report..."):
            report = run_async_query(query)
            if report:
                st.success("Report generated successfully!")
                st.write(report)
                # Offer the finished report as a plain-text download.
                st.download_button(
                    "Download Report as Text File",
                    data=report,
                    file_name="research_report.txt",
                    mime="text/plain",
                )
    else:
        st.warning("Please enter a query to generate a report.")