GoogleLeak / app.py
cyberandy's picture
Update app.py
cef04ee verified
import streamlit as st
import openai
import asyncio
import re
# Apply the asyncio patch if required
import nest_asyncio
nest_asyncio.apply()
# Shared OpenAI client used by every API call in this file. The API key is
# read from Streamlit secrets under the "OPENAI_API_KEY" entry.
client = openai.OpenAI(api_key=st.secrets["OPENAI_API_KEY"])
# ID of the OpenAI Assistant that fetch_query_result() starts each run with.
ASSISTANT_ID = "asst_jNEWFnROZxSI8ZnL9WDI2yCp"
def analyze_query(input_query: str) -> list:
    """Ask the chat model to break a research query into sub-queries.

    Sends ``input_query`` to ``gpt-3.5-turbo`` together with an SEO-assistant
    system prompt that asks for three specific sub-queries, then splits the
    reply into one entry per line.

    Args:
        input_query: The research question entered by the user.

    Returns:
        The non-blank lines of the model's reply, each stripped of surrounding
        whitespace. Three sub-queries are requested but the count is not
        enforced here; the list is empty when the reply carries no text.
    """
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful SEO assistant willing to understand Google's massive data leak documentation.",
            },
            {
                "role": "user",
                "content": f"Analyze this query and suggest three specific sub-queries: {input_query}",
            },
        ],
    )
    # The message content is optional in the API response; treat a missing
    # body as an empty reply instead of failing on None.strip().
    reply = response.choices[0].message.content or ""
    # Replies often separate list items with blank lines. Dropping them keeps
    # empty strings from being sent on as sub-queries.
    return [line.strip() for line in reply.splitlines() if line.strip()]
async def fetch_query_result(query: str) -> str:
    """Run one query through the OpenAI Assistant and return its text reply.

    Creates a thread holding ``query`` as its only user message, starts a run
    with ``ASSISTANT_ID``, polls the run every 5 seconds while it is still in
    progress, and then collects the text blocks of the assistant's messages
    with citation markers such as ``【4:0†source】`` removed.

    The blocking SDK calls are dispatched through ``asyncio.to_thread`` so
    that several of these coroutines can overlap when gathered.

    Args:
        query: The prompt to send to the assistant.

    Returns:
        The assistant's text blocks joined by newlines, or a string starting
        with ``"Error during query execution:"`` when an API call raises or
        the run stops in any state other than ``"completed"``. Exceptions are
        not propagated to the caller.
    """
    try:
        thread = await asyncio.to_thread(
            client.beta.threads.create,
            messages=[{"role": "user", "content": query}],
        )
        run = await asyncio.to_thread(
            client.beta.threads.runs.create,
            thread_id=thread.id,
            assistant_id=ASSISTANT_ID,
        )
        # Keep polling only while the run can still make progress. Any other
        # non-completed status (failed, cancelled, expired, incomplete,
        # requires_action) will never turn into "completed" on its own here,
        # so waiting for it would loop forever or until the run expires.
        while run.status in ("queued", "in_progress", "cancelling"):
            # Sleep before re-checking so a finished run is returned at once
            # instead of after one extra 5-second wait.
            await asyncio.sleep(5)
            run = await asyncio.to_thread(
                client.beta.threads.runs.retrieve,
                thread_id=thread.id,
                run_id=run.id,
            )
        if run.status != "completed":
            return f"Error during query execution: run ended with status {run.status}"
        # The list endpoint returns newest-first by default; request ascending
        # order so multi-message replies are joined in the order written.
        messages = await asyncio.to_thread(
            client.beta.threads.messages.list, thread.id, order="asc"
        )
        report = []
        for message in messages.data:
            if message.role != "assistant":
                continue
            for content_block in message.content:
                # Non-text blocks have no ``text.value``; skip them.
                text = getattr(content_block, "text", None)
                value = getattr(text, "value", None)
                if value is None:
                    continue
                report.append(re.sub(r"【\d+:\d+†source】", "", value))
        return "\n".join(report)
    except Exception as e:
        # Best-effort by design: the caller gathers several of these and
        # expects a string from each, so failures are reported in-band.
        return f"Error during query execution: {str(e)}"
def generate_final_response(results):
    """Merge the per-query answers into a single report via ``gpt-4o``.

    Args:
        results: Iterable of answer strings, one per sub-query. They are
            joined with single spaces before being sent to the model.

    Returns:
        The model's reply with leading and trailing whitespace removed.
    """
    merged = " ".join(results)
    system_message = {
        "role": "system",
        "content": "You are a helpful SEO assistant analyzing the leaked 2,500 internal Google Search documents on Search Engine Optimization.",
    }
    user_message = {
        "role": "user",
        "content": f"Synthesize the following information into a comprehensive report: {merged}",
    }
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
    )
    answer = completion.choices[0].message.content
    return answer.strip()
async def handle_query(input_query: str):
    """Turn the user's query into a final report.

    Steps: derive sub-queries with ``analyze_query``, list them inside a
    Streamlit expander, fetch an answer for each one concurrently with
    ``fetch_query_result``, then synthesize the answers with
    ``generate_final_response`` on a worker thread.

    Args:
        input_query: The research question entered by the user.

    Returns:
        The synthesized report, or a string starting with
        ``"Error during report generation:"`` if any step raises.
    """
    try:
        sub_queries = analyze_query(input_query)

        # Let the user inspect the generated sub-queries.
        expander = st.expander("Reasoning > Generated Sub-Queries")
        with expander:
            for sub_query in sub_queries:
                st.write(f"{sub_query}")

        # One coroutine per sub-query, awaited together.
        pending = [fetch_query_result(sub_query) for sub_query in sub_queries]
        answers = await asyncio.gather(*pending)

        return await asyncio.to_thread(generate_final_response, answers)
    except Exception as exc:
        return f"Error during report generation: {exc!s}"
def run_async_query(input_query):
    """Run ``handle_query`` to completion from synchronous code.

    Args:
        input_query: The research question passed through to ``handle_query``.

    Returns:
        Whatever ``handle_query`` returns (the report or its error string).
    """
    # asyncio.get_event_loop() raises RuntimeError when the calling thread has
    # no current loop, and implicit loop creation is deprecated. Fall back to
    # an explicit loop rather than depending on one being created for us.
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = None
    # A loop closed by earlier code cannot run anything; replace it as well.
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop.run_until_complete(handle_query(input_query))
# Streamlit interface: a title, a query box pre-filled with an example, and a
# button that generates the report and offers it as a text download.
st.title("Google Leak Reporting Tool")
query = st.text_area(
    "Enter your research query:",
    "Extract all the information about how the ranking for internal links works.",
    height=150,
)
if st.button("Generate Report"):
    if not query.strip():
        st.warning("Please enter a query to generate a report.")
    else:
        with st.spinner("Generating report..."):
            report = run_async_query(query)
        if not report:
            st.error("Failed to generate report or report is empty.")
        elif report.startswith("Error during report generation:"):
            # handle_query() signals failure by returning a message with this
            # prefix instead of raising. Show it as an error rather than as a
            # successful, downloadable report.
            st.error(report)
        else:
            st.success("Report generated successfully!")
            st.write(report)
            st.download_button(
                "Download Report as Text File",
                data=report,
                file_name="research_report.txt",
                mime="text/plain",
            )