Spaces:

WordLift
/

GoogleLeak

Running

App Files Files Community

GoogleLeak / app.py

cyberandy

Update app.py

cef04ee verified 9 months ago

raw

history blame contribute delete

4.72 kB

	import streamlit as st
	import openai
	import asyncio
	import re

	# Patch asyncio through nest_asyncio before any coroutine is driven.
	# run_async_query() further down calls loop.run_until_complete() from
	# synchronous Streamlit code; nest_asyncio is applied so that call is
	# tolerated even if a loop is already running (presumably the reason for
	# "if required" in the original note -- confirm against the deployment).
	import nest_asyncio

	nest_asyncio.apply()

	# Shared OpenAI client used by every function in this module. The API key is
	# read from Streamlit secrets under "OPENAI_API_KEY"; a missing key fails
	# here, at import time.
	client = openai.OpenAI(api_key=st.secrets["OPENAI_API_KEY"])

	# ID of the OpenAI Assistant that fetch_query_result() runs each sub-query
	# against. Hard-coded; the assistant must exist in the account that owns
	# the API key above.
	ASSISTANT_ID = "asst_jNEWFnROZxSI8ZnL9WDI2yCp"


	def analyze_query(input_query: str) -> list[str]:
	    """Ask the chat model to break a research query into sub-queries.

	    Sends ``input_query`` to ``gpt-3.5-turbo`` with an SEO-assistant system
	    prompt that asks for three specific sub-queries, then splits the reply
	    into lines.

	    Args:
	        input_query: The research question entered by the user.

	    Returns:
	        The non-blank lines of the model's reply, each stripped of
	        surrounding whitespace. Empty if the model returned no text.

	    Raises:
	        Whatever the OpenAI client raises for a failed request; nothing is
	        caught here.
	    """
	    response = client.chat.completions.create(
	        model="gpt-3.5-turbo",
	        messages=[
	            {
	                "role": "system",
	                "content": "You are a helpful SEO assistant willing to understand Google's massive data leak documentation.",
	            },
	            {
	                "role": "user",
	                "content": f"Analyze this query and suggest three specific sub-queries: {input_query}",
	            },
	        ],
	    )
	    # The SDK types message content as optional; treat a missing reply as
	    # empty text instead of failing on None.strip().
	    reply = response.choices[0].message.content or ""
	    # Replies often separate list items with blank lines. Each returned
	    # element is later sent to the Assistant as its own query, so blank
	    # entries must not survive the split.
	    stripped_lines = (line.strip() for line in reply.splitlines())
	    return [line for line in stripped_lines if line]


	async def fetch_query_result(query: str) -> str:
	    """Run one query through the OpenAI Assistant and return its text reply.

	    Creates a thread holding ``query`` as a user message, starts a run of
	    ``ASSISTANT_ID`` on it, polls every 5 seconds until the run completes,
	    then joins the text of every assistant message in the thread. Markers of
	    the form ``【12:3†source】`` are stripped from the text.

	    The SDK calls are synchronous, so each one is pushed to a worker thread
	    with ``asyncio.to_thread``; otherwise several of these coroutines
	    scheduled with ``asyncio.gather`` would block each other.

	    Args:
	        query: The sub-query text to send to the assistant.

	    Returns:
	        The assistant's text blocks joined with newlines. On any failure
	        (including a run that ends without completing) a string starting
	        with ``"Error during query execution:"`` is returned instead; this
	        coroutine does not raise.
	    """
	    # Statuses from which a run will not reach "completed" on its own.
	    # "requires_action" is included because this code never submits tool
	    # outputs, so such a run could only sit there until it expires.
	    dead_end_statuses = {
	        "failed",
	        "cancelled",
	        "expired",
	        "incomplete",
	        "requires_action",
	    }
	    try:
	        thread = await asyncio.to_thread(
	            client.beta.threads.create,
	            messages=[{"role": "user", "content": query}],
	        )
	        run = await asyncio.to_thread(
	            client.beta.threads.runs.create,
	            thread_id=thread.id,
	            assistant_id=ASSISTANT_ID,
	        )
	        while run.status != "completed":
	            if run.status in dead_end_statuses:
	                # Without this check the loop would poll forever.
	                raise RuntimeError(
	                    f"assistant run ended with status '{run.status}'"
	                )
	            # Sleep before re-polling so the loop exits as soon as a
	            # completed status is observed, with no trailing delay.
	            await asyncio.sleep(5)
	            run = await asyncio.to_thread(
	                client.beta.threads.runs.retrieve,
	                thread_id=thread.id,
	                run_id=run.id,
	            )
	        messages = await asyncio.to_thread(
	            client.beta.threads.messages.list, thread.id
	        )
	        report = []
	        for message in messages.data:
	            if message.role != "assistant":
	                continue
	            for content_block in message.content:
	                # Only text blocks carry ``.text.value``; skip anything else.
	                text_part = getattr(content_block, "text", None)
	                text_value = getattr(text_part, "value", None)
	                if not isinstance(text_value, str):
	                    continue
	                report.append(re.sub(r"【\d+:\d+†source】", "", text_value))
	        return "\n".join(report)
	    except Exception as e:
	        # Best-effort by design: the caller gathers several of these and
	        # expects a string from each, so failures are reported in-band.
	        return f"Error during query execution: {str(e)}"


	def generate_final_response(results):
	    """Synthesize the per-sub-query answers into one report.

	    The strings in ``results`` are joined with single spaces and sent to
	    ``gpt-4o`` with an SEO-assistant system prompt asking for a
	    comprehensive report.

	    Args:
	        results: Iterable of answer strings, one per sub-query.

	    Returns:
	        The text of the model's first choice, stripped of surrounding
	        whitespace.
	    """
	    merged_findings = " ".join(results)

	    system_message = {
	        "role": "system",
	        "content": "You are a helpful SEO assistant analyzing the leaked 2,500 internal Google Search documents on Search Engine Optimization.",
	    }
	    user_message = {
	        "role": "user",
	        "content": f"Synthesize the following information into a comprehensive report: {merged_findings}",
	    }

	    completion = client.chat.completions.create(
	        model="gpt-4o",
	        messages=[system_message, user_message],
	    )
	    first_choice = completion.choices[0]
	    return first_choice.message.content.strip()


	async def handle_query(input_query: str):
	    """Turn the user's query into a final report.

	    Steps: derive sub-queries with ``analyze_query``, list them in a
	    Streamlit expander, run ``fetch_query_result`` for all of them via
	    ``asyncio.gather``, then pass the answers to ``generate_final_response``
	    in a worker thread.

	    Args:
	        input_query: The research question entered by the user.

	    Returns:
	        The synthesized report text, or a string starting with
	        ``"Error during report generation:"`` if any step raises.
	    """
	    try:
	        sub_queries = analyze_query(input_query)

	        # Show the generated sub-queries so the user can see the reasoning.
	        with st.expander("Reasoning > Generated Sub-Queries"):
	            for sub_query in sub_queries:
	                st.write(f"{sub_query}")

	        # One Assistant lookup per sub-query, awaited together.
	        pending_lookups = [
	            fetch_query_result(sub_query) for sub_query in sub_queries
	        ]
	        answers = await asyncio.gather(*pending_lookups)

	        return await asyncio.to_thread(generate_final_response, answers)
	    except Exception as exc:
	        return f"Error during report generation: {str(exc)}"


	def run_async_query(input_query):
	    """Run ``handle_query`` to completion from synchronous code.

	    Args:
	        input_query: The research question to pass to ``handle_query``.

	    Returns:
	        Whatever ``handle_query`` returns (the report text or its error
	        string).
	    """
	    # asyncio.get_event_loop() raises RuntimeError when the calling thread
	    # has no current loop. Fall back to creating one so this function does
	    # not depend on the event-loop policy having been patched elsewhere.
	    try:
	        loop = asyncio.get_event_loop()
	    except RuntimeError:
	        loop = None
	    if loop is None or loop.is_closed():
	        loop = asyncio.new_event_loop()
	        asyncio.set_event_loop(loop)
	    return loop.run_until_complete(handle_query(input_query))


	# Streamlit interface: a title, a query box pre-filled with an example, and
	# a button that runs the pipeline and renders the report with a download.
	st.title("Google Leak Reporting Tool")

	query = st.text_area(
	    "Enter your research query:",
	    "Extract all the information about how the ranking for internal links works.",
	    height=150,
	)

	if st.button("Generate Report"):
	    if not query.strip():
	        st.warning("Please enter a query to generate a report.")
	    else:
	        with st.spinner("Generating report..."):
	            report = run_async_query(query)
	        if not report:
	            st.error("Failed to generate report or report is empty.")
	        elif report.startswith("Error during report generation:"):
	            # handle_query reports failures by returning this prefixed
	            # string instead of raising. Show it as an error; do not
	            # announce success or offer it as a downloadable report.
	            st.error(report)
	        else:
	            st.success("Report generated successfully!")
	            st.write(report)
	            st.download_button(
	                "Download Report as Text File",
	                data=report,
	                file_name="research_report.txt",
	                mime="text/plain",
	            )