File size: 4,717 Bytes
c0a2f04
e720e43
bbd760a
e720e43
c0a2f04
e720e43
 
c0a2f04
5654a6b
 
e720e43
 
dbcc073
e720e43
 
dbcc073
5654a6b
cef04ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e1ecda6
e720e43
 
 
 
 
187985b
e720e43
 
cef04ee
 
e720e43
cef04ee
e720e43
 
 
 
 
 
 
 
 
 
e1ecda6
cef04ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
07df051
527848d
cef04ee
bbd760a
cef04ee
bbd760a
 
 
c0a2f04
187985b
c0a2f04
04d19e0
c0a2f04
04d19e0
cef04ee
c0a2f04
 
 
bbd760a
c0a2f04
 
187985b
cef04ee
e720e43
187985b
cef04ee
187985b
cef04ee
187985b
 
 
 
 
e720e43
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import streamlit as st
import openai
import asyncio
import re

# Apply the asyncio patch if required
# nest_asyncio makes the event loop re-entrant so run_until_complete()
# can be called from run_async_query() even if a loop is already running
# inside Streamlit's script runner.
import nest_asyncio

nest_asyncio.apply()

# OpenAI Client Initialization with API Key from Streamlit secrets
# (the key must be present in .streamlit/secrets.toml as OPENAI_API_KEY).
client = openai.OpenAI(api_key=st.secrets["OPENAI_API_KEY"])

# Assistant ID
# Pre-configured OpenAI Assistant used by fetch_query_result();
# presumably it has the leaked documents attached as files — TODO confirm,
# its configuration is not visible in this file.
ASSISTANT_ID: str = "asst_jNEWFnROZxSI8ZnL9WDI2yCp"


def analyze_query(input_query: str) -> list:
    """Break the user's initial query into three detailed sub-queries.

    Asks a chat model to propose three specific sub-queries for the given
    research question and returns them one per line.

    Args:
        input_query: The user's original research question.

    Returns:
        A list of non-empty, stripped sub-query strings.
    """
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful SEO assistant willing to understand Google's massive data leak documentation.",
            },
            {
                "role": "user",
                "content": f"Analyze this query and suggest three specific sub-queries: {input_query}",
            },
        ],
    )
    # Models often separate items with blank lines; drop empty/whitespace
    # lines so downstream fetch_query_result() never receives an empty query.
    lines = response.choices[0].message.content.strip().split("\n")
    return [line.strip() for line in lines if line.strip()]


async def fetch_query_result(query: str) -> str:
    """Fetch the result for a single query using the OpenAI Assistant API.

    Creates a thread with the query, starts a run, polls until the run
    reaches a terminal state, then collects the assistant's text replies
    with inline citation markers removed.

    Args:
        query: The sub-query to send to the assistant.

    Returns:
        The assistant's combined reply text, or an error description string
        (this function never raises; errors are reported in the result).
    """
    try:
        thread = client.beta.threads.create(
            messages=[{"role": "user", "content": query}]
        )
        run = client.beta.threads.runs.create(
            thread_id=thread.id, assistant_id=ASSISTANT_ID
        )
        # Poll until the run reaches ANY terminal state. The original loop
        # only checked "completed", which spins forever if the run ends as
        # failed / cancelled / expired.
        while run.status not in ("completed", "failed", "cancelled", "expired"):
            await asyncio.sleep(5)  # yield to the event loop between polls
            run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
        if run.status != "completed":
            return f"Error during query execution: run ended with status '{run.status}'"
        messages = client.beta.threads.messages.list(thread.id)
        report = []
        for message in messages.data:
            if message.role == "assistant":
                for content_block in message.content:
                    # Only text content blocks carry a .text.value payload.
                    if hasattr(content_block, "text") and hasattr(
                        content_block.text, "value"
                    ):
                        # Strip file-citation markers such as 【12:3†source】.
                        cleaned_text = re.sub(
                            r"【\d+:\d+†source】", "", content_block.text.value
                        )
                        report.append(cleaned_text)
        return "\n".join(report)
    except Exception as e:
        return f"Error during query execution: {str(e)}"


def generate_final_response(results):
    """Synthesize per-sub-query results into one comprehensive report.

    Joins the individual result strings with spaces and asks the chat
    model to merge them into a single report.

    Args:
        results: Iterable of result strings from the sub-queries.

    Returns:
        The model's synthesized report, stripped of surrounding whitespace.
    """
    combined_text = " ".join(results)
    prompt_messages = [
        {
            "role": "system",
            "content": "You are a helpful SEO assistant analyzing the leaked 2,500 internal Google Search documents on Search Engine Optimization.",
        },
        {
            "role": "user",
            "content": f"Synthesize the following information into a comprehensive report: {combined_text}",
        },
    ]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=prompt_messages,
    )
    return response.choices[0].message.content.strip()


async def handle_query(input_query: str):
    """Process the initial query end-to-end and return the final report.

    Generates sub-queries, displays them in the UI, fetches each result
    concurrently, then synthesizes everything into one report.

    Args:
        input_query: The user's original research question.

    Returns:
        The final report text, or an error description string.
    """
    try:
        sub_queries = analyze_query(input_query)
        # Surface the generated sub-queries so the user can inspect them.
        with st.expander("Reasoning > Generated Sub-Queries"):
            for sub_query in sub_queries:
                st.write(f"{sub_query}")
        # Fan out: run every sub-query against the assistant concurrently.
        tasks = [fetch_query_result(sub_query) for sub_query in sub_queries]
        results = await asyncio.gather(*tasks)
        # Synthesis is a blocking API call; keep it off the event loop.
        final_report = await asyncio.to_thread(generate_final_response, results)
        return final_report
    except Exception as e:
        return f"Error during report generation: {str(e)}"


def run_async_query(input_query):
    """Run the async query handler from Streamlit's synchronous context.

    asyncio.get_event_loop() is deprecated when no loop is set on the
    current thread (Python 3.10+) and raises in newer versions, so fall
    back to creating and installing a fresh loop explicitly. nest_asyncio
    (applied at import time) allows re-entrant run_until_complete calls.

    Args:
        input_query: The user's original research question.

    Returns:
        The final report text from handle_query().
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    report = loop.run_until_complete(handle_query(input_query))
    return report


# Streamlit interface
# Top-level script body: Streamlit re-executes this on every interaction.
st.title("Google Leak Reporting Tool")

# Multi-line input pre-filled with an example research query.
query = st.text_area(
    "Enter your research query:",
    "Extract all the information about how the ranking for internal links works.",
    height=150,
)

if st.button("Generate Report"):
    if not query.strip():
        # Reject empty / whitespace-only input before spending API calls.
        st.warning("Please enter a query to generate a report.")
    else:
        with st.spinner("Generating report..."):
            # Blocks until the full async pipeline completes.
            report = run_async_query(query)
            if report:
                st.success("Report generated successfully!")
                st.write(report)
                # Offer the report as a plain-text download.
                st.download_button(
                    "Download Report as Text File",
                    data=report,
                    file_name="research_report.txt",
                    mime="text/plain",
                )
            else:
                st.error("Failed to generate report or report is empty.")