Spaces:

WordLift
/

GoogleLeak

Running

File size: 3,366 Bytes

c0a2f04
e720e43
bbd760a
e720e43
c0a2f04
e720e43
 
c0a2f04
5654a6b
 
e720e43
 
dbcc073
e720e43
 
dbcc073
5654a6b
e720e43
 
07df051
e720e43
 
07df051
e1ecda6
e720e43
 
 
 
 
 
 
 
187985b
e720e43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e1ecda6
 
07df051
527848d
e720e43
bbd760a
 
 
 
e720e43
bbd760a
 
 
c0a2f04
187985b
c0a2f04
e1ecda6
04d19e0
c0a2f04
04d19e0
187985b
c0a2f04
 
5229ff8
c0a2f04
bbd760a
c0a2f04
 
187985b
e720e43
 
187985b
 
 
 
 
 
 
 
 
 
e720e43

import streamlit as st
import openai
import asyncio
import re

# Patch asyncio with nest_asyncio before any event loop is used.
# NOTE(review): presumably needed so loop.run_until_complete() can be called
# from inside Streamlit's runtime — confirm against the deployment environment.
import nest_asyncio

nest_asyncio.apply()

# Module-level OpenAI client shared by every call in this file. The API key is
# read from Streamlit secrets under the "OPENAI_API_KEY" entry.
client = openai.OpenAI(api_key=st.secrets["OPENAI_API_KEY"])

# ID of the OpenAI Assistant that runs are created against (hard-coded).
ASSISTANT_ID = "asst_jNEWFnROZxSI8ZnL9WDI2yCp"


# Define the asynchronous function to interact with the OpenAI assistant
async def fetch_report(query: str) -> str:
    """
    Ask the OpenAI Assistant to produce a report for ``query``.

    A new thread is created with ``query`` as the user message, a run is
    started against ``ASSISTANT_ID`` and polled until it finishes, and the
    text of every assistant message is collected with source-citation
    markers (e.g. ``【12:3†source】``) stripped out.

    Args:
        query: The research question to send to the assistant.

    Returns:
        The assistant's text blocks joined with newlines. On any failure
        (API error, or a run that ends without completing) the string
        ``"Error during research: <details>"`` is returned instead of raising.
    """
    # Run states from which polling can never reach "completed". The app never
    # submits tool outputs, so "requires_action" is a dead end here as well.
    dead_end_states = {
        "failed",
        "cancelled",
        "expired",
        "incomplete",
        "requires_action",
    }
    poll_interval_seconds = 5

    try:
        # Create a thread seeded with the user's query.
        thread = client.beta.threads.create(
            messages=[{"role": "user", "content": query}]
        )

        # Start the assistant on that thread.
        run = client.beta.threads.runs.create(
            thread_id=thread.id, assistant_id=ASSISTANT_ID
        )

        # Poll until the run completes. Sleeping *before* each retrieve avoids
        # a pointless extra delay once the run is already done, and bailing out
        # on dead-end states prevents an endless loop when the run fails.
        while run.status != "completed":
            if run.status in dead_end_states:
                raise RuntimeError(
                    f"assistant run ended with status '{run.status}'"
                )
            await asyncio.sleep(poll_interval_seconds)
            run = client.beta.threads.runs.retrieve(
                thread_id=thread.id, run_id=run.id
            )

        # The API lists messages newest-first by default; request ascending
        # order so a multi-message answer reads chronologically.
        thread_messages = client.beta.threads.messages.list(
            thread.id, order="asc"
        )

        report_parts = []
        for message in thread_messages.data:
            if message.role != "assistant":
                continue
            for content_block in message.content:
                # Non-text blocks (e.g. images) carry no ``text.value``.
                text_value = getattr(
                    getattr(content_block, "text", None), "value", None
                )
                if text_value is None:
                    continue
                # Remove source citations
                report_parts.append(
                    re.sub(r"【\d+:\d+†source】", "", text_value)
                )
        return "\n".join(report_parts)
    except Exception as e:
        # Top-level boundary: callers expect a string, never an exception.
        return f"Error during research: {e}"


def run_report_generation(query):
    """
    Run the async ``fetch_report`` coroutine to completion from sync code.

    Args:
        query: The research question forwarded to ``fetch_report``.

    Returns:
        Whatever ``fetch_report(query)`` returns (the report text, or its
        error string).
    """
    # asyncio.get_event_loop() raises RuntimeError when the current thread has
    # no event loop (e.g. a non-main thread, or newer Python versions that no
    # longer create one implicitly). Fall back to a fresh loop in that case,
    # and also when the existing loop has already been closed.
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = None

    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    return loop.run_until_complete(fetch_report(query))


# Streamlit interface: collect a query, generate the report on demand, then
# show it together with a download button.
st.title("Google Leak Reporting Tool")

# User input for the query using a text area
query = st.text_area(
    "Enter your research query:",
    "Extract all the information about how the ranking for internal links works.",
    height=150,  # Adjustable height
)

# Start the report generation process
if st.button("Generate Report"):
    if not query.strip():
        st.warning("Please enter a query to generate a report.")
    else:
        with st.spinner("Generating report..."):
            report = run_report_generation(query)

        if not report:
            st.error("Failed to generate report or report is empty.")
        elif report.startswith("Error during research:"):
            # fetch_report signals failure by returning a string with this
            # prefix instead of raising; surface it as an error rather than
            # presenting it as a successful, downloadable report.
            st.error(report)
        else:
            st.success("Report generated successfully!")
            st.write(report)  # Display the report in the app
            # Create a download button for the report
            st.download_button(
                label="Download Report as Text File",
                data=report,
                file_name="research_report.txt",
                mime="text/plain",
            )