Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -15,81 +15,116 @@ client = openai.OpenAI(api_key=st.secrets["OPENAI_API_KEY"])
|
|
15 |
ASSISTANT_ID = "asst_jNEWFnROZxSI8ZnL9WDI2yCp"
|
16 |
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
try:
|
25 |
-
# Create a Thread with an initial user message
|
26 |
thread = client.beta.threads.create(
|
27 |
messages=[{"role": "user", "content": query}]
|
28 |
)
|
29 |
-
|
30 |
-
# Start the Assistant
|
31 |
run = client.beta.threads.runs.create(
|
32 |
thread_id=thread.id, assistant_id=ASSISTANT_ID
|
33 |
)
|
34 |
-
|
35 |
-
# Wait for the run to complete
|
36 |
while run.status != "completed":
|
37 |
run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
|
38 |
-
await asyncio.sleep(5) #
|
39 |
-
|
40 |
-
# Retrieve the Messages added by the Assistant to the Thread
|
41 |
-
thread_messages = client.beta.threads.messages.list(thread.id)
|
42 |
-
|
43 |
-
# Initialize an empty string to collect the cleaned report
|
44 |
report = []
|
45 |
-
for message in
|
46 |
if message.role == "assistant":
|
47 |
for content_block in message.content:
|
48 |
if "text" in dir(content_block) and "value" in dir(
|
49 |
content_block.text
|
50 |
):
|
51 |
-
# Remove source citations
|
52 |
cleaned_text = re.sub(
|
53 |
r"【\d+:\d+†source】", "", content_block.text.value
|
54 |
)
|
55 |
report.append(cleaned_text)
|
56 |
return "\n".join(report)
|
57 |
except Exception as e:
|
58 |
-
return f"Error during
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
|
61 |
-
def
|
62 |
-
"""
|
63 |
-
Helper function to run async fetch_report function.
|
64 |
-
"""
|
65 |
loop = asyncio.get_event_loop()
|
66 |
-
report = loop.run_until_complete(
|
67 |
return report
|
68 |
|
69 |
|
70 |
# Streamlit interface
|
71 |
st.title("Google Leak Reporting Tool")
|
72 |
|
73 |
-
# User input for the query using a text area
|
74 |
query = st.text_area(
|
75 |
"Enter your research query:",
|
76 |
"Extract all the information about how the ranking for internal links works.",
|
77 |
-
height=150,
|
78 |
)
|
79 |
|
80 |
-
# Start the report generation process
|
81 |
if st.button("Generate Report"):
|
82 |
if not query.strip():
|
83 |
st.warning("Please enter a query to generate a report.")
|
84 |
else:
|
85 |
with st.spinner("Generating report..."):
|
86 |
-
report =
|
87 |
if report:
|
88 |
st.success("Report generated successfully!")
|
89 |
-
st.write(report)
|
90 |
-
# Create a download button for the report
|
91 |
st.download_button(
|
92 |
-
|
93 |
data=report,
|
94 |
file_name="research_report.txt",
|
95 |
mime="text/plain",
|
|
|
15 |
# ID of the pre-configured OpenAI Assistant (with the leak docs attached) used for every run.
ASSISTANT_ID = "asst_jNEWFnROZxSI8ZnL9WDI2yCp"
|
16 |
|
17 |
|
18 |
+
def analyze_query(input_query: str) -> list:
    """Break the user's research question into three focused sub-queries.

    Sends *input_query* to a chat model and returns the model's
    suggestions split on newlines (the reply is expected to list one
    sub-query per line).
    """
    system_message = {
        "role": "system",
        "content": "You are a helpful SEO assistant willing to understand Google's massive data leak documentation.",
    }
    user_message = {
        "role": "user",
        "content": f"Analyze this query and suggest three specific sub-queries: {input_query}",
    }
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
    )
    # One suggested sub-query per line of the model's reply.
    suggestions = completion.choices[0].message.content.strip()
    return suggestions.split("\n")
34 |
+
|
35 |
+
|
36 |
+
async def fetch_query_result(query: str) -> str:
    """Fetch the Assistant's answer for a single sub-query.

    Creates a thread seeded with *query*, starts a run against
    ASSISTANT_ID, polls until the run reaches a terminal state, then
    joins the assistant's text replies with inline source citations
    stripped.

    Returns the cleaned report text, or an ``"Error during query
    execution: ..."`` string on failure (callers treat errors as text).
    """
    try:
        thread = client.beta.threads.create(
            messages=[{"role": "user", "content": query}]
        )
        run = client.beta.threads.runs.create(
            thread_id=thread.id, assistant_id=ASSISTANT_ID
        )
        # Poll until the run reaches ANY terminal state.  The original
        # loop only tested for "completed", which spins forever when the
        # run ends as failed / cancelled / expired / requires_action.
        terminal_states = {
            "completed",
            "failed",
            "cancelled",
            "expired",
            "requires_action",
        }
        while run.status not in terminal_states:
            await asyncio.sleep(5)  # yield to the event loop between polls
            run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
        if run.status != "completed":
            return f"Error during query execution: run ended with status '{run.status}'"

        messages = client.beta.threads.messages.list(thread.id)
        report = []
        for message in messages.data:
            if message.role == "assistant":
                for content_block in message.content:
                    # Only text content blocks carry a .text.value payload.
                    if "text" in dir(content_block) and "value" in dir(
                        content_block.text
                    ):
                        # Strip inline citations like 【12:3†source】 from the answer.
                        cleaned_text = re.sub(
                            r"【\d+:\d+†source】", "", content_block.text.value
                        )
                        report.append(cleaned_text)
        return "\n".join(report)
    except Exception as e:
        return f"Error during query execution: {str(e)}"
|
63 |
+
|
64 |
+
|
65 |
+
def generate_final_response(results):
    """Merge the per-sub-query answers into one synthesized report.

    *results* is an iterable of result strings; they are concatenated
    and handed to a chat model for synthesis.  Returns the model's
    report text, stripped of surrounding whitespace.
    """
    combined_text = " ".join(results)
    chat_messages = [
        {
            "role": "system",
            "content": "You are a helpful SEO assistant analyzing the leaked 2,500 internal Google Search documents on Search Engine Optimization.",
        },
        {
            "role": "user",
            "content": f"Synthesize the following information into a comprehensive report: {combined_text}",
        },
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
    )
    return completion.choices[0].message.content.strip()
|
82 |
+
|
83 |
+
|
84 |
+
async def handle_query(input_query: str):
    """Run the full pipeline: split the query, fan out, synthesize.

    Shows the generated sub-queries in a Streamlit expander, fetches
    each one concurrently, then synthesizes a final report.  Returns
    the report text, or an error string if anything raises.
    """
    try:
        sub_queries = analyze_query(input_query)

        # Surface the model's reasoning (the generated sub-queries) to the user.
        with st.expander("Reasoning > Generated Sub-Queries"):
            for sub_query in sub_queries:
                st.write(f"{sub_query}")

        # Fetch every sub-query concurrently.
        results = await asyncio.gather(
            *(fetch_query_result(sub_query) for sub_query in sub_queries)
        )

        # Synthesis is a blocking API call; run it off the event loop.
        return await asyncio.to_thread(generate_final_response, results)
    except Exception as e:
        return f"Error during report generation: {str(e)}"
|
100 |
|
101 |
|
102 |
+
def run_async_query(input_query):
    """Bridge Streamlit's synchronous script thread to the async pipeline.

    ``asyncio.get_event_loop()`` is deprecated when no loop is running
    and raises ``RuntimeError`` in non-main threads — which is where
    Streamlit executes the script — so fall back to creating and
    installing a fresh loop when needed.  Returns handle_query's result.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No event loop in this (worker) thread yet: create one.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    report = loop.run_until_complete(handle_query(input_query))
    return report
|
106 |
|
107 |
|
108 |
# Streamlit interface
st.title("Google Leak Reporting Tool")

# Research question input (pre-filled with an example query).
query = st.text_area(
    "Enter your research query:",
    "Extract all the information about how the ranking for internal links works.",
    height=150,
)

if st.button("Generate Report"):
    if query.strip():
        with st.spinner("Generating report..."):
            report = run_async_query(query)
            if report:
                st.success("Report generated successfully!")
                st.write(report)
                # Offer the finished report as a plain-text download.
                st.download_button(
                    "Download Report as Text File",
                    data=report,
                    file_name="research_report.txt",
                    mime="text/plain",
                )
    else:
        st.warning("Please enter a query to generate a report.")