Spaces:
Configuration error
Configuration error
Update app.py
Browse files
app.py
CHANGED
@@ -7,17 +7,32 @@ import os
|
|
7 |
# The OpenAI API key is a secret and must not be committed to the repository.
# Supply it through the OPENAI_API_KEY environment variable (for example as a
# deployment secret) and fail fast with a clear message when it is missing.
# NOTE(review): the key that was previously hard-coded here has been published
# and should be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; configure it as an environment variable "
        "or deployment secret instead of hard-coding it in app.py."
    )
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
# Function to fetch website content using the updated actor
|
11 |
def fetch_website_content(website_url):
|
12 |
apify_client = ApifyClient("apify_api_uz0y556N4IG2aLcESj67kmnGSUpHF12XAkLp")
|
13 |
-
run_input = {
|
|
|
|
|
|
|
14 |
run = apify_client.actor("moJRLRc85AitArpNN").call(run_input=run_input)
|
15 |
items = list(apify_client.dataset(run["defaultDatasetId"]).iterate_items())
|
16 |
return items if items else None
|
17 |
|
18 |
# Fetch and index website content
|
19 |
content = fetch_website_content("https://python.langchain.com/en/latest/")
|
20 |
-
documents = [Document(page_content=item["
|
21 |
index = VectorstoreIndexCreator().from_loaders([documents])
|
22 |
|
23 |
# Function for the Gradio UI
|
|
|
7 |
# The OpenAI API key is a secret and must not be committed to the repository.
# Supply it through the OPENAI_API_KEY environment variable (for example as a
# deployment secret) and fail fast with a clear message when it is missing.
# NOTE(review): the key that was previously hard-coded here has been published
# and should be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; configure it as an environment variable "
        "or deployment secret instead of hard-coding it in app.py."
    )
|
9 |
|
10 |
+
# Page function source (JavaScript, kept as a Python string) that is passed to
# the Apify actor as the "pageFunction" run input. It takes the jQuery handle
# from the context object it receives and returns one record per page with two
# keys: "title" (text of the <title> element) and "content" (text of <body>).
page_function_code = """
function pageFunction(context) {
const $ = context.jQuery;
const data = {
title: $('title').text(),
content: $('body').text()
};
return data;
}
"""
|
21 |
+
|
22 |
# Function to fetch website content using the updated actor
|
23 |
def fetch_website_content(website_url):
    """Scrape ``website_url`` with the Apify actor and return its dataset items.

    The actor is started with ``website_url`` as its only start URL and the
    module-level ``page_function_code`` as its page function; the items of the
    run's default dataset are then collected into a list.

    Args:
        website_url: URL handed to the actor as the single ``startUrls`` entry.

    Returns:
        A non-empty list of dataset items, or ``None`` when the run yielded no
        items (or no run object came back from the client).

    Raises:
        RuntimeError: If the ``APIFY_API_TOKEN`` environment variable is unset.
    """
    # The Apify token is a secret: read it from the environment instead of
    # embedding it in source control. It is checked first so that a missing
    # token fails before any client is created or request is made.
    api_token = os.environ.get("APIFY_API_TOKEN")
    if not api_token:
        raise RuntimeError(
            "APIFY_API_TOKEN is not set; configure it as an environment "
            "variable or deployment secret instead of hard-coding it."
        )

    apify_client = ApifyClient(api_token)
    run_input = {
        "startUrls": [{"url": website_url}],
        "pageFunction": page_function_code,
    }
    run = apify_client.actor("moJRLRc85AitArpNN").call(run_input=run_input)
    # NOTE(review): the client's ``call`` is typed as possibly returning None;
    # treat that like "nothing scraped" rather than crashing on the subscript.
    if not run:
        return None

    items = list(apify_client.dataset(run["defaultDatasetId"]).iterate_items())
    # Callers get None, not an empty list, when the dataset has no items.
    return items or None
|
32 |
|
33 |
# Fetch and index website content.
# The URL is bound to a module-level name because it is needed twice: as the
# scrape target and as the "source" metadata of every document.
website_url = "https://python.langchain.com/en/latest/"
content = fetch_website_content(website_url)
# fetch_website_content returns None when nothing was scraped, so fall back to
# an empty list; items without a "content" value become empty-text documents.
documents = [
    Document(page_content=item.get("content") or "", metadata={"source": website_url})
    for item in content or []
]
# ``documents`` is already a list of Document objects, not a list of loaders,
# so it is indexed with from_documents rather than from_loaders.
index = VectorstoreIndexCreator().from_documents(documents)
|
37 |
|
38 |
# Function for the Gradio UI
|