Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -15,22 +15,25 @@ os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
|
|
15 |
os.environ["LANGCHAIN_API_KEY"]=st.secrets["LANGCHAIN_API_KEY"]
|
16 |
os.environ["LANGCHAIN_PROJECT"]=st.secrets["LANGCHAIN_PROJECT"]
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
33 |
|
|
|
34 |
|
35 |
if assistant_api_key == '':
|
36 |
assistant_api_key = st.secrets["API_KEY"]
|
@@ -39,15 +42,22 @@ if assistant_api_key == '':
|
|
39 |
else:
|
40 |
gpt_assistant = GPTAssistant(assistant_api_key)
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
html_content = None
|
43 |
# Check whether html_content is a URL, and show an error if it is.
|
44 |
-
|
45 |
def html_content_input():
    """Show the HTML-snippet input box followed by the generate button.

    Writes a hint to the page when the entered text starts with "http",
    i.e. when a URL was pasted instead of markup.

    Returns:
        Whatever ``st.button`` returns for the
        "Generate output format & code" button.
    """
    pasted = st.text_input(
        "Paste the HTML tags of the item you want to extract:",
        max_chars=10000,
        help="example: <li>Product 1 </li>, watch the video above",
    )

    # Empty input is never reported; only non-empty text is inspected.
    looks_like_url = bool(pasted) and pasted.startswith("http")
    if looks_like_url:
        st.write("Please paste the HTML piece code, not the URL")

    return st.button("Generate output format & code")
|
53 |
|
|
|
15 |
os.environ["LANGCHAIN_API_KEY"]=st.secrets["LANGCHAIN_API_KEY"]
|
16 |
os.environ["LANGCHAIN_PROJECT"]=st.secrets["LANGCHAIN_PROJECT"]
|
17 |
|
18 |
+
@traceable(run_type="tool")
|
19 |
+
def start():
    """Render the landing section: intro text, demo video and quick-start guide.

    Only emits Streamlit elements; nothing is returned.
    """
    st.write("This app helps you to extract data from HTML code using web scraping. It uses GPT-3.5-turbo to generate the code for you. \n *Contribute to this project on [GitHub](https://github.com/CognitiveLabs/GPT-auto-webscraping)*")

    # NOTE(review): only the video is assumed to sit inside the expander;
    # confirm the intended nesting of the quick-start text and the GIF.
    with st.expander(label="Check out the video demo"):
        # The element was previously bound to a local that was never read.
        st.video("https://www.youtube.com/watch?v=_zeCun4OlCc")

    # Markdown shown below the expander; kept flush-left so the text is
    # not indented inside the string.
    info_text = """
**Quick start** \n
Fill the input with <HTML code>.
* Choose a repeating element on the page, like a product on a list.
* Inspect the HTML code and copy the element.

After generating the "output format" and the code, paste the complete HTML code of the page in the last input to test it
"""
    st.write(info_text)
    st.image("https://j.gifs.com/gpqvPl.gif")
|
35 |
|
36 |
+
start()
|
37 |
|
38 |
if assistant_api_key == '':
|
39 |
assistant_api_key = st.secrets["API_KEY"]
|
|
|
42 |
else:
|
43 |
gpt_assistant = GPTAssistant(assistant_api_key)
|
44 |
|
45 |
+
@traceable(run_type="tool")
|
46 |
+
def invalid_input(html):
    """Return True when *html* looks like a URL rather than an HTML snippet.

    Args:
        html: Text pasted by the user. Non-string values (e.g. ``None``)
            are treated as "not a URL".

    Returns:
        ``True`` if the text, ignoring surrounding whitespace and letter
        case, starts with ``"http"``; ``False`` otherwise. The result is
        always a real ``bool`` (never an implicit ``None``).
    """
    # TODO: more checks
    if not isinstance(html, str):
        return False
    # Tolerate pasted whitespace and upper-case schemes such as "HTTPS://".
    return html.strip().lower().startswith("http")
|
50 |
+
|
51 |
+
|
52 |
html_content = None
|
53 |
# Check whether html_content is a URL, and show an error if it is.
|
54 |
+
|
55 |
def html_content_input():
    """Render the HTML-snippet text input and the generate button.

    When the pasted text is flagged by ``invalid_input`` (it looks like a
    URL instead of markup), a hint is written to the page.

    Returns:
        Whatever ``st.button`` returns for the
        "Generate output format & code" button.
    """
    # NOTE(review): this assignment is local to the function; the
    # module-level ``html_content`` is not updated here - confirm intended.
    html_content = st.text_input(
        "Paste the HTML tags of the item you want to extract:",
        max_chars=10000,
        help="example: <li>Product 1 </li>, watch the video above",
    )

    # Pass the actual input to the validator: the earlier call used the
    # undefined name ``html`` and raised NameError. The helper replaces the
    # duplicated inline "starts with http" test.
    if html_content and invalid_input(html_content):
        st.write("Please paste the HTML piece code, not the URL")

    return st.button("Generate output format & code")
|
63 |
|