GianJSX committed on
Commit
c739942
·
1 Parent(s): 5a6cb7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -17
app.py CHANGED
@@ -15,22 +15,25 @@ os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
15
  os.environ["LANGCHAIN_API_KEY"]=st.secrets["LANGCHAIN_API_KEY"]
16
  os.environ["LANGCHAIN_PROJECT"]=st.secrets["LANGCHAIN_PROJECT"]
17
 
18
- st.write("This app helps you to extract data from HTML code using web scraping. It uses GPT-3.5-turbo to generate the code for you. \n *Contribute to this project on [GitHub](https://github.com/CognitiveLabs/GPT-auto-webscraping)*")
19
-
20
- with st.expander(label="Check out the video demo"):
21
- yt_video = st.video("https://www.youtube.com/watch?v=_zeCun4OlCc")
22
-
23
- info_text = """
24
- **Quick start** \n
25
- Fill the input with <HTML code>.
26
- * Choose a repeating element on the page, like a product on a list.
27
- * Inspect the HTML code and copy the element.
28
-
29
- After generating the "output format" and the code, paste the complete HTML code of the page in the last input to test it
30
- """
31
- st.write(info_text)
32
- st.image("https://j.gifs.com/gpqvPl.gif")
 
 
33
 
 
34
 
35
  if assistant_api_key == '':
36
  assistant_api_key = st.secrets["API_KEY"]
@@ -39,15 +42,22 @@ if assistant_api_key == '':
39
  else:
40
  gpt_assistant = GPTAssistant(assistant_api_key)
41
 
 
 
 
 
 
 
 
42
  html_content = None
43
  # check if html_content is an url, and show error if it is
44
- @traceable(run_type="tool")
45
  def html_content_input():
46
  html_content = st.text_input("Paste the HTML tags of the item you want to extract:", max_chars=10000, help="example: <li>Product 1 </li>, watch the video above")
47
  if html_content:
48
  if html_content.startswith("http"):
49
  st.write("Please paste the HTML piece code, not the URL")
50
- html_content = None
51
 
52
  return st.button("Generate output format & code")
53
 
 
15
  os.environ["LANGCHAIN_API_KEY"]=st.secrets["LANGCHAIN_API_KEY"]
16
  os.environ["LANGCHAIN_PROJECT"]=st.secrets["LANGCHAIN_PROJECT"]
17
 
18
+ @traceable(run_type="tool")
19
+ def start():
20
+ st.write("This app helps you to extract data from HTML code using web scraping. It uses GPT-3.5-turbo to generate the code for you. \n *Contribute to this project on [GitHub](https://github.com/CognitiveLabs/GPT-auto-webscraping)*")
21
+
22
+ with st.expander(label="Check out the video demo"):
23
+ yt_video = st.video("https://www.youtube.com/watch?v=_zeCun4OlCc")
24
+
25
+ info_text = """
26
+ **Quick start** \n
27
+ Fill the input with <HTML code>.
28
+ * Choose a repeating element on the page, like a product on a list.
29
+ * Inspect the HTML code and copy the element.
30
+
31
+ After generating the "output format" and the code, paste the complete HTML code of the page in the last input to test it
32
+ """
33
+ st.write(info_text)
34
+ st.image("https://j.gifs.com/gpqvPl.gif")
35
 
36
+ start()
37
 
38
  if assistant_api_key == '':
39
  assistant_api_key = st.secrets["API_KEY"]
 
42
  else:
43
  gpt_assistant = GPTAssistant(assistant_api_key)
44
 
45
+ @traceable(run_type="tool")
46
+ def invalid_input(html):
47
+ # TODO: more checks
48
+ if html.startswith("http"):
49
+ return True
50
+
51
+
52
  html_content = None
53
  # check if html_content is an url, and show error if it is
54
+
55
  def html_content_input():
56
  html_content = st.text_input("Paste the HTML tags of the item you want to extract:", max_chars=10000, help="example: <li>Product 1 </li>, watch the video above")
57
  if html_content:
58
  if html_content.startswith("http"):
59
  st.write("Please paste the HTML piece code, not the URL")
60
+ invalid_input(html)
61
 
62
  return st.button("Generate output format & code")
63