Spaces:
Runtime error
Runtime error
File size: 2,995 Bytes
3505899 d6579b5 3505899 d6579b5 f9159cc c8114a5 cdc5bd9 c8114a5 39b45c6 a7e7db9 39b45c6 c8114a5 3505899 d19e286 23b48d0 490445b 3505899 490445b 3505899 6678ed6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
from AssistantService import GPTAssistant
from openai.error import AuthenticationError
import streamlit as st
import configparser
import os
# Load an optional API key from a local ``config.ini``.
#
# ``ConfigParser.read`` silently ignores a missing file and every parser always
# exposes its DEFAULT section, so the lookup needs no guard. Reading it
# unconditionally guarantees ``assistant_api_key`` is always bound: it is the
# configured key, or '' when no key is configured.
config = configparser.ConfigParser()
config.read('config.ini')
assistant_api_key = config['DEFAULT'].get('API-KEY', '')
# Export the LangChain tracing settings through the process environment.
# The two fixed values are written first; the remaining two are copied from
# Streamlit secrets under the same names.
os.environ.update({
    "LANGCHAIN_TRACING_V2": "true",
    "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
})
for secret_name in ("LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT"):
    os.environ[secret_name] = st.secrets[secret_name]
# --- Landing section: description, demo video and quick-start hint ---------
st.write(
    "This app helps you to extract data from HTML code using web scraping. "
    "It uses GPT-3.5-turbo to generate the code for you. \n "
    "*Contribute to this project on "
    "[GitHub](https://github.com/CognitiveLabs/GPT-auto-webscraping)*"
)
st.write("")

# The demo video sits inside a collapsible expander.
with st.expander("Check out the video demo"):
    yt_video = st.video("https://www.youtube.com/watch?v=_zeCun4OlCc")

# Quick-start text, followed by an animated example of the expected input.
info_text = """
**Quick start** \n
Fill the input with the HTML code you want to extract data from
Example below:
"""
st.write(info_text)
st.image("https://j.gifs.com/gpqvPl.gif")
# --- API key / assistant construction ----------------------------------------
# A key found in the config is used directly; otherwise the user is asked for
# one. NOTE: `gpt_assistant` is deliberately left unbound while no key is
# available -- the later steps detect that case by catching NameError.
if assistant_api_key != '':
    gpt_assistant = GPTAssistant(assistant_api_key)
else:
    assistant_api_key = st.text_input("Paste your API key here:")
    if assistant_api_key:
        gpt_assistant = GPTAssistant(assistant_api_key)
# --- Step 1: infer the output data format from an HTML snippet ---------------
html_content = st.text_input("Paste your piece of HTML here:")
extract_button = st.button("Extract data format")

if html_content and extract_button:
    # `gpt_assistant` is only bound once an API key is available, so a
    # NameError on the bare name means "no key yet". The probe is kept separate
    # from the actual call so that a NameError raised *inside* the assistant is
    # not misreported as a missing API key.
    try:
        active_assistant = gpt_assistant
    except NameError:
        st.write("Complete the API key field")
    else:
        try:
            output = active_assistant.chain_response_format(html_content)
        except AuthenticationError:
            st.write("Invalid API key")
        else:
            # Persist across Streamlit reruns so the next step can use it.
            st.session_state['output_format'] = output
# --- Step 2: show the inferred format and generate the scraping function -----
if 'output_format' in st.session_state:
    output_format = st.code(st.session_state['output_format'], language="json")

    if st.button("Generate the code"):
        # `gpt_assistant` is only bound once an API key is available, so a
        # NameError on the bare name means "no key yet". Probing the name on
        # its own keeps unrelated NameErrors raised inside the assistant from
        # being reported as a missing API key.
        try:
            active_assistant = gpt_assistant
        except NameError:
            st.write("Complete the API key field")
        else:
            try:
                python_code = active_assistant.chain_code_generator(
                    st.session_state['output_format'], html_content
                )
            except AuthenticationError:
                st.write("Invalid API key")
            else:
                st.session_state['code_generated'] = python_code
                # Runnable variant: the generated code followed by a call that
                # stores its output in `result` (reads `html_data`).
                st.session_state['code_generated_exec'] = (
                    python_code + "\nresult = extract_info(html_data)"
                )
# --- Step 3: display the generated function and try it on a full page --------
if 'code_generated' in st.session_state:
    python_function_label = st.write("Here is your python function:")
    code_generated = st.code(st.session_state['code_generated'], language="python")
    full_content = st.text_input("Paste your complete HTML here:")
    test_code = st.button("Test the code")

    if full_content and test_code:
        # The executed snippet talks to this script through module globals:
        # it reads `html_data` and assigns `result`.
        html_data = full_content
        result = None
        # SECURITY NOTE(review): this runs model-generated code with the app's
        # own globals. Anything the model emits executes with the privileges of
        # this process; consider sandboxing before exposing this publicly.
        try:
            exec(st.session_state['code_generated_exec'], globals())
        except Exception as exc:
            # Generated code may be syntactically invalid or fail at runtime;
            # surface the error in the page instead of aborting the script run.
            st.exception(exc)
            result = None

        if result:
            st.write("data extracted successfully")
            # show data in table
            st.table(result)
        else:
            st.write("error extracting data")
|