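# NOTE(review): the text "Spaces: Runtime error / Runtime error" appeared here.
# It looks like hosting-page status residue captured together with the file,
# not part of the program; kept as a comment so the script still parses.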
from AssistantService import GPTAssistant | |
from openai.error import AuthenticationError | |
import streamlit as st | |
import configparser | |
import os | |
# Load an optional API key from a local config.ini. ConfigParser.read()
# silently skips files it cannot open, so a missing file simply leaves the
# parser with its built-in DEFAULT section and the key falls back to ''.
config = configparser.ConfigParser()
config.read('config.ini')
# The DEFAULT section always exists on a ConfigParser, so no membership check
# is needed and `assistant_api_key` is always bound. An empty string means
# "no key configured"; the script later prompts the user in that case.
assistant_api_key = config['DEFAULT'].get('API-KEY', '')
# Configure LangChain tracing through environment variables. The tracing flag
# and endpoint are fixed values; the API key and project name are read from
# Streamlit secrets.
# NOTE(review): these are plain subscript lookups on st.secrets, so presumably
# a missing secret aborts the script right here, before any UI is drawn —
# confirm that is the intended behaviour for deployments without tracing.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = st.secrets["LANGCHAIN_API_KEY"]
os.environ["LANGCHAIN_PROJECT"] = st.secrets["LANGCHAIN_PROJECT"]
# Landing section: a short description with the project link, a collapsible
# demo video, and quick-start instructions followed by an example GIF.
st.write("This app helps you to extract data from HTML code using web scraping. It uses GPT-3.5-turbo to generate the code for you. \n *Contribute to this project on [GitHub](https://github.com/CognitiveLabs/GPT-auto-webscraping)*")
st.write("")  # empty write; presumably used as vertical spacing

# The video is rendered inside the expander rather than directly on the page.
with st.expander(label="Check out the video demo"):
    yt_video = st.video("https://www.youtube.com/watch?v=_zeCun4OlCc")

# Markdown shown above the example GIF. The "\n" escape is part of the text
# passed to st.write, in addition to the literal line breaks.
info_text = """
**Quick start** \n
Fill the input with the HTML code you want to extract data from
Example below:
"""
st.write(info_text)
st.image("https://j.gifs.com/gpqvPl.gif")
# Build the assistant only once a key is available. When config.ini supplied
# no key the user is prompted for one; until they enter it, `gpt_assistant`
# is deliberately left unbound and the later steps report
# "Complete the API key field".
if assistant_api_key == '':
    # type="password" masks the key on screen so it is not exposed in
    # screenshots or screen shares; the returned value is unchanged.
    assistant_api_key = st.text_input("Paste your API key here:", type="password")

# Single construction site for both cases: key from config.ini, or key typed in.
if assistant_api_key:
    gpt_assistant = GPTAssistant(assistant_api_key)
# Step 1: take an HTML snippet and ask the assistant for the data format it
# can extract from it. The result is kept in st.session_state so it survives
# Streamlit's rerun of the script on the next interaction.
html_content = st.text_input("Paste your piece of HTML here:")
extract_button = st.button("Extract data format")

if html_content and extract_button:
    try:
        # Only the name lookup is guarded: `gpt_assistant` is unbound when no
        # API key has been provided. Keeping the assistant call out of this
        # `try` prevents a NameError raised inside the chain from being
        # misreported as a missing key.
        assistant = gpt_assistant
    except NameError:
        st.write("Complete the API key field")
    else:
        try:
            output = assistant.chain_response_format(html_content)
            st.session_state['output_format'] = output
        except AuthenticationError:
            st.write("Invalid API key")
# Step 2: once a data format has been stored, show it and offer to generate
# the extraction code for it.
if 'output_format' in st.session_state:
    output_format = st.code(st.session_state['output_format'], language="json")

    if st.button("Generate the code"):
        try:
            # Only the name lookup is guarded: `gpt_assistant` is unbound when
            # no API key has been provided. A NameError raised inside the
            # generator chain itself must not be reported as a missing key.
            assistant = gpt_assistant
        except NameError:
            st.write("Complete the API key field")
        else:
            try:
                python_code = assistant.chain_code_generator(
                    st.session_state['output_format'], html_content
                )
                st.session_state['code_generated'] = python_code
                # Executable variant: the generated source plus a call that
                # stores the extraction output in `result`. It expects the
                # generated code to define `extract_info` and a variable
                # `html_data` to exist when it is executed.
                st.session_state['code_generated_exec'] = (
                    python_code + "\nresult = extract_info(html_data)"
                )
            except AuthenticationError:
                st.write("Invalid API key")
# Step 3: show the generated function and let the user run it against a full
# HTML document.
if 'code_generated' in st.session_state:
    python_function_label = st.write("Here is your python function:")
    code_generated = st.code(st.session_state['code_generated'], language="python")
    full_content = st.text_input("Paste your complete HTML here:")
    test_code = st.button("Test the code")

    if full_content and test_code:
        # `html_data` and `result` are module-level names here, so the code
        # executed below (run against globals()) reads the former and
        # rebinds the latter.
        html_data = full_content
        result = None
        try:
            # SECURITY: this executes model-generated source in this process
            # with the script's own globals. It is not sandboxed; treat the
            # generated code as untrusted and only run this app where that
            # risk is acceptable.
            exec(st.session_state['code_generated_exec'], globals())
        except Exception as exc:
            # Generated code can fail in arbitrary ways; report it in the
            # page instead of crashing the whole script run.
            st.write("error extracting data")
            st.exception(exc)
        else:
            if result:
                st.write("data extracted successfully")
                # show data in table
                st.table(result)
            else:
                st.write("error extracting data")