GianJSX's picture
Update app.py
d6579b5
raw
history blame
3.64 kB
from AssistantService import GPTAssistant
from openai.error import AuthenticationError
import streamlit as st
import configparser
import os
# Optional local settings: config.ini may provide the assistant API key under
# the DEFAULT section's "API-KEY" option; an empty string means "not provided".
config = configparser.ConfigParser()
config.read('config.ini')
if 'DEFAULT' in config:
    assistant_api_key = config.get('DEFAULT', 'API-KEY', fallback='')

# LangChain tracing settings: two fixed values first, then the two values
# read from the Streamlit secrets store.
os.environ.update(
    LANGCHAIN_TRACING_V2="true",
    LANGCHAIN_ENDPOINT="https://api.smith.langchain.com",
)
os.environ["LANGCHAIN_API_KEY"] = st.secrets["LANGCHAIN_API_KEY"]
os.environ["LANGCHAIN_PROJECT"] = st.secrets["LANGCHAIN_PROJECT"]
# Page header.
st.title("Web Scraping Assistant")
st.write("This app helps you to extract data from HTML code using web scraping. It uses GPT-3.5-turbo to generate the code for you.")
st.write("Contribute to this project on [GitHub](https://github.com/CognitiveLabs/GPT-auto-webscraping)")

# When config.ini supplied no key, ask the user for one. The field is masked
# (type="password") so the secret is not echoed in plain text on screen.
if assistant_api_key == '':
    assistant_api_key = st.text_input("Paste your API key here:", type="password")

# Build the assistant only once a key is available. `gpt_assistant` is
# intentionally left undefined otherwise: the request handlers later in this
# script catch the resulting NameError and ask the user to fill in the key.
if assistant_api_key:
    gpt_assistant = GPTAssistant(assistant_api_key)
# Step 1: the user pastes one representative HTML element and asks the
# assistant to derive the output data format from it.
html_content = st.text_input("Paste your piece of HTML here:")
extract_button = st.button("Extract data format")

if extract_button and html_content:
    try:
        output = gpt_assistant.chain_response_format(html_content)
    except NameError:
        # `gpt_assistant` does not exist when no API key has been provided.
        st.write("Complete the API key field")
    except AuthenticationError:
        st.write("Invalid API key")
    else:
        # Persist the format so it survives Streamlit reruns.
        st.session_state['output_format'] = output
# Step 2: once a format has been extracted, display it and offer to generate
# the scraping function for it.
if 'output_format' in st.session_state:
    output_format = st.code(st.session_state['output_format'], language="json")

    if st.button("Generate the code"):
        try:
            python_code = gpt_assistant.chain_code_generator(
                st.session_state['output_format'],
                html_content,
            )
        except NameError:
            # `gpt_assistant` does not exist when no API key has been provided.
            st.write("Complete the API key field")
        except AuthenticationError:
            st.write("Invalid API key")
        else:
            st.session_state['code_generated'] = python_code
            # Executable variant: the generated function followed by a call
            # that stores its output in `result`.
            st.session_state['code_generated_exec'] = (
                python_code + "\nresult = extract_info(html_data)"
            )
# Step 3: show the generated function and let the user try it on a full page.
if 'code_generated' in st.session_state:
    python_function_label = st.write("Here is your python function:")
    code_generated = st.code(st.session_state['code_generated'], language="python")
    full_content = st.text_input("Paste your complete HTML here:")
    test_code = st.button("Test the code")

    if full_content and test_code:
        # The generated snippet communicates through module globals: it reads
        # `html_data` and its appended last line assigns `result`.
        html_data = full_content
        result = None
        # SECURITY NOTE(review): this executes model-generated Python with the
        # app's own globals. Only run it in an environment where that is
        # acceptable; consider sandboxing before exposing the app publicly.
        try:
            exec(st.session_state['code_generated_exec'], globals())
        except Exception as exec_error:
            # Generated code may be invalid or fail on the supplied HTML.
            # Report the failure instead of aborting the script run, which
            # would also prevent the rest of the page from rendering.
            st.write("error extracting data")
            st.exception(exec_error)
        else:
            if result:
                st.write("data extracted successfully")
                # show data in table
                st.table(result)
            else:
                st.write("error extracting data")
# Collapsible usage instructions.
with st.expander(label="How to use this app"):
    st.write("1. Paste the html code of your target element in the first text box and press \"Enter\"")
    # The example is rendered only on the run triggered by "Show example".
    # Clicking "Close example" triggers a new run in which "Show example" is
    # no longer pressed, so the example disappears without extra state
    # handling. (The former `close_example.disabled = True` tried to set an
    # attribute on the bool returned by st.button and has been removed.)
    if st.button("Show example"):
        st.text_area("Example", value='<li><div class="product"> <h3 class="title">Product 1</h3> <p class="description">This is the description of the product 1</p> <span class="price">10.00</span> </div></li>')
        st.button("Close example")

    st.write("2. Click on the button 'Extract data format'")
    st.write("3. Click on the button 'Generate the code'")
    st.write("4. Paste the complete html code in the last text box to test the auto generated code")
    st.write("5. Copy the code and include it in your own projects")