"""Streamlit front end for the GPT-assisted web scraping helper.

Flow of the page, top to bottom:

1. Obtain an API key (from ``config.ini`` or a text box) and build a
   ``GPTAssistant`` with it.
2. Send a pasted HTML fragment to ``GPTAssistant.chain_response_format`` and
   show the returned output format.
3. Send that format plus the HTML to ``GPTAssistant.chain_code_generator``
   and show the returned Python code.
4. Run the generated code against a full HTML document pasted by the user
   and render the result as a table.
"""
import configparser
import os

import streamlit as st
from openai.error import AuthenticationError

from AssistantService import GPTAssistant

# Optional API key from config.ini; an empty string means "ask the user".
config = configparser.ConfigParser()
config.read('config.ini')
assistant_api_key = ''
if 'DEFAULT' in config:
    assistant_api_key = config['DEFAULT'].get('API-KEY', '')

# LangChain tracing settings; credentials come from Streamlit secrets.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = st.secrets["LANGCHAIN_API_KEY"]
os.environ["LANGCHAIN_PROJECT"] = st.secrets["LANGCHAIN_PROJECT"]

st.title("Web Scraping Assistant")
st.write("This app helps you to extract data from HTML code using web scraping. It uses GPT-3.5-turbo to generate the code for you.")
st.write("Contribute to this project on [GitHub](https://github.com/CognitiveLabs/GPT-auto-webscraping)")

if assistant_api_key == '':
    assistant_api_key = st.text_input("Paste your API key here:")

# None is an explicit "no key yet" sentinel, so the handlers below can check
# for it instead of relying on a NameError from an undefined variable.
gpt_assistant = GPTAssistant(assistant_api_key) if assistant_api_key else None

html_content = st.text_input("Paste your piece of HTML here:")

extract_button = st.button("Extract data format")

if html_content and extract_button:
    if gpt_assistant is None:
        st.write("Complete the API key field")
    else:
        try:
            output = gpt_assistant.chain_response_format(html_content)
            st.session_state['output_format'] = output
        except AuthenticationError:
            st.write("Invalid API key")

if 'output_format' in st.session_state:
    st.code(st.session_state['output_format'], language="json")

    if st.button("Generate the code"):
        if gpt_assistant is None:
            st.write("Complete the API key field")
        else:
            try:
                python_code = gpt_assistant.chain_code_generator(st.session_state['output_format'], html_content)
                st.session_state['code_generated'] = python_code
                # The executable variant calls the generated function and
                # leaves its return value in the global name ``result``.
                st.session_state['code_generated_exec'] = python_code + "\nresult = extract_info(html_data)"
            except AuthenticationError:
                st.write("Invalid API key")

if 'code_generated' in st.session_state:
    st.write("Here is your python function:")
    st.code(st.session_state['code_generated'], language="python")
    full_content = st.text_input("Paste your complete HTML here:")
    test_code = st.button("Test the code")
    if full_content and test_code:
        # ``html_data`` and ``result`` must be module globals: the executed
        # snippet reads the former and assigns the latter.
        html_data = full_content
        result = None
        try:
            # SECURITY: this executes model-generated code with full access to
            # this module's globals. Only run this app in an environment where
            # executing untrusted code is acceptable (e.g. a sandbox).
            exec(st.session_state['code_generated_exec'], globals())
        except Exception as exc:
            # Generated code can fail in arbitrary ways; report the failure
            # instead of crashing the page.
            st.write("error extracting data")
            st.exception(exc)
        else:
            if result:
                st.write("data extracted successfully")
                # show data in table
                st.table(result)
            else:
                st.write("error extracting data")

with st.expander(label="How to use this app"):
    st.write("1. Paste the html code of your target element in the first text box and press \"Enter\"")
    example = st.button("Show example")
    if example:
        # NOTE(review): the original literal was garbled; only its text content
        # (name, description, price) survived. The tags below are a
        # reconstruction -- confirm against the intended example.
        st.text_area(
            "Example",
            value='<li class="product"><h3>Product 1</h3><p>This is the description of the product 1</p><span class="price">10.00</span></li>',
        )
        # Clicking this button reruns the script; on that rerun "Show example"
        # is no longer pressed, so the example is not rendered again.
        st.button("Close example")
    st.write("2. Click on the button 'Extract data format'")
    st.write("3. Click on the button 'Generate the code'")
    st.write("4. Paste the complete html code in the last text box to test the auto generated code")
    st.write("5. Copy the code and include it in your own projects")