Spaces:
Runtime error
Runtime error
Upload 7 files
Browse files
- .gitignore +2 -0
- AssistantService.py +22 -0
- ExcecuteFunction.py +9 -0
- LICENSE +21 -0
- app.py +60 -0
- config.ini.example +2 -0
- requirements.txt +73 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
config.ini
|
2 |
+
__pycache__/
|
AssistantService.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.chat_models import ChatOpenAI
|
2 |
+
from chains.output_format.base import chain_output_format
|
3 |
+
from chains.code_generator.base import chain_code_generator
|
4 |
+
import os
|
5 |
+
|
6 |
+
class GPTAssistant:
    """Wrap a ChatOpenAI model and expose the project's two LLM chains."""

    def __init__(self, api_key: str):
        """Export *api_key* as ``OPENAI_API_KEY`` and build the chat model.

        Note that the key is written to the process environment, so it is
        visible to everything running in this process.
        """
        os.environ['OPENAI_API_KEY'] = api_key
        self.llm = ChatOpenAI(
            temperature=0,
            model_name='gpt-3.5-turbo',
            request_timeout=120,
            client=None,
        )

    def chain_response_format(self, html_content):
        """Run the output-format chain on *html_content* and return its result."""
        return chain_output_format(self.llm).run(html_content=html_content)

    def chain_code_generator(self, output_format, html_content):
        """Run the code-generator chain and return its result.

        Both *output_format* and *html_content* are forwarded to the chain as
        keyword inputs of the same names.
        """
        # The bare name below resolves to the module-level import, not to this
        # method: names bound in a class body are not in scope inside methods.
        generator = chain_code_generator(self.llm)
        return generator.run(output_format=output_format, html_content=html_content)
|
ExcecuteFunction.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import importlib
|
2 |
+
|
3 |
+
def execute_function(module_name: str = "output", function_name: str = "extract_info"):
    """Import a module by name and return one attribute from it.

    Called with no arguments this behaves as before: it imports the module
    ``output`` and returns its ``extract_info`` attribute.

    Args:
        module_name: Importable module name passed to
            ``importlib.import_module``.
        function_name: Name of the attribute to fetch from that module.

    Returns:
        The attribute found on the imported module (not called here).

    Raises:
        ModuleNotFoundError: If *module_name* cannot be imported.
        AttributeError: If the module has no attribute *function_name*.
    """
    # Distinct names for the name strings and the resolved objects; the
    # previous version rebound the same variables from str to object.
    target_module = importlib.import_module(module_name)
    target = getattr(target_module, function_name)
    print("returning function")
    return target
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 Tomas Bourgeois
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
app.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from AssistantService import GPTAssistant
|
2 |
+
from openai.error import AuthenticationError
|
3 |
+
import streamlit as st
|
4 |
+
import configparser
|
5 |
+
|
6 |
+
# Streamlit page: ask for an OpenAI key and an HTML snippet, let the assistant
# propose an output format, generate a scraper function, then try it out.
# NOTE(review): block nesting was reconstructed from a diff view that had lost
# its indentation -- confirm against the original app.py.

# config.read() ignores a missing file and ConfigParser always exposes the
# DEFAULT section, so this yields '' whenever no key has been configured.
config = configparser.ConfigParser()
config.read('config.ini')
assistant_api_key = config['DEFAULT'].get('API-KEY', fallback='')

st.title("Web Scraping Assistant")
st.write("This app helps you to extract data from HTML code using web scraping. It uses GPT-3.5-turbo to generate the code for you.")

if not assistant_api_key:
    # Mask the key so it is not shown in clear text on screen.
    assistant_api_key = st.text_input("Paste your API key here:", type="password")

# None means "no key supplied yet"; checked explicitly below instead of
# relying on a NameError from an unbound variable.
gpt_assistant = GPTAssistant(assistant_api_key) if assistant_api_key else None

html_content = st.text_input("Paste your piece of HTML here:")

if html_content:
    if st.button("Extract data format"):
        if gpt_assistant is None:
            st.write("Complete the API key field")
        else:
            try:
                output = gpt_assistant.chain_response_format(html_content)
            except AuthenticationError:
                st.write("Invalid API key")
            else:
                st.session_state['output_format'] = output

    if 'output_format' in st.session_state:
        st.code(st.session_state['output_format'], language="json")

        if st.button("Generate the code"):
            if gpt_assistant is None:
                st.write("Complete the API key field")
            else:
                try:
                    python_code = gpt_assistant.chain_code_generator(
                        st.session_state['output_format'], html_content
                    )
                except AuthenticationError:
                    st.write("Invalid API key")
                else:
                    st.session_state['code_generated'] = python_code
                    # Appended call stores the extraction result in `result`.
                    st.session_state['code_generated_exec'] = python_code + "\nresult = extract_info(html_data)"

    if 'code_generated' in st.session_state:
        st.write("Here is your python function:")
        st.code(st.session_state['code_generated'], language="python")
        full_content = st.text_input("Paste your complete HTML here:")
        if full_content and st.button("Test the code"):
            # SECURITY: this executes model-generated code inside the app
            # process. It is not sandboxed; only run this app where that is
            # acceptable.
            # A dedicated namespace keeps the generated code from reading or
            # overwriting this script's own globals.
            namespace = {'html_data': full_content}
            try:
                exec(st.session_state['code_generated_exec'], namespace)
            except Exception as err:
                # Generated code can fail in arbitrary ways; report it on the
                # page instead of crashing the app.
                st.write("error extracting data")
                st.exception(err)
            else:
                result = namespace.get('result')
                if result:
                    st.write("data extracted successfully")
                    # show data in table
                    st.table(result)
                else:
                    st.write("error extracting data")
|
config.ini.example
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
[DEFAULT]
|
2 |
+
API-KEY=OpenAI API KEY HERE
|
requirements.txt
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiohttp==3.8.4
|
2 |
+
aiosignal==1.3.1
|
3 |
+
altair==4.2.2
|
4 |
+
async-timeout==4.0.2
|
5 |
+
attrs==23.1.0
|
6 |
+
beautifulsoup4==4.12.2
|
7 |
+
blinker==1.6.2
|
8 |
+
cachetools==5.3.0
|
9 |
+
certifi==2023.5.7
|
10 |
+
charset-normalizer==3.1.0
|
11 |
+
click==8.1.3
|
12 |
+
colorama==0.4.6
|
13 |
+
dataclasses-json==0.5.7
|
14 |
+
decorator==5.1.1
|
15 |
+
entrypoints==0.4
|
16 |
+
frozenlist==1.3.3
|
17 |
+
gitdb==4.0.10
|
18 |
+
GitPython==3.1.31
|
19 |
+
greenlet==2.0.2
|
20 |
+
idna==3.4
|
21 |
+
importlib-metadata==6.6.0
|
22 |
+
Jinja2==3.1.2
|
23 |
+
jsonschema==4.17.3
|
24 |
+
langchain==0.0.167
|
25 |
+
lxml==4.9.2
|
26 |
+
markdown-it-py==2.2.0
|
27 |
+
MarkupSafe==2.1.2
|
28 |
+
marshmallow==3.19.0
|
29 |
+
marshmallow-enum==1.5.1
|
30 |
+
mdurl==0.1.2
|
31 |
+
MechanicalSoup==1.2.0
|
32 |
+
multidict==6.0.4
|
33 |
+
mypy-extensions==1.0.0
|
34 |
+
numexpr==2.8.4
|
35 |
+
numpy==1.24.3
|
36 |
+
openai==0.27.6
|
37 |
+
openapi-schema-pydantic==1.2.4
|
38 |
+
packaging==23.1
|
39 |
+
pandas==2.0.1
|
40 |
+
Pillow==9.5.0
|
41 |
+
protobuf==3.20.3
|
42 |
+
pyarrow==12.0.0
|
43 |
+
pydantic==1.10.7
|
44 |
+
pydeck==0.8.1b0
|
45 |
+
Pygments==2.15.1
|
46 |
+
Pympler==1.0.1
|
47 |
+
pyrsistent==0.19.3
|
48 |
+
python-dateutil==2.8.2
|
49 |
+
pytz==2023.3
|
50 |
+
pytz-deprecation-shim==0.1.0.post0
|
51 |
+
PyYAML==6.0
|
52 |
+
requests==2.30.0
|
53 |
+
rich==13.3.5
|
54 |
+
six==1.16.0
|
55 |
+
smmap==5.0.0
|
56 |
+
soupsieve==2.4.1
|
57 |
+
SQLAlchemy==2.0.13
|
58 |
+
streamlit==1.22.0
|
59 |
+
streamlit-ace==0.1.1
|
60 |
+
tenacity==8.2.2
|
61 |
+
toml==0.10.2
|
62 |
+
toolz==0.12.0
|
63 |
+
tornado==6.3.1
|
64 |
+
tqdm==4.65.0
|
65 |
+
typing-inspect==0.8.0
|
66 |
+
typing_extensions==4.5.0
|
67 |
+
tzdata==2023.3
|
68 |
+
tzlocal==4.3
|
69 |
+
urllib3==2.0.2
|
70 |
+
validators==0.20.0
|
71 |
+
watchdog==3.0.0
|
72 |
+
yarl==1.9.2
|
73 |
+
zipp==3.15.0
|