Spaces:
Sleeping
Sleeping
initial commit
Browse files- .gitignore +2 -0
- Dockerfile +22 -0
- app.py +15 -0
- qnabackend/__init__.py +12 -0
- qnabackend/common/__init__.py +0 -0
- qnabackend/common/utils.py +50 -0
- qnabackend/config.py +3 -0
- qnabackend/resources/__init__.py +0 -0
- qnabackend/resources/routes.py +17 -0
- requirements.txt +36 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
**/__pycache__/
|
2 |
+
.venv/
|
Dockerfile
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
2 |
+
# you will also find guides on how best to write your Dockerfile
|
3 |
+
|
4 |
+
FROM python:3.9
|
5 |
+
|
6 |
+
WORKDIR /code
|
7 |
+
|
8 |
+
COPY ./requirements.txt /code/requirements.txt
|
9 |
+
|
10 |
+
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
11 |
+
|
12 |
+
RUN useradd -m -u 1000 user
|
13 |
+
USER user
|
14 |
+
ENV HOME=/home/user \
|
15 |
+
PATH=/home/user/.local/bin:$PATH
|
16 |
+
|
17 |
+
WORKDIR $HOME/app
|
18 |
+
|
19 |
+
|
20 |
+
COPY --chown=user . $HOME/app
|
21 |
+
|
22 |
+
CMD ["python", "app.py"]
|
app.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from qnabackend import create_app
|
2 |
+
|
3 |
+
|
4 |
+
# Build the Flask application through the package factory.
app = create_app()


@app.route("/")
def home():
    """Answer requests to the root URL with a fixed plain-text greeting."""
    greeting = "hello world"
    return greeting
|
9 |
+
|
10 |
+
|
11 |
+
if __name__ == '__main__':
    import os

    # The Dockerfile starts this script directly (CMD ["python", "app.py"]), so
    # the server has to listen on all interfaces to be reachable from outside
    # the container. 7860 is the port used by the previously commented-out
    # deployment variant of this call.
    # Debug mode enables the interactive Werkzeug debugger, so it stays off
    # unless FLASK_DEBUG=1 is set explicitly (e.g. for local development).
    app.run(
        debug=os.environ.get("FLASK_DEBUG", "0") == "1",
        host=os.environ.get("HOST", "0.0.0.0"),
        port=int(os.environ.get("PORT", "7860")),
    )
|
qnabackend/__init__.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask
|
2 |
+
from qnabackend.config import Config
|
3 |
+
from flask_cors import CORS
|
4 |
+
|
5 |
+
def create_app(config_class = Config):
    """Application factory: build and return a configured Flask app.

    The app gets CORS enabled via ``CORS(app)``, loads its settings from
    *config_class*, and has the ``resources`` blueprint registered.
    """
    flask_app = Flask(__name__)
    CORS(flask_app)
    flask_app.config.from_object(config_class)

    # Imported at call time rather than at module import time
    # (presumably to avoid a circular import -- TODO confirm).
    from qnabackend.resources.routes import resources as resources_blueprint

    flask_app.register_blueprint(resources_blueprint)
    return flask_app
|
qnabackend/common/__init__.py
ADDED
File without changes
|
qnabackend/common/utils.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from bs4 import BeautifulSoup
|
2 |
+
import requests
|
3 |
+
|
4 |
+
def getText(url: str, timeout: float = 10, max_chars: int = 20000):
    """Fetch *url* and return the text of its ``<p>`` elements joined by spaces.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up.
        max_chars: Largest amount of extracted text that is accepted.

    Returns:
        The extracted text, or ``None`` when the page could not be fetched
        (network error or non-200 status) or the text is longer than
        *max_chars*.
    """
    # NOTE(review): `url` is fetched as given; if it comes from an untrusted
    # client, consider restricting which hosts may be requested.
    try:
        # Without a timeout an unresponsive host would block the caller forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        print("[INFO] couldn't access website data, try again")
        return None

    if response.status_code != 200:
        print("[INFO] couldn't access website data, try again")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    paragraphs = soup.find_all(['p'])
    scraped_text = ' '.join(element.get_text() for element in paragraphs)

    if len(scraped_text) > max_chars:
        print("[ERROR] page too large to perform qna")
        return None

    return scraped_text
|
22 |
+
|
23 |
+
|
24 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
25 |
+
|
26 |
+
# Single source for the checkpoint name so model and tokenizer cannot drift apart.
_MODEL_NAME = 'google/flan-t5-large'

# Loaded once at import time and reused by every call to getAnswer.
model = AutoModelForSeq2SeqLM.from_pretrained(_MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)


def getAnswer(url : str, question : str):
    """Answer *question* using the paragraph text scraped from *url*.

    Args:
        url: Page whose text is used as context.
        question: Question to ask about that page.

    Returns:
        The decoded model output, or an explanatory message when no usable
        text could be obtained from the page.
    """
    context = getText(url)

    # getText returns None when the page is unreachable or too large. Without
    # this guard the literal string "None" would be fed to the model as context.
    if not context:
        return "Could not read any usable text from the given page."

    inputs = tokenizer(
        f"context : {context}, question : {question}",
        return_tensors = 'pt',
    ).input_ids

    # NOTE(review): temperature / top_k / top_p are sampling controls and
    # do_sample is not set here, so they may have no effect alongside
    # num_beams -- confirm against the transformers generation docs.
    outputs = model.generate(
        inputs,
        min_length = 10,
        max_new_tokens = 600,
        length_penalty = 1,
        num_beams = 3,
        no_repeat_ngram_size = 3,
        temperature = 0.7,
        top_k = 110,
        top_p = 0.8,
        repetition_penalty = 2.1
    )

    # Only the first returned sequence is decoded.
    answer = tokenizer.decode(outputs[0], skip_special_tokens = True)

    return answer
|
qnabackend/config.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import os


class Config:
    """Flask settings object passed to ``app.config.from_object``."""

    # Prefer a key supplied through the environment so the secret does not
    # have to live in the repository. The literal fallback keeps existing
    # deployments working.
    # NOTE(review): the fallback value is committed to version control and
    # should be treated as compromised -- rotate it and set SECRET_KEY instead.
    SECRET_KEY = os.environ.get('SECRET_KEY', '7a2b25ca707a5be465f9a8894f528999')
|
qnabackend/resources/__init__.py
ADDED
File without changes
|
qnabackend/resources/routes.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Blueprint, request
|
2 |
+
from flask_restful import Api, Resource
|
3 |
+
from qnabackend.common.utils import getAnswer
|
4 |
+
|
5 |
+
resources = Blueprint('resources', __name__)
api = Api(resources)


class Backend(Resource):
    """REST resource that answers a question about a web page."""

    def post(self):
        """Handle ``POST /question``.

        Expects a JSON object with non-empty ``url`` and ``question`` fields.

        Returns:
            ``{'question': ..., 'answer': ...}`` on success, or an error
            object with HTTP status 400 when the body is not a JSON object
            or a required field is missing.
        """
        # silent=True yields None instead of raising on a missing/invalid body,
        # so every malformed request is reported the same way below.
        payload = request.get_json(silent=True)
        if not isinstance(payload, dict):
            return {'error': 'request body must be a JSON object'}, 400

        url = payload.get('url')
        question = payload.get('question')
        if not url or not question:
            # Previously a missing key raised KeyError and surfaced as a 500.
            return {'error': "both 'url' and 'question' are required"}, 400

        answer = getAnswer(url, question)

        return {'question' : question, 'answer' : answer}


api.add_resource(Backend, '/question')
|
requirements.txt
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aniso8601==9.0.1
|
2 |
+
beautifulsoup4==4.12.2
|
3 |
+
blinker==1.6.2
|
4 |
+
certifi==2023.7.22
|
5 |
+
charset-normalizer==3.2.0
|
6 |
+
click==8.1.6
|
7 |
+
colorama==0.4.6
|
8 |
+
filelock==3.12.2
|
9 |
+
Flask==2.3.2
|
10 |
+
Flask-Cors==4.0.0
|
11 |
+
Flask-RESTful==0.3.10
|
12 |
+
fsspec==2023.6.0
|
13 |
+
huggingface-hub==0.16.4
|
14 |
+
idna==3.4
|
15 |
+
itsdangerous==2.1.2
|
16 |
+
Jinja2==3.1.2
|
17 |
+
MarkupSafe==2.1.3
|
18 |
+
mpmath==1.3.0
|
19 |
+
networkx==3.1
|
20 |
+
numpy==1.25.2
|
21 |
+
packaging==23.1
|
22 |
+
pytz==2023.3
|
23 |
+
PyYAML==6.0.1
|
24 |
+
regex==2023.8.8
|
25 |
+
requests==2.31.0
|
26 |
+
safetensors==0.3.2
|
27 |
+
six==1.16.0
|
28 |
+
soupsieve==2.4.1
|
29 |
+
sympy==1.12
|
30 |
+
tokenizers==0.13.3
|
31 |
+
torch==2.0.1
|
32 |
+
tqdm==4.66.1
|
33 |
+
transformers==4.31.0
|
34 |
+
typing_extensions==4.7.1
|
35 |
+
urllib3==2.0.4
|
36 |
+
Werkzeug==2.3.6
|