Spaces:
Sleeping
Sleeping
Fix the tests and the conversion of model results to strings
Browse files
- app.py +3 -2
- src/baseline.py +13 -6
- tests/test_baseline.py +3 -3
- tests/test_integration.py +33 -18
app.py
CHANGED
@@ -9,7 +9,8 @@ logging.basicConfig(level=logging.INFO)
|
|
9 |
|
10 |
@app.route('/', methods=['GET'])
|
11 |
def root():
|
12 |
-
return ("Welcome to the comma fixer.
|
|
|
13 |
"out the functionality.")
|
14 |
|
15 |
|
@@ -17,7 +18,7 @@ def root():
|
|
17 |
def fix_commas_with_baseline():
|
18 |
data = request.get_json()
|
19 |
if 's' in data:
|
20 |
-
return make_response(jsonify({
|
21 |
else:
|
22 |
return make_response("Parameter 's' missing", 400)
|
23 |
|
|
|
9 |
|
10 |
@app.route('/', methods=['GET'])
def root():
    """Return the plain-text welcome message that explains how to call the comma-fixing endpoints."""
    welcome = (
        "Welcome to the comma fixer. Send a POST request to /fix-commas or /baseline/fix-commas with a string "
        "'s' in the JSON body to try "
        "out the functionality."
    )
    return welcome
|
15 |
|
16 |
|
|
|
18 |
def fix_commas_with_baseline():
    """Fix the commas of the string ``s`` from the JSON request body using the baseline pipeline.

    Returns:
        A 200 response whose JSON body is ``{'s': <fixed string>}`` when the request
        body is a JSON object containing the key ``'s'``; otherwise a 400 response
        with a plain-text explanation.
    """
    # silent=True turns a missing, non-JSON or malformed body into None instead of
    # letting the parsing error propagate, so every bad request ends in the same 400.
    data = request.get_json(silent=True)
    # Only a JSON object can carry the parameter: None would make the membership test
    # raise, and a bare JSON string/list would pass or fail it for the wrong reason.
    if isinstance(data, dict) and 's' in data:
        return make_response(jsonify({'s': fix_commas(app.baseline_pipeline, data['s'])}), 200)
    return make_response("Parameter 's' missing", 400)
|
24 |
|
src/baseline.py
CHANGED
@@ -14,16 +14,23 @@ def _remove_punctuation(s: str) -> str:
|
|
14 |
return s
|
15 |
|
16 |
|
17 |
-
def _convert_pipeline_json_to_string(pipeline_json: list[dict]) -> str:
|
18 |
# TODO is it ok to remove redundant spaces, or should we keep input data as is and only touch commas?
|
19 |
# TODO don't accept tokens with commas inside words
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
|
26 |
def fix_commas(ner_pipeline: NerPipeline, s: str) -> str:
|
27 |
return _convert_pipeline_json_to_string(
|
28 |
-
ner_pipeline(_remove_punctuation(s))
|
|
|
29 |
)
|
|
|
14 |
return s
|
15 |
|
16 |
|
17 |
+
def _convert_pipeline_json_to_string(pipeline_json: list[dict], original_s: str) -> str:
|
18 |
# TODO is it ok to remove redundant spaces, or should we keep input data as is and only touch commas?
|
19 |
# TODO don't accept tokens with commas inside words
|
20 |
+
result = original_s.replace(',', '') # We will fix the commas, but keep everything else intact
|
21 |
+
current_offset = 0
|
22 |
+
for i in range(1, len(pipeline_json)):
|
23 |
+
current_word = pipeline_json[i - 1]['word'].replace('▁', '')
|
24 |
+
current_offset = result.find(current_word, current_offset) + len(current_word)
|
25 |
+
# Only insert commas for the final token of a word
|
26 |
+
if pipeline_json[i - 1]['entity'] == ',' and pipeline_json[i]['word'].startswith('▁'):
|
27 |
+
result = result[:current_offset] + ',' + result[current_offset:]
|
28 |
+
current_offset += 1
|
29 |
+
return result
|
30 |
|
31 |
|
32 |
def fix_commas(ner_pipeline: NerPipeline, s: str) -> str:
    """Return ``s`` with its commas replaced by the ones ``ner_pipeline`` predicts.

    The pipeline is run on ``s`` after ``_remove_punctuation`` has been applied to it;
    its predictions are then merged back into the original, unstripped ``s``.
    """
    stripped = _remove_punctuation(s)
    predictions = ner_pipeline(stripped)
    return _convert_pipeline_json_to_string(predictions, s)
|
tests/test_baseline.py
CHANGED
@@ -21,9 +21,9 @@ def test_fix_commas_leaves_correct_strings_unchanged(baseline_pipeline, test_inp
|
|
21 |
@pytest.mark.parametrize(
|
22 |
"test_input, expected",
|
23 |
[
|
24 |
-
['I, am', 'I am.'],
|
25 |
-
['A complex clause however it misses a comma something else and a dot
|
26 |
-
'A complex
|
27 |
)
|
28 |
def test_fix_commas_fixes_incorrect_commas(baseline_pipeline, test_input, expected):
|
29 |
result = fix_commas(baseline_pipeline, s=test_input)
|
|
|
21 |
@pytest.mark.parametrize(
|
22 |
"test_input, expected",
|
23 |
[
|
24 |
+
['I, am.', 'I am.'],
|
25 |
+
['A complex clause however it misses a comma something else and a dot...?',
|
26 |
+
'A complex clause, however, it misses a comma, something else and a dot...?']]
|
27 |
)
|
28 |
def test_fix_commas_fixes_incorrect_commas(baseline_pipeline, test_input, expected):
|
29 |
result = fix_commas(baseline_pipeline, s=test_input)
|
tests/test_integration.py
CHANGED
@@ -1,34 +1,49 @@
|
|
1 |
-
import json
|
|
|
2 |
|
3 |
from app import app
|
4 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
|
7 |
-
def
|
8 |
-
response =
|
9 |
assert response.status_code == 400
|
10 |
|
11 |
|
12 |
@pytest.mark.parametrize(
|
13 |
"test_input",
|
14 |
-
[
|
15 |
-
|
16 |
-
|
17 |
)
|
18 |
-
def test_fix_commas_plain_string_unchanged(test_input: str):
|
19 |
-
response =
|
20 |
-
|
21 |
-
# result = json.loads(response.data.decode('utf-8')).get('s')
|
22 |
assert response.status_code == 200
|
23 |
-
|
24 |
|
25 |
|
26 |
@pytest.mark.parametrize(
|
27 |
"test_input, expected",
|
28 |
-
[['', ''],
|
29 |
-
['
|
30 |
-
|
31 |
-
'This test string should not have any commas inside it.']]
|
32 |
)
|
33 |
-
def test_fix_commas_fixes_wrong_commas(test_input: str, expected: str):
|
34 |
-
|
|
|
|
|
|
|
|
1 |
+
from flask import json
|
2 |
+
import pytest
|
3 |
|
4 |
from app import app
|
5 |
+
from baseline import create_baseline_pipeline
|
6 |
+
|
7 |
+
|
8 |
+
@pytest.fixture()
def client():
    """Yield a test client for ``app`` in debug/testing mode with a baseline pipeline attached."""
    app.config.update(DEBUG=True, TESTING=True)
    app.baseline_pipeline = create_baseline_pipeline()
    yield app.test_client()
|
14 |
+
|
15 |
+
|
16 |
+
def test_fix_commas_fails_on_no_parameter(client):
    """A POST without any body must be answered with HTTP 400."""
    status = client.post('/baseline/fix-commas/').status_code
    assert status == 400
|
19 |
|
20 |
|
21 |
+
def test_fix_commas_fails_on_wrong_parameters(client):
    """A JSON body that lacks the 's' key must be answered with HTTP 400."""
    payload = {'text': "Some text."}
    status = client.post('/baseline/fix-commas/', json=payload).status_code
    assert status == 400
|
24 |
|
25 |
|
26 |
@pytest.mark.parametrize(
    "test_input",
    [
        '',
        'Hello world.',
        'This test string should not have any commas inside it.',
    ],
)
def test_fix_commas_plain_string_unchanged(client, test_input: str):
    """Strings that need no commas must come back from the endpoint unchanged."""
    reply = client.post('/baseline/fix-commas/', json={'s': test_input})

    assert reply.status_code == 200
    assert reply.get_json().get('s') == test_input
|
37 |
|
38 |
|
39 |
@pytest.mark.parametrize(
    "test_input, expected",
    [
        ['I am, here.', 'I am here.'],
        # The comma fixer only adds or removes commas and keeps every other character,
        # so the input must already end with the period that the expected output has.
        ['books pens and pencils.', 'books, pens and pencils.'],
    ],
)
def test_fix_commas_fixes_wrong_commas(client, test_input: str, expected: str):
    """The endpoint must remove misplaced commas and insert missing ones."""
    response = client.post('/baseline/fix-commas/', json={'s': test_input})

    assert response.status_code == 200
    assert response.get_json().get('s') == expected
|