klasocki committed on
Commit
35c0239
1 Parent(s): 5760b44

Fix the tests and the conversion of model results to strings

Browse files
Files changed (4) hide show
  1. app.py +3 -2
  2. src/baseline.py +13 -6
  3. tests/test_baseline.py +3 -3
  4. tests/test_integration.py +33 -18
app.py CHANGED
@@ -9,7 +9,8 @@ logging.basicConfig(level=logging.INFO)
9
 
10
  @app.route('/', methods=['GET'])
11
  def root():
12
- return ("Welcome to the comma fixer. Go to /fix-commas?s='some text' or /baseline/fix-commas?s='some text' to try "
 
13
  "out the functionality.")
14
 
15
 
@@ -17,7 +18,7 @@ def root():
17
  def fix_commas_with_baseline():
18
  data = request.get_json()
19
  if 's' in data:
20
- return make_response(jsonify({"s": fix_commas(app.baseline_pipeline, data['s'])}), 200)
21
  else:
22
  return make_response("Parameter 's' missing", 400)
23
 
 
9
 
10
  @app.route('/', methods=['GET'])
11
  def root():
12
+ return ("Welcome to the comma fixer. Send a POST request to /fix-commas or /baseline/fix-commas with a string "
13
+ "'s' in the JSON body to try "
14
  "out the functionality.")
15
 
16
 
 
18
  def fix_commas_with_baseline():
19
  data = request.get_json()
20
  if 's' in data:
21
+ return make_response(jsonify({'s': fix_commas(app.baseline_pipeline, data['s'])}), 200)
22
  else:
23
  return make_response("Parameter 's' missing", 400)
24
 
src/baseline.py CHANGED
@@ -14,16 +14,23 @@ def _remove_punctuation(s: str) -> str:
14
  return s
15
 
16
 
17
- def _convert_pipeline_json_to_string(pipeline_json: list[dict]) -> str:
18
  # TODO is it ok to remove redundant spaces, or should we keep input data as is and only touch commas?
19
  # TODO don't accept tokens with commas inside words
20
- return ''.join(
21
- token['word'].replace('▁', ' ') + token['entity'].replace('0', '')
22
- for token in pipeline_json
23
- ).strip()
 
 
 
 
 
 
24
 
25
 
26
  def fix_commas(ner_pipeline: NerPipeline, s: str) -> str:
27
  return _convert_pipeline_json_to_string(
28
- ner_pipeline(_remove_punctuation(s))
 
29
  )
 
14
  return s
15
 
16
 
17
+ def _convert_pipeline_json_to_string(pipeline_json: list[dict], original_s: str) -> str:
18
  # TODO is it ok to remove redundant spaces, or should we keep input data as is and only touch commas?
19
  # TODO don't accept tokens with commas inside words
20
+ result = original_s.replace(',', '') # We will fix the commas, but keep everything else intact
21
+ current_offset = 0
22
+ for i in range(1, len(pipeline_json)):
23
+ current_word = pipeline_json[i - 1]['word'].replace('▁', '')
24
+ current_offset = result.find(current_word, current_offset) + len(current_word)
25
+ # Only insert commas for the final token of a word
26
+ if pipeline_json[i - 1]['entity'] == ',' and pipeline_json[i]['word'].startswith('▁'):
27
+ result = result[:current_offset] + ',' + result[current_offset:]
28
+ current_offset += 1
29
+ return result
30
 
31
 
32
  def fix_commas(ner_pipeline: NerPipeline, s: str) -> str:
33
  return _convert_pipeline_json_to_string(
34
+ ner_pipeline(_remove_punctuation(s)),
35
+ s
36
  )
tests/test_baseline.py CHANGED
@@ -21,9 +21,9 @@ def test_fix_commas_leaves_correct_strings_unchanged(baseline_pipeline, test_inp
21
  @pytest.mark.parametrize(
22
  "test_input, expected",
23
  [
24
- ['I, am', 'I am.'],
25
- ['A complex clause however it misses a comma something else and a dot?',
26
- 'A complex claus,e, however, it misses a comma, something else and a dot.']]
27
  )
28
  def test_fix_commas_fixes_incorrect_commas(baseline_pipeline, test_input, expected):
29
  result = fix_commas(baseline_pipeline, s=test_input)
 
21
  @pytest.mark.parametrize(
22
  "test_input, expected",
23
  [
24
+ ['I, am.', 'I am.'],
25
+ ['A complex clause however it misses a comma something else and a dot...?',
26
+ 'A complex clause, however, it misses a comma, something else and a dot...?']]
27
  )
28
  def test_fix_commas_fixes_incorrect_commas(baseline_pipeline, test_input, expected):
29
  result = fix_commas(baseline_pipeline, s=test_input)
tests/test_integration.py CHANGED
@@ -1,34 +1,49 @@
1
- import json
 
2
 
3
  from app import app
4
- import pytest
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
 
7
- def test_fix_commas_fails_on_no_parameter():
8
- response = app.test_client().post('/baseline/fix-commas/')
9
  assert response.status_code == 400
10
 
11
 
12
  @pytest.mark.parametrize(
13
  "test_input",
14
- [[''],
15
- ['Hello world.'],
16
- ['This test string should not have any commas inside it.']]
17
  )
18
- def test_fix_commas_plain_string_unchanged(test_input: str):
19
- response = app.test_client().post('/baseline/fix-commas/', data={'s': test_input})
20
- print(response.data.decode('utf-8'))
21
- # result = json.loads(response.data.decode('utf-8')).get('s')
22
  assert response.status_code == 200
23
- # assert result == test_input
24
 
25
 
26
  @pytest.mark.parametrize(
27
  "test_input, expected",
28
- [['', ''],
29
- ['Hello world.', 'Hello world.'],
30
- ['This test string should not have any commas inside it.',
31
- 'This test string should not have any commas inside it.']]
32
  )
33
- def test_fix_commas_fixes_wrong_commas(test_input: str, expected: str):
34
- assert False
 
 
 
 
1
+ from flask import json
2
+ import pytest
3
 
4
  from app import app
5
+ from baseline import create_baseline_pipeline
6
+
7
+
8
+ @pytest.fixture()
9
+ def client():
10
+ app.config["DEBUG"] = True
11
+ app.config["TESTING"] = True
12
+ app.baseline_pipeline = create_baseline_pipeline()
13
+ yield app.test_client()
14
+
15
+
16
+ def test_fix_commas_fails_on_no_parameter(client):
17
+ response = client.post('/baseline/fix-commas/')
18
+ assert response.status_code == 400
19
 
20
 
21
+ def test_fix_commas_fails_on_wrong_parameters(client):
22
+ response = client.post('/baseline/fix-commas/', json={'text': "Some text."})
23
  assert response.status_code == 400
24
 
25
 
26
  @pytest.mark.parametrize(
27
  "test_input",
28
+ ['',
29
+ 'Hello world.',
30
+ 'This test string should not have any commas inside it.']
31
  )
32
+ def test_fix_commas_plain_string_unchanged(client, test_input: str):
33
+ response = client.post('/baseline/fix-commas/', json={'s': test_input})
34
+
 
35
  assert response.status_code == 200
36
+ assert response.get_json().get('s') == test_input
37
 
38
 
39
  @pytest.mark.parametrize(
40
  "test_input, expected",
41
+ [['I am, here.', 'I am here.'],
42
+ ['books pens and pencils',
43
+ 'books, pens and pencils.']]
 
44
  )
45
+ def test_fix_commas_fixes_wrong_commas(client, test_input: str, expected: str):
46
+ response = client.post('/baseline/fix-commas/', json={'s': test_input})
47
+
48
+ assert response.status_code == 200
49
+ assert response.get_json().get('s') == expected