Mike Frantz committed on
Commit
09b16c3
1 Parent(s): 31e0744

Initial commit

Browse files
Files changed (2) hide show
  1. app.py +69 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import difflib
2
+ import gradio as gr
3
+ import soundfile as sf
4
+ from transformers import pipeline
5
+ from tokenizers.pre_tokenizers import Whitespace
6
+ from tokenizers.normalizers import BertNormalizer
7
+
8
# AutoProcessor / AutoModelForCTC were used below without being imported,
# which raised NameError as soon as the script started.
from transformers import AutoModelForCTC, AutoProcessor

# Checkpoint used for speech recognition.
_ASR_CHECKPOINT = "facebook/wav2vec2-base-960h"

processor = AutoProcessor.from_pretrained(_ASR_CHECKPOINT)
model = AutoModelForCTC.from_pretrained(_ASR_CHECKPOINT)

# Gradio input/output components wired into the interface further down.
audio_input = gr.inputs.Audio(source='microphone', label='Read the passage', type="filepath")
text_input = gr.inputs.Textbox(label='Sample passage')
text_output = gr.outputs.Textbox(label='Output')
highlighted_text_output = gr.outputs.HighlightedText(color_map={"+": "green", "-": "pink"})

# Reuse the model and processor loaded above so the checkpoint is not
# loaded a second time by the pipeline.
speech_to_text = pipeline(
    'automatic-speech-recognition',
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
)

# Shared matcher used to compute the similarity ratio between two token lists.
sm = difflib.SequenceMatcher(None)

# Text normalisation and whitespace/punctuation splitting used by preprocess().
splitter = Whitespace()
normalizer = BertNormalizer()
22
+
23
def preprocess(s):
    """Normalize *s* and split it into a list of token strings.

    The text is run through the module-level ``normalizer`` and then
    pre-tokenized with the module-level ``splitter``. Only the text of each
    ``(token, offsets)`` pair is kept.
    """
    normalized = normalizer.normalize_str(s)
    return [piece for piece, _offsets in splitter.pre_tokenize_str(normalized)]
25
+
26
def diff_texts(text1, text2):
    """Return a token-level diff of two texts as ``(token, label)`` pairs.

    Both texts are tokenized with ``preprocess`` and compared using
    ``difflib.Differ``. The label is ``"-"`` for a token present only in
    *text1*, ``"+"`` for a token present only in *text2*, and ``None`` for a
    token common to both.
    """
    differ = difflib.Differ()
    # Differ also yields "? " guide lines when two tokens are similar but not
    # equal (e.g. "reading" vs "readings"). They are visual hints rather than
    # tokens, so they are dropped instead of being shown as words.
    return [
        (entry[2:], None if entry[0] == " " else entry[0])
        for entry in differ.compare(preprocess(text1), preprocess(text2))
        if entry[0] != "?"
    ]
32
+
33
def func(audio, text):
    """Transcribe a recording and compare it with the expected passage.

    Args:
        audio: The recording handed over by the audio input component
            (configured with ``type="filepath"``), or ``None`` when nothing
            was recorded.
        text: The passage the reader was asked to read.

    Returns:
        A ``(report, diff)`` tuple. ``report`` is a text summary containing
        the lower-cased passage, the lower-cased transcription and the
        ``SequenceMatcher`` similarity ratio of their tokens. ``diff`` is the
        list of ``(token, label)`` pairs produced by ``diff_texts``.
    """
    # Without a recording there is nothing to transcribe; return a readable
    # message instead of letting the speech pipeline fail on None.
    if audio is None:
        return "No audio was recorded. Please record yourself reading the passage and try again.", []

    results = speech_to_text(audio)['text'].lower()
    # Treat a missing passage as empty text rather than crashing on None.
    text = (text or "").lower()
    sm.set_seqs(preprocess(results), preprocess(text))

    r = (
        "\n"
        f"Original passage:\n{text}\n\n"
        f"What we heard:\n{results}\n\n"
        f"Ratio:\n{sm.ratio()}\n"
    )

    d = diff_texts(results, text)

    return r, d
54
+
55
# Heading and introductory copy displayed on the Gradio page.
title = "Reading Practice Application"
description = (
    "\n"
    "This application is a POC for reading practice.\n"
    "It compares some input text against an audio recording.\n"
    "The intention is to help individuals with reading challenges identify how to improve their reading.\n"
)

# Assemble the UI: microphone recording + passage in, report + highlighted diff out.
interface = gr.Interface(
    fn=func,
    inputs=[audio_input, text_input],
    outputs=[text_output, highlighted_text_output],
    title=title,
    description=description,
)
interface.launch(inline=True, debug=True)
69
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers==4.18.0
2
+ gradio==2.9.1
3
+ datasets==2.0.0
4
+ tokenizers==0.11.6
5
+