Michelvh committed on
Commit
92da267
1 Parent(s): 0177f22

Update app

Browse files
Files changed (2) hide show
  1. app.py +54 -0
  2. requirements.txt +2 -0
app.py CHANGED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import T5ForConditionalGeneration, T5TokenizerFast
3
+ import nltk
4
+ from nltk import tokenize
5
+
6
# chunkText() splits text with nltk's sent_tokenize, which needs the Punkt
# sentence-tokenizer data to be present locally and raises LookupError
# otherwise. Fetch it once at startup. Newer NLTK releases look for the
# "punkt_tab" resource instead of "punkt", so request both.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource, quiet=True)

# Base checkpoint that supplies the tokenizer vocabulary.
checkpoint = "yhavinga/t5-base-dutch"
tokenizer = T5TokenizerFast.from_pretrained(checkpoint)
# Register '<sep>' with the tokenizer; hf_run_model splits the decoded output
# on this marker.
tokenizer.sep_token = '<sep>'
tokenizer.add_tokens(['<sep>'])

# Fine-tuned model used for generation.
hfmodel = T5ForConditionalGeneration.from_pretrained("Michelvh/t5-end2end-questions-generation-dutch")
12
+
13
def hf_run_model(input_string, **generator_args):
    """Run the model on one piece of text and return the decoded output.

    Args:
        input_string: Text to send to the model. It is wrapped in the
            "generate questions: ... </s>" prompt before being encoded.
        **generator_args: Optional keyword arguments forwarded to
            ``hfmodel.generate``. They override the defaults below.

    Returns:
        A list with one entry per returned sequence; each entry is the list
        of strings obtained by splitting the decoded text on "<sep>".
    """
    # Defaults first, caller-supplied values last so that overrides win.
    # (Previously the keyword arguments were discarded by reassigning the
    # name to this dict.)
    generation_kwargs = {
        "max_length": 256,
        "num_beams": 4,
        "length_penalty": 1.5,
        "no_repeat_ngram_size": 3,
        "early_stopping": True,
        "num_return_sequences": 1,
        **generator_args,
    }
    input_string = "generate questions: " + input_string + " </s>"
    input_ids = tokenizer.encode(input_string, return_tensors="pt")
    res = hfmodel.generate(input_ids, **generation_kwargs)
    output = tokenizer.batch_decode(res, skip_special_tokens=True)
    return [item.split("<sep>") for item in output]
28
+
29
+
30
def chunkText(text, frameSize=5):
    """Split ``text`` into overlapping frames of consecutive sentences.

    A window slides over the sentences one sentence at a time and each
    position is joined with single spaces into one frame.

    Args:
        text: The text to split.
        frameSize: Controls the window width. Each frame holds
            ``frameSize - 1`` sentences (at least one).
            NOTE(review): the window is one sentence narrower than the
            parameter name suggests; kept as-is to preserve existing output.

    Returns:
        A list of frame strings. Empty when ``text`` contains no sentences.
        When the text has fewer sentences than the window, a single frame
        containing all of them is returned instead of nothing.
    """
    sentences = tokenize.sent_tokenize(text)
    if not sentences:
        return []

    # Clamp so a frameSize of 1 or less cannot produce zero-width (empty) frames.
    window = max(frameSize - 1, 1)

    # Short input: the sliding range below would be empty, so the caller
    # would get no frames at all. Return everything as one frame.
    if len(sentences) <= window:
        return [" ".join(sentences)]

    return [
        " ".join(sentences[start:start + window])
        for start in range(len(sentences) - window + 1)
    ]
37
+
38
+
39
def flatten(l):
    """Return one list holding the items of every sub-iterable of ``l``, in order."""
    merged = []
    for group in l:
        merged.extend(group)
    return merged
41
+
42
+
43
def run_model_with_frames(text):
    """Generate a de-duplicated set of questions for ``text``.

    The text is cut into frames with ``chunkText``, each frame is passed to
    ``hf_run_model``, and every resulting string is stripped of surrounding
    whitespace and collected.

    Args:
        text: The input text.

    Returns:
        A set of non-empty, stripped strings produced by the model.
    """
    questions = set()
    for frame in chunkText(text):
        for candidate in flatten(hf_run_model(frame)):
            cleaned = candidate.strip()
            # Splitting on "<sep>" yields an empty piece when the decoded
            # text ends with (or repeats) the separator; skip those.
            if cleaned:
                questions.add(cleaned)
    return questions
51
+
52
+
53
# Text-in / text-out Gradio interface around run_model_with_frames.
iface = gr.Interface(
    fn=run_model_with_frames,
    inputs="text",
    outputs="text",
)
# Start serving the interface.
iface.launch()
requirements.txt CHANGED
@@ -0,0 +1,2 @@
 
 
 
1
+ transformers
2
+ nltk