"""Gradio app: end-to-end Dutch question generation with T5.

Splits free text into overlapping sentence frames, runs a Dutch
end2end question-generation T5 model on each frame, and returns the
de-duplicated set of generated questions.
"""
import gradio as gr
from transformers import T5ForConditionalGeneration, T5TokenizerFast
import nltk
from nltk import tokenize

# sent_tokenize() needs the punkt sentence-splitter data at runtime;
# fetch it up front (no-op if already present).
nltk.download("punkt", quiet=True)

checkpoint = "yhavinga/t5-base-dutch"
tokenizer = T5TokenizerFast.from_pretrained(checkpoint)
# BUG FIX: the separator token text had been lost (was the empty
# string). End2end-QG checkpoints emit a literal <sep> token between
# questions; it must be registered with the tokenizer so it decodes
# intact and can be split on below.
tokenizer.sep_token = "<sep>"
tokenizer.add_tokens(["<sep>"])
hfmodel = T5ForConditionalGeneration.from_pretrained(
    "Michelvh/t5-end2end-questions-generation-dutch"
)


def hf_run_model(input_string, **generator_args):
    """Generate questions for *input_string*.

    Returns a list with one entry per returned sequence, each entry a
    list of question strings. Keyword arguments override the
    generation defaults below.
    """
    defaults = {
        "max_length": 256,
        "num_beams": 4,
        "length_penalty": 1.5,
        "no_repeat_ngram_size": 3,
        "early_stopping": True,
        "num_return_sequences": 1,
    }
    # BUG FIX: the original clobbered the caller's kwargs with a fresh
    # dict, silently discarding every override; merge instead.
    generator_args = {**defaults, **generator_args}
    # BUG FIX: restore the T5 end-of-sequence marker lost from the
    # prompt suffix (was a bare trailing space).
    input_string = "generate questions: " + input_string + " </s>"
    input_ids = tokenizer.encode(input_string, return_tensors="pt")
    res = hfmodel.generate(input_ids, **generator_args)
    output = tokenizer.batch_decode(res, skip_special_tokens=True)
    # BUG FIX: str.split("") raises ValueError at runtime; the model
    # separates its questions with the <sep> token registered above.
    output = [item.split("<sep>") for item in output]
    return output


def chunkText(text, frameSize=5):
    """Split *text* into overlapping frames of consecutive sentences.

    Each frame joins frameSize - 1 sentences and the window slides by
    one sentence, so neighbouring frames share context.
    """
    sentences = tokenize.sent_tokenize(text)
    step_size = frameSize - 1
    if len(sentences) <= step_size:
        # BUG FIX: texts shorter than one window previously produced
        # zero frames (empty range), so brief inputs yielded nothing.
        return [" ".join(sentences)] if sentences else []
    return [
        " ".join(sentences[index:index + step_size])
        for index in range(len(sentences) - step_size + 1)
    ]


def flatten(l):
    """Flatten one level of nesting: [[a, b], [c]] -> [a, b, c]."""
    return [item for sublist in l for item in sublist]


def run_model_with_frames(text):
    """Generate questions for every frame of *text*.

    Returns the de-duplicated questions as a set (Gradio's "text"
    output stringifies it for display).
    """
    result = set()
    for frame in chunkText(text):
        for answer in flatten(hf_run_model(frame)):
            question = answer.strip()
            # BUG FIX: a trailing <sep> yields an empty fragment;
            # don't surface empty "questions" to the user.
            if question:
                result.add(question)
    return result


iface = gr.Interface(fn=run_model_with_frames, inputs="text", outputs="text")
iface.launch()