lucidmorto committed · 3f7af4c · Parent(s): b7588d3
feat: Improve text generation with advanced parameters
Enhanced the text generation function to preprocess the input text, refine input preparation, and optimize output generation with advanced parameters such as top-k sampling, top-p sampling, and temperature. This increases the quality and variability of the generated text while ensuring robustness through early stopping and a stricter no-repeat n-gram constraint.
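For context on what these parameters do, the sketch below is a rough, illustrative rendering (the function and variable names are invented for this example) of how temperature scaling plus top-k and top-p filtering reshape a next-token distribution before sampling. transformers applies equivalent filtering internally when `do_sample=True` is passed to `model.generate`.

```python
import torch

def filter_next_token_logits(logits, top_k=50, top_p=0.95, temperature=0.8):
    """Illustrative only: roughly what top-k / top-p / temperature do
    to a (batch, vocab) logits tensor before multinomial sampling."""
    # Temperature < 1.0 sharpens the distribution, > 1.0 flattens it.
    logits = logits / temperature

    # Top-k: mask everything outside the k highest-scoring tokens.
    kth_best = torch.topk(logits, top_k).values[..., -1, None]
    logits = logits.masked_fill(logits < kth_best, float("-inf"))

    # Top-p (nucleus): keep the smallest prefix of probability-sorted tokens
    # whose cumulative probability exceeds top_p, mask the rest.
    sorted_logits, sorted_idx = torch.sort(logits, descending=True)
    cumulative = torch.softmax(sorted_logits, dim=-1).cumsum(dim=-1)
    remove = cumulative > top_p
    remove[..., 1:] = remove[..., :-1].clone()  # keep the token that crosses the threshold
    remove[..., 0] = False
    remove = remove.scatter(-1, sorted_idx, remove)  # map back to vocab order
    return logits.masked_fill(remove, float("-inf"))

# Sample one next token from a toy distribution.
logits = torch.randn(1, 32_000)
probs = torch.softmax(filter_next_token_logits(logits), dim=-1)
next_token = torch.multinomial(probs, num_samples=1)
print(next_token)
```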
app.py
CHANGED
@@ -6,9 +6,29 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
 def generate_text(input_text):
-
-
-
+    # Preprocess input text
+    input_text = input_text.strip()
+
+    # Prepare input for the model
+    input_ids = tokenizer.encode("humanize: " + input_text, return_tensors="pt", max_length=512, truncation=True)
+
+    # Generate text with improved parameters
+    outputs = model.generate(
+        input_ids,
+        max_length=300,
+        min_length=30,
+        num_return_sequences=1,
+        no_repeat_ngram_size=3,
+        top_k=50,
+        top_p=0.95,
+        temperature=0.8,
+        do_sample=True,
+        early_stopping=True
+    )
+
+    # Decode and clean up the generated text
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return generated_text.strip()
 
 iface = gr.Interface(
     fn=generate_text,