MeetJivani committed
Commit ef4c284
1 Parent(s): e37a87f

Update app.py

Files changed (1):
  app.py: +79 -81
app.py CHANGED
@@ -192,11 +192,11 @@ def predict(
 def proc_submission(
     input_text: str,
     model_name: str,
-    token_batch_length: int,
-    length_penalty: float,
-    repetition_penalty: float,
-    no_repeat_ngram_size: int,
-    predrop_stopwords: bool,
+    predrop_stopwords: bool = False,
+    repetition_penalty: float = 0.5,
+    no_repeat_ngram_size: int = 3,
+    length_penalty: float = 1.5,
+    token_batch_length: int = 1530,
     num_beams: int = 3,
     max_input_length: int = 8182,
 ):
@@ -503,8 +503,6 @@ if __name__ == "__main__":
         gr.Markdown("## Load Inputs & Select Parameters")
         gr.Markdown(
             """Enter/paste text below, or upload a file. Pick a model & adjust params (_optional_), and press **Summarize!**
-
-            # See [the guide doc](https://gist.github.com/pszemraj/722a7ba443aa3a671b02d87038375519) for details.
         """
         )
         with gr.Row(variant="compact"):
@@ -559,7 +557,7 @@ if __name__ == "__main__":
         # gr.Markdown(
         #     "_Summarization should take ~1-2 minutes for most settings, but may extend up to 5-10 minutes in some scenarios._"
         # )
-        output_text = gr.HTML("<p><em>_Summarization should take ~1-2 minutes for most settings, but may extend up to 5-10 minutes in some scenarios._</em></p>")
+        output_text = gr.HTML("<p><em>Summarization should take ~1-2 minutes for most settings, but may extend up to 5-10 minutes in some scenarios.</em></p>")
         with gr.Column():
             gr.Markdown("### Results & Scores")
             with gr.Row():
@@ -587,81 +585,81 @@ if __name__ == "__main__":
                 label="Summary",
                 value="<center><i>Summary will appear here!</i></center>",
             )
-        with gr.Column():
-            gr.Markdown("### **Aggregate Summary Batches**")
-            gr.Markdown(
-                "_Note: this is an experimental feature. Feedback welcome in the [discussions](https://hf.co/spaces/pszemraj/document-summarization/discussions)!_"
-            )
-            with gr.Row():
-                aggregate_button = gr.Button(
-                    "Aggregate!",
-                    variant="primary",
-                )
-                gr.Markdown(
-                    f"""Aggregate the above batches into a cohesive summary.
-                    - A secondary instruct-tuned LM consolidates info
-                    - Current model: [{AGGREGATE_MODEL}](https://hf.co/{AGGREGATE_MODEL})
-                    """
-                )
-            with gr.Column(variant="panel"):
-                aggregated_summary = gr.HTML(
-                    label="Aggregate Summary",
-                    value="<center><i>Aggregate summary will appear here!</i></center>",
-                )
-                gr.Markdown(
-                    "\n\n_Aggregate summary is also appended to the bottom of the `.txt` file._"
-                )
+        # with gr.Column():
+        #     gr.Markdown("### **Aggregate Summary Batches**")
+        #     gr.Markdown(
+        #         "_Note: this is an experimental feature. Feedback welcome in the [discussions](https://hf.co/spaces/pszemraj/document-summarization/discussions)!_"
+        #     )
+        #     with gr.Row():
+        #         aggregate_button = gr.Button(
+        #             "Aggregate!",
+        #             variant="primary",
+        #         )
+        #         gr.Markdown(
+        #             f"""Aggregate the above batches into a cohesive summary.
+        #             - A secondary instruct-tuned LM consolidates info
+        #             - Current model: [{AGGREGATE_MODEL}](https://hf.co/{AGGREGATE_MODEL})
+        #             """
+        #         )
+        #     with gr.Column(variant="panel"):
+        #         aggregated_summary = gr.HTML(
+        #             label="Aggregate Summary",
+        #             value="<center><i>Aggregate summary will appear here!</i></center>",
+        #         )
+        #         gr.Markdown(
+        #             "\n\n_Aggregate summary is also appended to the bottom of the `.txt` file._"
+        #         )

         gr.Markdown("---")
-        with gr.Column():
-            gr.Markdown("### Advanced Settings")
-            gr.Markdown(
-                "Refer to [the guide doc](https://gist.github.com/pszemraj/722a7ba443aa3a671b02d87038375519) for what these are, and how they impact _quality_ and _speed_."
-            )
-            with gr.Row(variant="compact"):
-                length_penalty = gr.Slider(
-                    minimum=0.3,
-                    maximum=1.1,
-                    label="length penalty",
-                    value=0.7,
-                    step=0.05,
-                )
-                token_batch_length = gr.Radio(
-                    choices=TOKEN_BATCH_OPTIONS,
-                    label="token batch length",
-                    # select median option
-                    value=TOKEN_BATCH_OPTIONS[len(TOKEN_BATCH_OPTIONS) // 2],
-                )
-
-            with gr.Row(variant="compact"):
-                repetition_penalty = gr.Slider(
-                    minimum=1.0,
-                    maximum=5.0,
-                    label="repetition penalty",
-                    value=1.5,
-                    step=0.1,
-                )
-                no_repeat_ngram_size = gr.Radio(
-                    choices=[2, 3, 4, 5],
-                    label="no repeat ngram size",
-                    value=3,
-                )
-                predrop_stopwords = gr.Checkbox(
-                    label="Drop Stopwords (Pre-Truncation)",
-                    value=False,
-                )
-        with gr.Column():
-            gr.Markdown("## About")
-            gr.Markdown(
-                "- Models are fine-tuned on the [🅱️ookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that generalizes well and is useful for summarizing text in academic and everyday use."
-            )
-            gr.Markdown(
-                "- _Update April 2023:_ Additional models fine-tuned on the [PLOS](https://hf.co/datasets/pszemraj/scientific_lay_summarisation-plos-norm) and [ELIFE](https://hf.co/datasets/pszemraj/scientific_lay_summarisation-elife-norm) subsets of the [scientific lay summaries](https://arxiv.org/abs/2210.09932) dataset are available (see dropdown at the top)."
-            )
-            gr.Markdown(
-                "Adjust the max input words & max PDF pages for OCR by duplicating this space and [setting the environment variables](https://hf.co/docs/hub/spaces-overview#managing-secrets) `APP_MAX_WORDS` and `APP_OCR_MAX_PAGES` to the desired integer values."
-            )
-            gr.Markdown("---")
+        # with gr.Column():
+        #     gr.Markdown("### Advanced Settings")
+        #     gr.Markdown(
+        #         "Refer to [the guide doc](https://gist.github.com/pszemraj/722a7ba443aa3a671b02d87038375519) for what these are, and how they impact _quality_ and _speed_."
+        #     )
+        #     with gr.Row(variant="compact"):
+        #         length_penalty = gr.Slider(
+        #             minimum=0.3,
+        #             maximum=1.1,
+        #             label="length penalty",
+        #             value=0.7,
+        #             step=0.05,
+        #         )
+        #         token_batch_length = gr.Radio(
+        #             choices=TOKEN_BATCH_OPTIONS,
+        #             label="token batch length",
+        #             # select median option
+        #             value=TOKEN_BATCH_OPTIONS[len(TOKEN_BATCH_OPTIONS) // 2],
+        #         )
+
+        #     with gr.Row(variant="compact"):
+        #         repetition_penalty = gr.Slider(
+        #             minimum=1.0,
+        #             maximum=5.0,
+        #             label="repetition penalty",
+        #             value=1.5,
+        #             step=0.1,
+        #         )
+        #         no_repeat_ngram_size = gr.Radio(
+        #             choices=[2, 3, 4, 5],
+        #             label="no repeat ngram size",
+        #             value=3,
+        #         )
+        #         predrop_stopwords = gr.Checkbox(
+        #             label="Drop Stopwords (Pre-Truncation)",
+        #             value=False,
+        #         )
+        # with gr.Column():
+        #     gr.Markdown("## About")
+        #     gr.Markdown(
+        #         "- Models are fine-tuned on the [🅱️ookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that generalizes well and is useful for summarizing text in academic and everyday use."
+        #     )
+        #     gr.Markdown(
+        #         "- _Update April 2023:_ Additional models fine-tuned on the [PLOS](https://hf.co/datasets/pszemraj/scientific_lay_summarisation-plos-norm) and [ELIFE](https://hf.co/datasets/pszemraj/scientific_lay_summarisation-elife-norm) subsets of the [scientific lay summaries](https://arxiv.org/abs/2210.09932) dataset are available (see dropdown at the top)."
+        #     )
+        #     gr.Markdown(
+        #         "Adjust the max input words & max PDF pages for OCR by duplicating this space and [setting the environment variables](https://hf.co/docs/hub/spaces-overview#managing-secrets) `APP_MAX_WORDS` and `APP_OCR_MAX_PAGES` to the desired integer values."
+        #     )
+        #     gr.Markdown("---")

         # load_examples_button.click(
         #     fn=load_single_example_text, inputs=[example_name], outputs=[input_text]
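For context on the first hunk: a minimal usage sketch of the new signature, assuming `app.py` from this Space is importable and that callers now rely on the new keyword defaults (`predrop_stopwords=False`, `repetition_penalty=0.5`, `no_repeat_ngram_size=3`, `length_penalty=1.5`, `token_batch_length=1530`) rather than the commented-out UI controls. The input text and model name below are placeholders, not values from this commit.

# Illustrative sketch, not part of the commit: with the generation parameters
# now defaulted keyword arguments, a caller only needs to pass the document
# text and a model name.
from app import proc_submission  # assumes this Space's app.py is on the path

result = proc_submission(
    input_text="Paste or load a long document here...",  # placeholder input
    model_name="<model id from the dropdown>",  # placeholder model identifier
)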