MeetJivani committed
Commit ef4c284
1 Parent(s): e37a87f

Update app.py

Files changed (1):
  app.py: +79 -81
app.py CHANGED
@@ -192,11 +192,11 @@ def predict(
 def proc_submission(
     input_text: str,
     model_name: str,
-    token_batch_length: int,
-    length_penalty: float,
-    repetition_penalty: float,
-    no_repeat_ngram_size: int,
-    predrop_stopwords: bool,
+    predrop_stopwords: bool = False,
+    repetition_penalty: float = 0.5,
+    no_repeat_ngram_size: int = 3,
+    length_penalty: float = 1.5,
+    token_batch_length: int = 1530,
     num_beams: int = 3,
     max_input_length: int = 8182,
 ):
@@ -503,8 +503,6 @@ if __name__ == "__main__":
         gr.Markdown("## Load Inputs & Select Parameters")
         gr.Markdown(
             """Enter/paste text below, or upload a file. Pick a model & adjust params (_optional_), and press **Summarize!**
-
-            # See [the guide doc](https://gist.github.com/pszemraj/722a7ba443aa3a671b02d87038375519) for details.
         """
         )
         with gr.Row(variant="compact"):
@@ -559,7 +557,7 @@ if __name__ == "__main__":
         # gr.Markdown(
         #     "_Summarization should take ~1-2 minutes for most settings, but may extend up to 5-10 minutes in some scenarios._"
         # )
-        output_text = gr.HTML("<p><em>_Summarization should take ~1-2 minutes for most settings, but may extend up to 5-10 minutes in some scenarios._</em></p>")
+        output_text = gr.HTML("<p><em>Summarization should take ~1-2 minutes for most settings, but may extend up to 5-10 minutes in some scenarios.</em></p>")
         with gr.Column():
             gr.Markdown("### Results & Scores")
             with gr.Row():
@@ -587,81 +585,81 @@ if __name__ == "__main__":
                 label="Summary",
                 value="<center><i>Summary will appear here!</i></center>",
             )
-        with gr.Column():
-            gr.Markdown("### **Aggregate Summary Batches**")
-            gr.Markdown(
-                "_Note: this is an experimental feature. Feedback welcome in the [discussions](https://hf.co/spaces/pszemraj/document-summarization/discussions)!_"
-            )
-            with gr.Row():
-                aggregate_button = gr.Button(
-                    "Aggregate!",
-                    variant="primary",
-                )
-                gr.Markdown(
-                    f"""Aggregate the above batches into a cohesive summary.
-                    - A secondary instruct-tuned LM consolidates info
-                    - Current model: [{AGGREGATE_MODEL}](https://hf.co/{AGGREGATE_MODEL})
-                    """
-                )
-            with gr.Column(variant="panel"):
-                aggregated_summary = gr.HTML(
-                    label="Aggregate Summary",
-                    value="<center><i>Aggregate summary will appear here!</i></center>",
-                )
-                gr.Markdown(
-                    "\n\n_Aggregate summary is also appended to the bottom of the `.txt` file._"
-                )
+        # with gr.Column():
+        #     gr.Markdown("### **Aggregate Summary Batches**")
+        #     gr.Markdown(
+        #         "_Note: this is an experimental feature. Feedback welcome in the [discussions](https://hf.co/spaces/pszemraj/document-summarization/discussions)!_"
+        #     )
+        #     with gr.Row():
+        #         aggregate_button = gr.Button(
+        #             "Aggregate!",
+        #             variant="primary",
+        #         )
+        #         gr.Markdown(
+        #             f"""Aggregate the above batches into a cohesive summary.
+        #             - A secondary instruct-tuned LM consolidates info
+        #             - Current model: [{AGGREGATE_MODEL}](https://hf.co/{AGGREGATE_MODEL})
+        #             """
+        #         )
+        #     with gr.Column(variant="panel"):
+        #         aggregated_summary = gr.HTML(
+        #             label="Aggregate Summary",
+        #             value="<center><i>Aggregate summary will appear here!</i></center>",
+        #         )
+        #         gr.Markdown(
+        #             "\n\n_Aggregate summary is also appended to the bottom of the `.txt` file._"
+        #         )

         gr.Markdown("---")
-        with gr.Column():
-            gr.Markdown("### Advanced Settings")
-            gr.Markdown(
-                "Refer to [the guide doc](https://gist.github.com/pszemraj/722a7ba443aa3a671b02d87038375519) for what these are, and how they impact _quality_ and _speed_."
-            )
-            with gr.Row(variant="compact"):
-                length_penalty = gr.Slider(
-                    minimum=0.3,
-                    maximum=1.1,
-                    label="length penalty",
-                    value=0.7,
-                    step=0.05,
-                )
-                token_batch_length = gr.Radio(
-                    choices=TOKEN_BATCH_OPTIONS,
-                    label="token batch length",
-                    # select median option
-                    value=TOKEN_BATCH_OPTIONS[len(TOKEN_BATCH_OPTIONS) // 2],
-                )
-
-            with gr.Row(variant="compact"):
-                repetition_penalty = gr.Slider(
-                    minimum=1.0,
-                    maximum=5.0,
-                    label="repetition penalty",
-                    value=1.5,
-                    step=0.1,
-                )
-                no_repeat_ngram_size = gr.Radio(
-                    choices=[2, 3, 4, 5],
-                    label="no repeat ngram size",
-                    value=3,
-                )
-                predrop_stopwords = gr.Checkbox(
-                    label="Drop Stopwords (Pre-Truncation)",
-                    value=False,
-                )
-        with gr.Column():
-            gr.Markdown("## About")
-            gr.Markdown(
-                "- Models are fine-tuned on the [🅱️ookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that generalizes well and is useful for summarizing text in academic and everyday use."
-            )
-            gr.Markdown(
-                "- _Update April 2023:_ Additional models fine-tuned on the [PLOS](https://hf.co/datasets/pszemraj/scientific_lay_summarisation-plos-norm) and [ELIFE](https://hf.co/datasets/pszemraj/scientific_lay_summarisation-elife-norm) subsets of the [scientific lay summaries](https://arxiv.org/abs/2210.09932) dataset are available (see dropdown at the top)."
-            )
-            gr.Markdown(
-                "Adjust the max input words & max PDF pages for OCR by duplicating this space and [setting the environment variables](https://hf.co/docs/hub/spaces-overview#managing-secrets) `APP_MAX_WORDS` and `APP_OCR_MAX_PAGES` to the desired integer values."
-            )
-            gr.Markdown("---")
+        # with gr.Column():
+        #     gr.Markdown("### Advanced Settings")
+        #     gr.Markdown(
+        #         "Refer to [the guide doc](https://gist.github.com/pszemraj/722a7ba443aa3a671b02d87038375519) for what these are, and how they impact _quality_ and _speed_."
+        #     )
+        #     with gr.Row(variant="compact"):
+        #         length_penalty = gr.Slider(
+        #             minimum=0.3,
+        #             maximum=1.1,
+        #             label="length penalty",
+        #             value=0.7,
+        #             step=0.05,
+        #         )
+        #         token_batch_length = gr.Radio(
+        #             choices=TOKEN_BATCH_OPTIONS,
+        #             label="token batch length",
+        #             # select median option
+        #             value=TOKEN_BATCH_OPTIONS[len(TOKEN_BATCH_OPTIONS) // 2],
+        #         )
+
+        #     with gr.Row(variant="compact"):
+        #         repetition_penalty = gr.Slider(
+        #             minimum=1.0,
+        #             maximum=5.0,
+        #             label="repetition penalty",
+        #             value=1.5,
+        #             step=0.1,
+        #         )
+        #         no_repeat_ngram_size = gr.Radio(
+        #             choices=[2, 3, 4, 5],
+        #             label="no repeat ngram size",
+        #             value=3,
+        #         )
+        #         predrop_stopwords = gr.Checkbox(
+        #             label="Drop Stopwords (Pre-Truncation)",
+        #             value=False,
+        #         )
+        # with gr.Column():
+        #     gr.Markdown("## About")
+        #     gr.Markdown(
+        #         "- Models are fine-tuned on the [🅱️ookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that generalizes well and is useful for summarizing text in academic and everyday use."
+        #     )
+        #     gr.Markdown(
+        #         "- _Update April 2023:_ Additional models fine-tuned on the [PLOS](https://hf.co/datasets/pszemraj/scientific_lay_summarisation-plos-norm) and [ELIFE](https://hf.co/datasets/pszemraj/scientific_lay_summarisation-elife-norm) subsets of the [scientific lay summaries](https://arxiv.org/abs/2210.09932) dataset are available (see dropdown at the top)."
+        #     )
+        #     gr.Markdown(
+        #         "Adjust the max input words & max PDF pages for OCR by duplicating this space and [setting the environment variables](https://hf.co/docs/hub/spaces-overview#managing-secrets) `APP_MAX_WORDS` and `APP_OCR_MAX_PAGES` to the desired integer values."
+        #     )
+        #     gr.Markdown("---")

         # load_examples_button.click(
         #     fn=load_single_example_text, inputs=[example_name], outputs=[input_text]
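For context on the first hunk: a minimal usage sketch of the new signature, assuming `app.py` from this Space is importable and that callers now rely on the new keyword defaults (`predrop_stopwords=False`, `repetition_penalty=0.5`, `no_repeat_ngram_size=3`, `length_penalty=1.5`, `token_batch_length=1530`) rather than the commented-out UI controls. The input text and model name below are placeholders, not values from this commit.

# Illustrative sketch, not part of the commit: with the generation parameters
# now defaulted keyword arguments, a caller only needs to pass the document
# text and a model name.
from app import proc_submission  # assumes this Space's app.py is on the path

result = proc_submission(
    input_text="Paste or load a long document here...",  # placeholder input
    model_name="<model id from the dropdown>",  # placeholder model identifier
)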