Overthrow4232 committed on
Commit
a6de682
1 Parent(s): 11c1460

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -12
app.py CHANGED
@@ -2,34 +2,50 @@ import spaces
2
  import gradio as gr
3
  from wtpsplit import SaT
4
  import json
 
 
5
 
6
  # Initialize the SaT model
7
- # We use 'sat-3l-sm' as it's recommended for general sentence segmentation tasks
8
- # and offers a good balance between speed and performance
9
  sat = SaT("sat-3l-sm")
10
  sat.half().to("cuda")
11
 
12
  @spaces.GPU(duration=59)
13
- def segment_text(input_text):
14
- # Use the SaT model to split the input text into sentences
15
- sentences = sat.split(input_text)
16
 
17
- # Create a JSON object where each sentence is a separate item
18
- json_output = json.dumps({"segments": sentences}, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  return json_output
21
 
22
  # Create the Gradio interface
23
  iface = gr.Interface(
24
  fn=segment_text,
25
- inputs=gr.Textbox(lines=5, label="Input Text"),
 
 
 
26
  outputs=gr.JSON(label="Segmented Text (JSON)"),
27
  title="Text Segmentation with SaT",
28
- description="This app uses the SaT (Segment any Text) model to split input text into sentences and return the result as JSON. All credits to the respective author(s). Github: https://github.com/segment-any-text/wtpsplit/tree/main",
29
  examples=[
30
- ["This is a test This is another test."],
31
- ["Hello this is a test But this is different now Now the next one starts looool"],
32
- ["The quick brown fox jumps over the lazy dog It was the best of times, it was the worst of times"],
33
  ]
34
  )
35
 
 
2
  import gradio as gr
3
  from wtpsplit import SaT
4
  import json
5
+ import zipfile
6
+ import io
7
 
8
  # Initialize the SaT model
# The model is created once at import time from the "sat-3l-sm" checkpoint
# name, then .half() and .to("cuda") are chained on it so that every call to
# segment_text reuses the same instance.
# NOTE(review): presumably .half() casts the weights to float16 and returns
# the model itself (the chained .to("cuda") relies on that) - confirm against
# the wtpsplit API.
sat = SaT("sat-3l-sm")
sat.half().to("cuda")
11
 
12
  @spaces.GPU(duration=59)
13
+ def segment_text(input_text, zip_file):
14
+ results = {}
 
15
 
16
+ if input_text:
17
+ # Process single text input
18
+ sentences = sat.split(input_text)
19
+ results["input_text"] = {"segments": sentences}
20
+ elif zip_file is not None:
21
+ # Process zip file
22
+ with zipfile.ZipFile(io.BytesIO(zip_file), 'r') as zip_ref:
23
+ for file_name in zip_ref.namelist():
24
+ if file_name.endswith('.txt'):
25
+ with zip_ref.open(file_name) as file:
26
+ content = file.read().decode('utf-8')
27
+ sentences = sat.split(content)
28
+ results[file_name] = {"segments": sentences}
29
+
30
+ # Create a JSON object with the results
31
+ json_output = json.dumps(results, indent=2)
32
 
33
  return json_output
34
 
35
  # Create the Gradio interface
36
  iface = gr.Interface(
37
  fn=segment_text,
38
+ inputs=[
39
+ gr.Textbox(lines=5, label="Input Text (Optional)"),
40
+ gr.File(label="Upload ZIP file (Optional)", file_types=[".zip"])
41
+ ],
42
  outputs=gr.JSON(label="Segmented Text (JSON)"),
43
  title="Text Segmentation with SaT",
44
+ description="This app uses the SaT (Segment any Text) model to split input text into sentences and return the result as JSON. You can input text directly or upload a ZIP file containing multiple .txt files. All credits to the respective author(s). Github: https://github.com/segment-any-text/wtpsplit/tree/main",
45
  examples=[
46
+ ["This is a test This is another test.", None],
47
+ ["Hello this is a test But this is different now Now the next one starts looool", None],
48
+ ["The quick brown fox jumps over the lazy dog It was the best of times, it was the worst of times", None],
49
  ]
50
  )
51