Overthrow4232 committed on
Commit
001bc99
1 Parent(s): ed93b21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -15
app.py CHANGED
@@ -2,31 +2,27 @@ import spaces
2
  import gradio as gr
3
  from wtpsplit import SaT
4
  import json
5
- import zipfile
6
- import io
7
- import os
8
 
9
  # Initialize the SaT model
10
  sat = SaT("sat-3l-sm")
11
  sat.half().to("cuda")
12
 
13
  @spaces.GPU(duration=59)
14
- def segment_text(input_text, zip_file):
15
  results = {}
16
 
17
  if input_text:
18
  # Process single text input
19
  sentences = sat.split(input_text)
20
  results["input_text"] = {"segments": sentences}
21
- elif zip_file is not None:
22
- # Process zip file
23
- with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
24
- for file_name in zip_ref.namelist():
25
- if file_name.endswith('.txt'):
26
- with zip_ref.open(file_name) as file:
27
- content = file.read().decode('utf-8')
28
- sentences = sat.split(content)
29
- results[file_name] = {"segments": sentences}
30
 
31
  # Create a JSON object with the results
32
  json_output = json.dumps(results, indent=2)
@@ -38,11 +34,11 @@ iface = gr.Interface(
38
  fn=segment_text,
39
  inputs=[
40
  gr.Textbox(lines=5, label="Input Text (Optional)"),
41
- gr.File(label="Upload ZIP file (Optional)", file_types=[".zip"])
42
  ],
43
  outputs=gr.JSON(label="Segmented Text (JSON)"),
44
  title="Text Segmentation with SaT",
45
- description="This app uses the SaT (Segment any Text) model to split input text into sentences and return the result as JSON. You can input text directly or upload a ZIP file containing multiple .txt files. All credits to the respective author(s). Github: https://github.com/segment-any-text/wtpsplit/tree/main",
46
  examples=[
47
  ["This is a test This is another test.", None],
48
  ["Hello this is a test But this is different now Now the next one starts looool", None],
 
2
  import gradio as gr
3
  from wtpsplit import SaT
4
  import json
 
 
 
5
 
6
  # Initialize the SaT model
7
  sat = SaT("sat-3l-sm")
8
  sat.half().to("cuda")
9
 
10
  @spaces.GPU(duration=59)
11
+ def segment_text(input_text, txt_file):
12
  results = {}
13
 
14
  if input_text:
15
  # Process single text input
16
  sentences = sat.split(input_text)
17
  results["input_text"] = {"segments": sentences}
18
+ elif txt_file is not None:
19
+ # Process txt file
20
+ with open(txt_file.name, 'r', encoding='utf-8') as file:
21
+ for i, line in enumerate(file, 1):
22
+ line = line.strip()
23
+ if line: # Skip empty lines
24
+ sentences = sat.split(line)
25
+ results[f"document_{i}"] = {"segments": sentences}
 
26
 
27
  # Create a JSON object with the results
28
  json_output = json.dumps(results, indent=2)
 
34
  fn=segment_text,
35
  inputs=[
36
  gr.Textbox(lines=5, label="Input Text (Optional)"),
37
+ gr.File(label="Upload TXT file (Optional) Row-separated", file_types=[".txt"])
38
  ],
39
  outputs=gr.JSON(label="Segmented Text (JSON)"),
40
  title="Text Segmentation with SaT",
41
+ description="This app uses the SaT (Segment any Text) model to split input text into sentences and return the result as JSON. You can input text directly or upload a TXT file containing multiple documents (one per line). All credits to the respective author(s). Github: https://github.com/segment-any-text/wtpsplit/tree/main",
42
  examples=[
43
  ["This is a test This is another test.", None],
44
  ["Hello this is a test But this is different now Now the next one starts looool", None],