Spaces:
Runtime error
Runtime error
File size: 2,070 Bytes
f838b34 8dd9a23 809eaa8 8dd9a23 f838b34 04ca745 a6de682 8dd9a23 a6de682 04ca745 b71a375 a6de682 8dd9a23 a6de682 b71a375 a6de682 8dd9a23 b71a375 8dd9a23 04ca745 b71a375 8dd9a23 0ca633f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import spaces
import gradio as gr
from wtpsplit import SaT
import json
# Build a single module-level SaT instance from the "sat-12l-sm" model name.
# segment_text() reuses this same object for every request instead of
# constructing a new model per call.
sat = SaT("sat-12l-sm")
# NOTE(review): presumably casts the model to half precision and moves it to
# the CUDA device at import time -- confirm against the wtpsplit docs and that
# a GPU is actually available when this module is imported.
sat.half().to("cuda")
@spaces.GPU(duration=59)
def segment_text(input_text, multi_doc_input):
    """Segment a single text and/or several documents with the ``sat`` model.

    Args:
        input_text: A single text to segment. Skipped when empty or ``None``.
        multi_doc_input: A string holding a JSON object that maps document
            keys to document texts. Skipped when empty or ``None``.

    Returns:
        A JSON string indented by two spaces. The single text is reported
        under ``"input_text"`` and each document under ``"doc_<key>"``, both
        as ``{"segments": <output of sat.split>}``. A top-level ``"error"``
        entry is added when ``multi_doc_input`` is not valid JSON or is not a
        JSON object. A document whose value is not a string gets
        ``{"error": ...}`` in place of its segments.
    """
    results = {}

    if input_text:
        results["input_text"] = {"segments": sat.split(input_text)}

    if multi_doc_input:
        # Only the parse step can raise JSONDecodeError, so keep the try body
        # limited to it and do the segmentation on the success path.
        try:
            documents = json.loads(multi_doc_input)
        except json.JSONDecodeError:
            results["error"] = "Invalid JSON input for multiple documents"
        else:
            if isinstance(documents, dict):
                for key, doc in documents.items():
                    if isinstance(doc, str):
                        results[f"doc_{key}"] = {"segments": sat.split(doc)}
                    else:
                        # Numbers, lists, nulls etc. are valid JSON values but
                        # not text; report them instead of crashing the call.
                        results[f"doc_{key}"] = {
                            "error": "Document text must be a string"
                        }
            else:
                # Valid JSON such as a list or a bare number has no .items();
                # report it rather than raising AttributeError.
                results["error"] = (
                    "Multiple documents input must be a JSON object "
                    "mapping keys to text"
                )

    return json.dumps(results, indent=2)
# Input widgets: free text on top, a JSON object of several documents below.
_single_text_box = gr.Textbox(lines=5, label="Input Text (Optional)")
_multi_doc_box = gr.Textbox(
    lines=10,
    label="Multiple Documents JSON (Optional)",
    placeholder='{"1": "Document 1 text", "2": "Document 2 text"}',
)

# Output widget that displays the JSON returned by segment_text.
_json_view = gr.JSON(label="Segmented Text (JSON)")

_app_description = "This app uses the SaT (Segment any Text) model to split input text into sentences and return the result as JSON. You can input text directly or provide multiple documents as JSON. All credits to the respective author(s). Github: https://github.com/segment-any-text/wtpsplit/tree/main"

# Each example row is [input_text, multi_doc_input], matching the input order.
_example_rows = [
    ["This is a test This is another test.", ""],
    ["Hello this is a test But this is different now Now the next one starts looool", ""],
    ["The quick brown fox jumps over the lazy dog It was the best of times, it was the worst of times", ""],
    ["", '{"1": "Document 1 first sentence Document 1 second sentence", "2": "Document 2 only sentence", "3": "Document 3 first Document 3 second"}'],
]

# Wire the widgets to segment_text.
iface = gr.Interface(
    fn=segment_text,
    inputs=[_single_text_box, _multi_doc_box],
    outputs=_json_view,
    title="Text Segmentation with SaT",
    description=_app_description,
    examples=_example_rows,
)

# Start serving the app.
iface.launch()