blazingbunny committed on
Commit
f1e7200
1 Parent(s): b42b34a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -1,10 +1,10 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
  import re
4
- import nltk
5
 
6
- nltk.download('punkt')
7
- from nltk.tokenize import sent_tokenize
 
8
 
9
  st.title('Hugging Face BERT Summarizer')
10
 
@@ -29,8 +29,8 @@ if uploaded_file is not None and keywords:
29
  user_input = uploaded_file.read().decode('utf-8')
30
  keywords = [keyword.strip() for keyword in keywords.split(",")]
31
 
32
- # Split text into sentences
33
- sentences = sent_tokenize(user_input)
34
 
35
  # Filter sentences based on keywords
36
  filtered_sentences = [sentence for sentence in sentences if any(keyword.lower() in sentence.lower() for keyword in keywords)]
 
1
  import streamlit as st
2
  from transformers import pipeline
3
  import re
 
4
 
5
def custom_sentence_splitter(text):
    """Split *text* into sentences without relying on NLTK.

    A sentence boundary is any run of whitespace (spaces, tabs, newlines)
    that immediately follows a period, exclamation mark, or question mark.
    The terminating punctuation stays attached to its sentence.

    Args:
        text: The raw text to split.

    Returns:
        A list of non-empty sentence strings in their original order. Empty
        or whitespace-only input yields an empty list.
    """
    # Uploaded files are usually multi-line, so split on any whitespace
    # rather than only literal spaces; otherwise sentences separated by a
    # newline would be glued together.
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    # re.split returns [''] for empty input; drop such empty fragments.
    return [piece for piece in pieces if piece]
8
 
9
  st.title('Hugging Face BERT Summarizer')
10
 
 
29
  user_input = uploaded_file.read().decode('utf-8')
30
  keywords = [keyword.strip() for keyword in keywords.split(",")]
31
 
32
+ # Split text into sentences using the custom function
33
+ sentences = custom_sentence_splitter(user_input)
34
 
35
  # Filter sentences based on keywords
36
  filtered_sentences = [sentence for sentence in sentences if any(keyword.lower() in sentence.lower() for keyword in keywords)]