Spaces:
Sleeping
Sleeping
Upload utils.py
Browse files
utils.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from itertools import islice
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
# Sliding window function
|
5 |
+
def window(seq, n=3):
    """Yield successive overlapping windows of width ``n`` over ``seq``.

    Adapted from
    https://stackoverflow.com/questions/6822725/rolling-or-sliding-window-iterator

    Args:
        seq: Any iterable; it is consumed lazily.
        n: Window width. Must be at least 1.

    Yields:
        Tuples of exactly ``n`` consecutive elements, advancing by one
        element per step. Nothing is yielded when ``seq`` holds fewer than
        ``n`` elements.

    Raises:
        ValueError: If ``n`` is smaller than 1. Because this is a generator,
            the error surfaces when iteration starts.
    """
    # Without this guard, n == 0 would yield an empty tuple followed by
    # 1-tuples, i.e. windows that are not n elements wide.
    if n < 1:
        raise ValueError("window width n must be >= 1, got %r" % (n,))

    it = iter(seq)
    result = tuple(islice(it, n))
    if len(result) == n:
        yield result
    for elem in it:
        # Slide by one: drop the oldest element, append the newest.
        result = result[1:] + (elem,)
        yield result
|
16 |
+
|
17 |
+
# Compute depth scores
|
18 |
+
def get_depths(scores):
    """Convert a sequence of coherence scores into an equally long array of depth scores.

    For every position, the value reached by climbing to the left and the
    value reached by climbing to the right are looked up; the depth is half
    of the combined drop from those two values down to the score itself.
    """

    def peak_value(values, start, step):
        """Walk from ``start`` in direction ``step`` (-1 = left, +1 = right)
        while the values strictly increase, and return the last value reached.
        """
        boundary = 0 if step < 0 else len(values) - 1
        pos = start
        height = values[pos]
        while pos != boundary:
            neighbour = values[pos + step]
            # Stop on the first neighbour that is not strictly higher
            # (plateaus end the climb).
            if not neighbour > height:
                break
            pos += step
            height = neighbour
        return height

    def depth_at(idx):
        """Depth score of the single position ``idx``."""
        left_peak = peak_value(scores, idx, -1)
        right_peak = peak_value(scores, idx, +1)
        return 0.5 * (left_peak + right_peak - (2 * scores[idx]))

    return np.array([depth_at(idx) for idx in range(len(scores))])
|
52 |
+
|
53 |
+
|
54 |
+
from scipy.signal import argrelmax
|
55 |
+
|
56 |
+
# Keep only local maxima (all other positions are set to zero)
|
57 |
+
def get_local_maxima(depth_scores, order=1):
    """Keep the local maxima of ``depth_scores`` and zero out every other position.

    Args:
        depth_scores: One-dimensional sequence of depth scores. Lists and
            tuples are accepted as well as NumPy arrays.
        order: Passed through to ``scipy.signal.argrelmax``: how many
            neighbours on each side a point is compared against.

    Returns:
        A float NumPy array of the same length as ``depth_scores`` holding
        the original value at each local maximum and 0 everywhere else.
    """
    # argrelmax and the index-array lookups below require an ndarray;
    # converting here lets callers pass plain Python sequences too.
    depth_scores = np.asarray(depth_scores)

    maxima_ids = argrelmax(depth_scores, order=order)[0]
    filtered_scores = np.zeros(len(depth_scores))
    filtered_scores[maxima_ids] = depth_scores[maxima_ids]
    return filtered_scores
|
65 |
+
|
66 |
+
# Automatic threshold computation
|
67 |
+
def compute_threshold(scores):
    """Automatically compute a segmentation threshold from depth scores.

    Follows TextTiling (https://aclanthology.org/J97-1003.pdf): the threshold
    is the mean of the scores minus half of their standard deviation.

    Args:
        scores: Sequence of depth scores (list, tuple or NumPy array).
            Entries equal to zero are ignored when computing the statistics.

    Returns:
        The threshold ``mean - std / 2`` over the non-zero scores, or NaN
        when there are no non-zero scores.
    """
    # Fancy indexing with np.nonzero needs an ndarray; converting here lets
    # callers pass plain Python sequences too.
    scores = np.asarray(scores)
    s = scores[np.nonzero(scores)]

    if s.size == 0:
        # np.mean / np.std of an empty array also produce NaN, but with a
        # RuntimeWarning; return the same value explicitly and quietly.
        return np.float64(np.nan)

    threshold = np.mean(s) - (np.std(s) / 2)
    return threshold
|
76 |
+
|
77 |
+
def get_threshold_segments(scores, threshold=0.1):
    """Return the indexes at which a depth score reaches the threshold.

    Args:
        scores: One-dimensional sequence of depth scores (list, tuple or
            NumPy array).
        threshold: Minimum score (inclusive) for an index to be selected.

    Returns:
        A NumPy array of the indexes ``i`` for which
        ``scores[i] >= threshold``, in ascending order.
    """
    # Comparing a plain list with a number raises TypeError; converting to
    # an ndarray makes the comparison element-wise for any sequence input.
    scores = np.asarray(scores)
    segment_ids = np.where(scores >= threshold)[0]
    return segment_ids
|