Spaces:
Running
on
T4
Running
on
T4
Mark Duppenthaler
committed on
Commit
•
8e82d74
1
Parent(s):
2d522b6
temp
Browse files
- .vscode/settings.json +26 -0
- __pycache__/app.cpython-310.pyc +0 -0
- __pycache__/lang_list.cpython-310.pyc +0 -0
- __pycache__/m4t_app.cpython-310.pyc +0 -0
- __pycache__/simuleval_transcoder.cpython-310.pyc +0 -0
- __pycache__/test_pipeline.cpython-310.pyc +0 -0
- requirements.txt +1 -1
- seamless_communication +1 -0
- simuleval_transcoder.py +54 -7
.vscode/settings.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"[python]": {
|
3 |
+
"editor.defaultFormatter": "ms-python.python"
|
4 |
+
},
|
5 |
+
"python.formatting.provider": "none",
|
6 |
+
"workbench.colorCustomizations": {
|
7 |
+
"activityBar.activeBackground": "#fbed80",
|
8 |
+
"activityBar.background": "#fbed80",
|
9 |
+
"activityBar.foreground": "#15202b",
|
10 |
+
"activityBar.inactiveForeground": "#15202b99",
|
11 |
+
"activityBarBadge.background": "#06b9a5",
|
12 |
+
"activityBarBadge.foreground": "#15202b",
|
13 |
+
"commandCenter.border": "#15202b99",
|
14 |
+
"sash.hoverBorder": "#fbed80",
|
15 |
+
"statusBar.background": "#f9e64f",
|
16 |
+
"statusBar.foreground": "#15202b",
|
17 |
+
"statusBarItem.hoverBackground": "#f7df1e",
|
18 |
+
"statusBarItem.remoteBackground": "#f9e64f",
|
19 |
+
"statusBarItem.remoteForeground": "#15202b",
|
20 |
+
"titleBar.activeBackground": "#f9e64f",
|
21 |
+
"titleBar.activeForeground": "#15202b",
|
22 |
+
"titleBar.inactiveBackground": "#f9e64f99",
|
23 |
+
"titleBar.inactiveForeground": "#15202b99"
|
24 |
+
},
|
25 |
+
"peacock.remoteColor": "#f9e64f"
|
26 |
+
}
|
__pycache__/app.cpython-310.pyc
ADDED
Binary file (2.57 kB). View file
|
|
__pycache__/lang_list.cpython-310.pyc
ADDED
Binary file (4.03 kB). View file
|
|
__pycache__/m4t_app.cpython-310.pyc
ADDED
Binary file (8.44 kB). View file
|
|
__pycache__/simuleval_transcoder.cpython-310.pyc
ADDED
Binary file (5.17 kB). View file
|
|
__pycache__/test_pipeline.cpython-310.pyc
ADDED
Binary file (2.56 kB). View file
|
|
requirements.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
# fairseq2==0.1.0
|
2 |
|
3 |
# Temp to skip
|
4 |
-
git+https://github.com/mduppes/fairseq2.git@93420c86ba01349ee8f90d7adda439b666b50557
|
5 |
# git+https://github.com/facebookresearch/seamless_communication
|
6 |
./seamless_communication
|
7 |
# comment this out to test fairseq1 first
|
|
|
1 |
# fairseq2==0.1.0
|
2 |
|
3 |
# Temp to skip
|
4 |
+
# git+https://github.com/mduppes/fairseq2.git@93420c86ba01349ee8f90d7adda439b666b50557
|
5 |
# git+https://github.com/facebookresearch/seamless_communication
|
6 |
./seamless_communication
|
7 |
# comment this out to test fairseq1 first
|
seamless_communication
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Subproject commit 02405dfd0c187d625aa66255ff8c39f98031a091
|
simuleval_transcoder.py
CHANGED
@@ -31,12 +31,18 @@ from seamless_communication.models.vocoder import load_vocoder_model, Vocoder
|
|
31 |
|
32 |
|
33 |
|
34 |
-
from seamless_communication.models.streaming.agents import (
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
38 |
)
|
39 |
|
|
|
|
|
40 |
### From test_pipeline
|
41 |
import math
|
42 |
import soundfile
|
@@ -98,12 +104,48 @@ def load_model_for_inference(
|
|
98 |
model.eval()
|
99 |
return model
|
100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
class SimulevalTranscoder:
|
102 |
# def __init__(self, agent, sample_rate, debug, buffer_limit):
|
103 |
def __init__(self):
|
104 |
-
print("MDUPPES in here", SileroVADAgent, TestTimeWaitKS2TVAD)
|
105 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
106 |
|
|
|
|
|
|
|
|
|
107 |
device = "cpu"
|
108 |
print("DEVICE", device)
|
109 |
model_name_or_card="seamlessM4T_medium"
|
@@ -145,8 +187,10 @@ class SimulevalTranscoder:
|
|
145 |
|
146 |
pipeline = TestTimeWaitKUnityV1M4T(model, args)
|
147 |
system_states = pipeline.build_states()
|
148 |
-
print('system states')
|
149 |
-
|
|
|
|
|
150 |
input_segment = np.empty(0, dtype=np.int16)
|
151 |
segments = []
|
152 |
while True:
|
@@ -156,6 +200,9 @@ class SimulevalTranscoder:
|
|
156 |
output_segment = pipeline.pushpop(speech_segment, system_states)
|
157 |
print('pushpop result')
|
158 |
print(output_segment)
|
|
|
|
|
|
|
159 |
if output_segment.finished:
|
160 |
segments.append(input_segment)
|
161 |
input_segment = np.empty(0, dtype=np.int16)
|
|
|
31 |
|
32 |
|
33 |
|
34 |
+
# from seamless_communication.models.streaming.agents import (
|
35 |
+
# SileroVADAgent,
|
36 |
+
# TestTimeWaitKS2TVAD,
|
37 |
+
# TestTimeWaitKUnityV1M4T
|
38 |
+
# )
|
39 |
+
|
40 |
+
from seamless_communication.cli.streaming.agents.tt_waitk_unity_s2t_m4t import (
|
41 |
+
TestTimeWaitKUnityS2TM4T,
|
42 |
)
|
43 |
|
44 |
+
from seamless_communication.cli.streaming.dataloader import Fairseq2SpeechToTextDataloader
|
45 |
+
|
46 |
### From test_pipeline
|
47 |
import math
|
48 |
import soundfile
|
|
|
104 |
model.eval()
|
105 |
return model
|
106 |
|
107 |
+
def load_model_fairseq2():
|
108 |
+
data_configs = dict(
|
109 |
+
dataloader="fairseq2_s2t",
|
110 |
+
data_file="/large_experiments/seamless/ust/abinesh/data/s2st50_manifests/50-10/simuleval/dev_mtedx_filt_50-10_debug.tsv",
|
111 |
+
)
|
112 |
+
|
113 |
+
model_configs = dict(
|
114 |
+
model_name="seamlessM4T_v2_large",
|
115 |
+
device="cuda:0",
|
116 |
+
source_segment_size=320,
|
117 |
+
waitk_lagging=7,
|
118 |
+
fixed_pre_decision_ratio=2,
|
119 |
+
init_target_tokens="</s> __eng__",
|
120 |
+
max_len_a=0,
|
121 |
+
max_len_b=200,
|
122 |
+
agent_class="seamless_communication.cli.streaming.agents.tt_waitk_unity_s2t_m4t.TestTimeWaitKUnityS2TM4T",
|
123 |
+
task="s2st",
|
124 |
+
tgt_lang="eng",
|
125 |
+
)
|
126 |
+
|
127 |
+
eval_configs = dict(
|
128 |
+
latency_metrics="StartOffset EndOffset AL",
|
129 |
+
output=f"{TestTimeWaitKUnityS2TM4T.__name__}-wait{model_configs['waitk_lagging']}-debug",
|
130 |
+
)
|
131 |
+
|
132 |
+
model = TestTimeWaitKUnityS2TM4T({**data_configs, **model_configs, **eval_configs})
|
133 |
+
print("model", model)
|
134 |
+
|
135 |
+
evaluate(
|
136 |
+
TestTimeWaitKUnityS2TM4T, {**data_configs, **model_configs, **eval_configs}
|
137 |
+
)
|
138 |
+
|
139 |
class SimulevalTranscoder:
|
140 |
# def __init__(self, agent, sample_rate, debug, buffer_limit):
|
141 |
def __init__(self):
|
142 |
+
# print("MDUPPES in here", SileroVADAgent, TestTimeWaitKS2TVAD)
|
143 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
144 |
|
145 |
+
|
146 |
+
|
147 |
+
load_model_fairseq2()
|
148 |
+
|
149 |
device = "cpu"
|
150 |
print("DEVICE", device)
|
151 |
model_name_or_card="seamlessM4T_medium"
|
|
|
187 |
|
188 |
pipeline = TestTimeWaitKUnityV1M4T(model, args)
|
189 |
system_states = pipeline.build_states()
|
190 |
+
print('system states:')
|
191 |
+
for state in system_states:
|
192 |
+
print(state, vars(state))
|
193 |
+
|
194 |
input_segment = np.empty(0, dtype=np.int16)
|
195 |
segments = []
|
196 |
while True:
|
|
|
200 |
output_segment = pipeline.pushpop(speech_segment, system_states)
|
201 |
print('pushpop result')
|
202 |
print(output_segment)
|
203 |
+
print('system states after pushpop:')
|
204 |
+
for state in system_states:
|
205 |
+
print(state, vars(state))
|
206 |
if output_segment.finished:
|
207 |
segments.append(input_segment)
|
208 |
input_segment = np.empty(0, dtype=np.int16)
|