Mark Duppenthaler committed
Commit 8e82d74
1 Parent(s): 2d522b6
.vscode/settings.json ADDED
@@ -0,0 +1,26 @@
+{
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.python"
+    },
+    "python.formatting.provider": "none",
+    "workbench.colorCustomizations": {
+        "activityBar.activeBackground": "#fbed80",
+        "activityBar.background": "#fbed80",
+        "activityBar.foreground": "#15202b",
+        "activityBar.inactiveForeground": "#15202b99",
+        "activityBarBadge.background": "#06b9a5",
+        "activityBarBadge.foreground": "#15202b",
+        "commandCenter.border": "#15202b99",
+        "sash.hoverBorder": "#fbed80",
+        "statusBar.background": "#f9e64f",
+        "statusBar.foreground": "#15202b",
+        "statusBarItem.hoverBackground": "#f7df1e",
+        "statusBarItem.remoteBackground": "#f9e64f",
+        "statusBarItem.remoteForeground": "#15202b",
+        "titleBar.activeBackground": "#f9e64f",
+        "titleBar.activeForeground": "#15202b",
+        "titleBar.inactiveBackground": "#f9e64f99",
+        "titleBar.inactiveForeground": "#15202b99"
+    },
+    "peacock.remoteColor": "#f9e64f"
+}
__pycache__/app.cpython-310.pyc ADDED
Binary file (2.57 kB)
 
__pycache__/lang_list.cpython-310.pyc ADDED
Binary file (4.03 kB)
 
__pycache__/m4t_app.cpython-310.pyc ADDED
Binary file (8.44 kB)
 
__pycache__/simuleval_transcoder.cpython-310.pyc ADDED
Binary file (5.17 kB)
 
__pycache__/test_pipeline.cpython-310.pyc ADDED
Binary file (2.56 kB)
 
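Aside: the *.pyc caches above are interpreter byproducts of local runs rather than source. If keeping them out of the working tree is desired, one option (besides a .gitignore entry) is to stop the interpreter from writing bytecode at all; a minimal sketch using the standard-library flag, equivalent to exporting PYTHONDONTWRITEBYTECODE=1:

import sys

# Disable writing __pycache__/*.pyc files for this interpreter process.
sys.dont_write_bytecode = True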
requirements.txt CHANGED
@@ -1,7 +1,7 @@
 # fairseq2==0.1.0
 
 # Temp to skip
-git+https://github.com/mduppes/fairseq2.git@93420c86ba01349ee8f90d7adda439b666b50557
+# git+https://github.com/mduppes/fairseq2.git@93420c86ba01349ee8f90d7adda439b666b50557
 # git+https://github.com/facebookresearch/seamless_communication
 ./seamless_communication
 # comment this out to test fairseq1 first
seamless_communication ADDED
@@ -0,0 +1 @@
+Subproject commit 02405dfd0c187d625aa66255ff8c39f98031a091
simuleval_transcoder.py CHANGED
@@ -31,12 +31,18 @@ from seamless_communication.models.vocoder import load_vocoder_model, Vocoder
 
 
 
-from seamless_communication.models.streaming.agents import (
-    SileroVADAgent,
-    TestTimeWaitKS2TVAD,
-    TestTimeWaitKUnityV1M4T
+# from seamless_communication.models.streaming.agents import (
+#     SileroVADAgent,
+#     TestTimeWaitKS2TVAD,
+#     TestTimeWaitKUnityV1M4T
+# )
+
+from seamless_communication.cli.streaming.agents.tt_waitk_unity_s2t_m4t import (
+    TestTimeWaitKUnityS2TM4T,
 )
 
+from seamless_communication.cli.streaming.dataloader import Fairseq2SpeechToTextDataloader
+
 ### From test_pipeline
 import math
 import soundfile
@@ -98,12 +104,48 @@ def load_model_for_inference(
     model.eval()
     return model
 
+def load_model_fairseq2():
+    data_configs = dict(
+        dataloader="fairseq2_s2t",
+        data_file="/large_experiments/seamless/ust/abinesh/data/s2st50_manifests/50-10/simuleval/dev_mtedx_filt_50-10_debug.tsv",
+    )
+
+    model_configs = dict(
+        model_name="seamlessM4T_v2_large",
+        device="cuda:0",
+        source_segment_size=320,
+        waitk_lagging=7,
+        fixed_pre_decision_ratio=2,
+        init_target_tokens="</s> __eng__",
+        max_len_a=0,
+        max_len_b=200,
+        agent_class="seamless_communication.cli.streaming.agents.tt_waitk_unity_s2t_m4t.TestTimeWaitKUnityS2TM4T",
+        task="s2st",
+        tgt_lang="eng",
+    )
+
+    eval_configs = dict(
+        latency_metrics="StartOffset EndOffset AL",
+        output=f"{TestTimeWaitKUnityS2TM4T.__name__}-wait{model_configs['waitk_lagging']}-debug",
+    )
+
+    model = TestTimeWaitKUnityS2TM4T({**data_configs, **model_configs, **eval_configs})
+    print("model", model)
+
+    evaluate(
+        TestTimeWaitKUnityS2TM4T, {**data_configs, **model_configs, **eval_configs}
+    )
+
 class SimulevalTranscoder:
     # def __init__(self, agent, sample_rate, debug, buffer_limit):
     def __init__(self):
-        print("MDUPPES in here", SileroVADAgent, TestTimeWaitKS2TVAD)
+        # print("MDUPPES in here", SileroVADAgent, TestTimeWaitKS2TVAD)
         device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
+
+
+        load_model_fairseq2()
+
         device = "cpu"
         print("DEVICE", device)
         model_name_or_card="seamlessM4T_medium"
@@ -145,8 +187,10 @@ class SimulevalTranscoder:
 
         pipeline = TestTimeWaitKUnityV1M4T(model, args)
        system_states = pipeline.build_states()
-        print('system states')
-        print(system_states)
+        print('system states:')
+        for state in system_states:
+            print(state, vars(state))
+
         input_segment = np.empty(0, dtype=np.int16)
         segments = []
         while True:
@@ -156,6 +200,9 @@
             output_segment = pipeline.pushpop(speech_segment, system_states)
             print('pushpop result')
             print(output_segment)
+            print('system states after pushpop:')
+            for state in system_states:
+                print(state, vars(state))
             if output_segment.finished:
                 segments.append(input_segment)
                 input_segment = np.empty(0, dtype=np.int16)
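For context on the run loop that the last two hunks instrument: input_segment grows with each incoming audio chunk and is flushed into segments whenever pushpop reports a finished output. Below is a standalone sketch of that buffering pattern, with a hypothetical stub standing in for pipeline.pushpop; the chunk size and finish schedule are illustrative assumptions, not the module's API.

import numpy as np

class _FakeOutput:
    """Minimal stand-in for the segment object returned by pipeline.pushpop()."""
    def __init__(self, finished: bool):
        self.finished = finished

def fake_pushpop(call_index: int) -> _FakeOutput:
    # Hypothetical stub: flag every fourth chunk as the end of a segment.
    return _FakeOutput(finished=(call_index % 4 == 3))

input_segment = np.empty(0, dtype=np.int16)  # buffer for the segment being built
segments = []                                # completed segments

for i in range(8):                           # pretend 8 chunks arrive from the stream
    chunk = (np.random.randn(320) * 1000).astype(np.int16)  # 320 samples = 20 ms at 16 kHz
    input_segment = np.concatenate([input_segment, chunk])
    output = fake_pushpop(i)
    if output.finished:                      # flush the buffer and start a new segment
        segments.append(input_segment)
        input_segment = np.empty(0, dtype=np.int16)

print(f"{len(segments)} finished segments, lengths={[len(s) for s in segments]}")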