seamless-streaming

Running on T4

App Files Files Community

Mark Duppenthaler committed on Nov 13, 2023

Commit

8e82d74

•

1 Parent(s): 2d522b6

temp

Browse files

Files changed (9) hide show

.vscode/settings.json +26 -0
__pycache__/app.cpython-310.pyc +0 -0
__pycache__/lang_list.cpython-310.pyc +0 -0
__pycache__/m4t_app.cpython-310.pyc +0 -0
__pycache__/simuleval_transcoder.cpython-310.pyc +0 -0
__pycache__/test_pipeline.cpython-310.pyc +0 -0
requirements.txt +1 -1
seamless_communication +1 -0
simuleval_transcoder.py +54 -7

.vscode/settings.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "[python]": {
+    "editor.defaultFormatter": "ms-python.python"
+  },
+  "python.formatting.provider": "none",
+  "workbench.colorCustomizations": {
+    "activityBar.activeBackground": "#fbed80",
+    "activityBar.background": "#fbed80",
+    "activityBar.foreground": "#15202b",
+    "activityBar.inactiveForeground": "#15202b99",
+    "activityBarBadge.background": "#06b9a5",
+    "activityBarBadge.foreground": "#15202b",
+    "commandCenter.border": "#15202b99",
+    "sash.hoverBorder": "#fbed80",
+    "statusBar.background": "#f9e64f",
+    "statusBar.foreground": "#15202b",
+    "statusBarItem.hoverBackground": "#f7df1e",
+    "statusBarItem.remoteBackground": "#f9e64f",
+    "statusBarItem.remoteForeground": "#15202b",
+    "titleBar.activeBackground": "#f9e64f",
+    "titleBar.activeForeground": "#15202b",
+    "titleBar.inactiveBackground": "#f9e64f99",
+    "titleBar.inactiveForeground": "#15202b99"
+  },
+  "peacock.remoteColor": "#f9e64f"
+}

__pycache__/app.cpython-310.pyc ADDED Viewed

Binary file (2.57 kB). View file

__pycache__/lang_list.cpython-310.pyc ADDED Viewed

Binary file (4.03 kB). View file

__pycache__/m4t_app.cpython-310.pyc ADDED Viewed

Binary file (8.44 kB). View file

__pycache__/simuleval_transcoder.cpython-310.pyc ADDED Viewed

Binary file (5.17 kB). View file

__pycache__/test_pipeline.cpython-310.pyc ADDED Viewed

Binary file (2.56 kB). View file

requirements.txt CHANGED Viewed

@@ -1,7 +1,7 @@
 # fairseq2==0.1.0
 # Temp to skip
-git+https://github.com/mduppes/fairseq2.git@93420c86ba01349ee8f90d7adda439b666b50557
 # git+https://github.com/facebookresearch/seamless_communication
 ./seamless_communication
 # comment this out to test fairseq1 first

 # fairseq2==0.1.0
 # Temp to skip
+# git+https://github.com/mduppes/fairseq2.git@93420c86ba01349ee8f90d7adda439b666b50557
 # git+https://github.com/facebookresearch/seamless_communication
 ./seamless_communication
 # comment this out to test fairseq1 first

seamless_communication ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit 02405dfd0c187d625aa66255ff8c39f98031a091

simuleval_transcoder.py CHANGED Viewed

@@ -31,12 +31,18 @@ from seamless_communication.models.vocoder import load_vocoder_model, Vocoder
-from seamless_communication.models.streaming.agents import (
-    SileroVADAgent,
-    TestTimeWaitKS2TVAD,
-    TestTimeWaitKUnityV1M4T
 )
 ### From test_pipeline
 import math
 import soundfile
@@ -98,12 +104,48 @@ def load_model_for_inference(
     model.eval()
     return model
 class SimulevalTranscoder:
     # def __init__(self, agent, sample_rate, debug, buffer_limit):
     def __init__(self):
-        print("MDUPPES in here", SileroVADAgent, TestTimeWaitKS2TVAD)
         device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         device = "cpu"
         print("DEVICE", device)
         model_name_or_card="seamlessM4T_medium"
@@ -145,8 +187,10 @@ class SimulevalTranscoder:
         pipeline = TestTimeWaitKUnityV1M4T(model, args)
         system_states = pipeline.build_states()
-        print('system states')
-        print(system_states)
         input_segment = np.empty(0, dtype=np.int16)
         segments = []
         while True:
@@ -156,6 +200,9 @@ class SimulevalTranscoder:
             output_segment = pipeline.pushpop(speech_segment, system_states)
             print('pushpop result')
             print(output_segment)
             if output_segment.finished:
                 segments.append(input_segment)
                 input_segment = np.empty(0, dtype=np.int16)

+# from seamless_communication.models.streaming.agents import (
+#     SileroVADAgent,
+#     TestTimeWaitKS2TVAD,
+#     TestTimeWaitKUnityV1M4T
+# )
+from seamless_communication.cli.streaming.agents.tt_waitk_unity_s2t_m4t import (
+    TestTimeWaitKUnityS2TM4T,
 )
+from seamless_communication.cli.streaming.dataloader import Fairseq2SpeechToTextDataloader
 ### From test_pipeline
 import math
 import soundfile
     model.eval()
     return model
+def load_model_fairseq2():
+    data_configs = dict(
+        dataloader="fairseq2_s2t",
+        data_file="/large_experiments/seamless/ust/abinesh/data/s2st50_manifests/50-10/simuleval/dev_mtedx_filt_50-10_debug.tsv",
+    )
+    model_configs = dict(
+        model_name="seamlessM4T_v2_large",
+        device="cuda:0",
+        source_segment_size=320,
+        waitk_lagging=7,
+        fixed_pre_decision_ratio=2,
+        init_target_tokens="</s> __eng__",
+        max_len_a=0,
+        max_len_b=200,
+        agent_class="seamless_communication.cli.streaming.agents.tt_waitk_unity_s2t_m4t.TestTimeWaitKUnityS2TM4T",
+        task="s2st",
+        tgt_lang="eng",
+    )
+    eval_configs = dict(
+        latency_metrics="StartOffset EndOffset AL",
+        output=f"{TestTimeWaitKUnityS2TM4T.__name__}-wait{model_configs['waitk_lagging']}-debug",
+    )
+    model = TestTimeWaitKUnityS2TM4T({**data_configs, **model_configs, **eval_configs})
+    print("model", model)
+    evaluate(
+        TestTimeWaitKUnityS2TM4T, {**data_configs, **model_configs, **eval_configs}
+    )
 class SimulevalTranscoder:
     # def __init__(self, agent, sample_rate, debug, buffer_limit):
     def __init__(self):
+        # print("MDUPPES in here", SileroVADAgent, TestTimeWaitKS2TVAD)
         device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        load_model_fairseq2()
         device = "cpu"
         print("DEVICE", device)
         model_name_or_card="seamlessM4T_medium"
         pipeline = TestTimeWaitKUnityV1M4T(model, args)
         system_states = pipeline.build_states()
+        print('system states:')
+        for state in system_states:
+            print(state, vars(state))
         input_segment = np.empty(0, dtype=np.int16)
         segments = []
         while True:
             output_segment = pipeline.pushpop(speech_segment, system_states)
             print('pushpop result')
             print(output_segment)
+            print('system states after pushpop:')
+            for state in system_states:
+                print(state, vars(state))
             if output_segment.finished:
                 segments.append(input_segment)
                 input_segment = np.empty(0, dtype=np.int16)