bark-Ai-audio

Build error

App Files Files Community

usamakenway

georg-suno committed on Apr 25, 2023

Commit

5a027a4

•

0 Parent(s):

Duplicate from suno/bark

Browse files

Co-authored-by: Georg Kucsko <[email protected]>

Files changed (4) hide show

.gitattributes +34 -0
README.md +14 -0
app.py +146 -0
requirements.txt +5 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+---
+title: Bark
+emoji: 🐶
+colorFrom: pink
+colorTo: blue
+sdk: gradio
+sdk_version: 3.24.1
+app_file: app.py
+pinned: false
+license: cc-by-nc-4.0
+duplicated_from: suno/bark
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,146 @@

+import numpy as np
+import gradio as gr
+from bark import SAMPLE_RATE, generate_audio, preload_models
+from bark.generation import SUPPORTED_LANGS
+# When DEBUG_MODE is True the models are not preloaded and gen_tts returns
+# silence instead of calling the model.
+DEBUG_MODE = False
+if not DEBUG_MODE:
+    _ = preload_models()
+
+# (dropdown label, bark history-prompt name) for ten numbered speakers per
+# language code listed in SUPPORTED_LANGS.
+_SPEAKER_PRESETS = [
+    (f"Speaker {n} ({lang})", f"{lang}_speaker_{n}")
+    for _, lang in SUPPORTED_LANGS
+    for n in range(10)
+]
+
+# Labels offered in the UI: the two special entries first, then every speaker.
+AVAILABLE_PROMPTS = ["Unconditional", "Announcer"] + [
+    label for label, _preset in _SPEAKER_PRESETS
+]
+
+# Maps a UI label to the history_prompt value handed to generate_audio.
+PROMPT_LOOKUP = dict(_SPEAKER_PRESETS)
+PROMPT_LOOKUP["Unconditional"] = None
+PROMPT_LOOKUP["Announcer"] = "announcer"
+default_text = "Hello, my name is Suno. And, uh — and I like pizza. [laughs]\nBut I also have other interests such as playing tic tac toe."
+title = "<div style='text-align:left'>🐶 Bark</div>"
+description = """
+<div>
+<a style="display:inline-block" href='https://github.com/suno-ai/bark'><img src='https://img.shields.io/github/stars/suno-ai/bark?style=social' /></a>
+<a style='display:inline-block' href='https://discord.gg/J2B2vsjKuE'><img src='https://dcbadge.vercel.app/api/server/J2B2vsjKuE?compact=true&style=flat' /></a>
+<a style="display:inline-block; margin-left: 1em" href="https://huggingface.co/spaces/suno/bark?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space%20to%20skip%20the%20queue-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
+</div>
+Bark is a universal text-to-audio model created by [Suno](https://www.suno.ai), with code publicly available [here](https://github.com/suno-ai/bark). \
+Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. \
+This demo should be used for research purposes only. Commercial use is strictly prohibited. \
+The model output is not censored and the authors do not endorse the opinions in the generated content. \
+Use at your own risk.
+"""
+article = """
+## 🌎 Foreign Language
+Bark supports various languages out-of-the-box and automatically determines language from input text. \
+When prompted with code-switched text, Bark will even attempt to employ the native accent for the respective languages in the same voice.
+Try the prompt:
+```
+Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.
+```
+## 🤭 Non-Speech Sounds
+Below is a list of some known non-speech sounds, but we are finding more every day. \
+Please let us know if you find patterns that work particularly well on Discord!
+* [laughter]
+* [laughs]
+* [sighs]
+* [music]
+* [gasps]
+* [clears throat]
+* — or ... for hesitations
+* ♪ for song lyrics
+* capitalization for emphasis of a word
+* MAN/WOMAN: for bias towards speaker
+Try the prompt:
+```
+" [clears throat] Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as... ♪ singing ♪."
+```
+## 🎶 Music
+Bark can generate all types of audio, and, in principle, doesn't see a difference between speech and music. \
+Sometimes Bark chooses to generate text as music, but you can help it out by adding music notes around your lyrics.
+Try the prompt:
+```
+♪ In the jungle, the mighty jungle, the lion barks tonight ♪
+```
+## 🧬 Voice Cloning
+Bark has the capability to fully clone voices - including tone, pitch, emotion and prosody. \
+The model also attempts to preserve music, ambient noise, etc. from input audio. \
+However, to mitigate misuse of this technology, we limit the audio history prompts to a limited set of Suno-provided, fully synthetic options to choose from.
+## 👥 Speaker Prompts
+You can provide certain speaker prompts such as NARRATOR, MAN, WOMAN, etc. \
+Please note that these are not always respected, especially if a conflicting audio history prompt is given.
+Try the prompt:
+```
+WOMAN: I would like an oatmilk latte please.
+MAN: Wow, that's expensive!
+```
+## Details
+Bark model by [Suno](https://suno.ai/), including official [code](https://github.com/suno-ai/bark) and model weights. \
+Gradio demo supported by 🤗 Hugging Face. Bark is licensed under a non-commercial license: CC-BY 4.0 NC, see details on [GitHub](https://github.com/suno-ai/bark).
+"""
+examples = [
+    ["Please surprise me and speak in whatever voice you enjoy. Vielen Dank und Gesundheit!", "Unconditional"],#, 0.7, 0.7],
+    ["Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe.", "Speaker 1 (en)"],#, 0.7, 0.7],
+    ["Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.",  "Speaker 0 (es)"],#, 0.7, 0.7],
+]
+def gen_tts(text, history_prompt):
+    """Generate audio for ``text`` and return it in Gradio's numpy audio format.
+
+    Args:
+        text: Prompt text forwarded to ``generate_audio``.
+        history_prompt: UI label of the acoustic prompt; must be a key of
+            ``PROMPT_LOOKUP``.
+
+    Returns:
+        A ``(SAMPLE_RATE, samples)`` tuple where ``samples`` is an int16
+        numpy array.
+
+    Raises:
+        KeyError: If ``history_prompt`` is not a key of ``PROMPT_LOOKUP``.
+    """
+    voice_preset = PROMPT_LOOKUP[history_prompt]
+    if DEBUG_MODE:
+        # SAMPLE_RATE zero samples, i.e. one second of silence.
+        audio_arr = np.zeros(SAMPLE_RATE)
+    else:
+        audio_arr = generate_audio(text, history_prompt=voice_preset)
+    # Clamp before scaling: a sample outside [-1, 1] would otherwise exceed
+    # the int16 range and wrap around in the cast below.
+    # NOTE(review): assumes generate_audio returns float samples nominally
+    # in [-1, 1] - confirm against the bark API.
+    audio_arr = np.clip(audio_arr, -1.0, 1.0)
+    audio_arr = (audio_arr * 32767).astype(np.int16)
+    return (SAMPLE_RATE, audio_arr)
+# Input widgets, in the positional order gen_tts receives them:
+# the text to speak, then the acoustic-prompt label.
+text_input = gr.Textbox(label="Input Text", lines=2, value=default_text)
+voice_input = gr.Dropdown(AVAILABLE_PROMPTS, label="Acoustic Prompt", value="Speaker 1 (en)")
+
+# gen_tts returns a (sample_rate, numpy array) pair, hence type="numpy".
+audio_output = gr.Audio(label="Generated Audio", type="numpy")
+
+iface = gr.Interface(
+    fn=gen_tts,
+    inputs=[text_input, voice_input],
+    outputs=[audio_output],
+    title=title,
+    description=description,
+    article=article,
+    examples=examples,
+    cache_examples=False,
+)
+
+# Hidden container holding a share button; no handler is attached to it here.
+with gr.Group(elem_id="share-btn-container", visible=False):
+    share_button = gr.Button("Share to community", elem_id="share-btn")
+
+iface.launch(enable_queue=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+git+https://github.com/suno-ai/bark.git
+https://download.pytorch.org/whl/nightly/pytorch_triton-2.1.0%2B46672772b4-cp38-cp38-linux_x86_64.whl
+https://download.pytorch.org/whl/nightly/cu117/torch-2.1.0.dev20230413%2Bcu117-cp38-cp38-linux_x86_64.whl
+https://download.pytorch.org/whl/nightly/cu117/torchvision-0.16.0.dev20230413%2Bcu117-cp38-cp38-linux_x86_64.whl
+https://download.pytorch.org/whl/nightly/cu117/torchaudio-2.1.0.dev20230413%2Bcu117-cp38-cp38-linux_x86_64.whl