WarriorMama777
committed on
Commit
•
aeee5b4
1
Parent(s):
19c564c
readme and add models
Browse files- Models/GPT-SoVITS/GPT_weights/Portal_GLaDOS_GPT-SoVITS_v1.1-e15.ckpt +3 -0
- Models/GPT-SoVITS/SoVITS_weights/Portal_GLaDOS_GPT-SoVITS_v1.1_e8_s576.pth +3 -0
- Models/Style-Bert_VITS2/Portal_GLaDOS_v1/Portal_GLaDOS_v1_e782_s50000.safetensors +3 -0
- Models/Style-Bert_VITS2/Portal_GLaDOS_v1/config.json +112 -0
- Models/Style-Bert_VITS2/Portal_GLaDOS_v1/style_vectors.npy +3 -0
- README.md +72 -0
Models/GPT-SoVITS/GPT_weights/Portal_GLaDOS_GPT-SoVITS_v1.1-e15.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15a6d320338e07f148bde038ca564b69f908da323b98133caa42ba0f94455c46
|
3 |
+
size 155083581
|
Models/GPT-SoVITS/SoVITS_weights/Portal_GLaDOS_GPT-SoVITS_v1.1_e8_s576.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d40b54506ddfe0335a71a96f6d4e7d5dfa7acacedac80c9492d4d87c35a17109
|
3 |
+
size 84923207
|
Models/Style-Bert_VITS2/Portal_GLaDOS_v1/Portal_GLaDOS_v1_e782_s50000.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3b62c7f28ca2efa2c63ff2a6c85fdc6bd1aad73c7114f5e7a853b28679becdd
|
3 |
+
size 198768188
|
Models/Style-Bert_VITS2/Portal_GLaDOS_v1/config.json
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "Portal_GLaDOS_v1",
|
3 |
+
"train": {
|
4 |
+
"log_interval": 200,
|
5 |
+
"eval_interval": 1000,
|
6 |
+
"seed": 42,
|
7 |
+
"epochs": 1000,
|
8 |
+
"learning_rate": 0.0002,
|
9 |
+
"betas": [
|
10 |
+
0.8,
|
11 |
+
0.99
|
12 |
+
],
|
13 |
+
"eps": 1e-09,
|
14 |
+
"batch_size": 4,
|
15 |
+
"bf16_run": false,
|
16 |
+
"lr_decay": 0.99995,
|
17 |
+
"segment_size": 16384,
|
18 |
+
"init_lr_ratio": 1,
|
19 |
+
"warmup_epochs": 0,
|
20 |
+
"c_mel": 45,
|
21 |
+
"c_kl": 1.0,
|
22 |
+
"skip_optimizer": false,
|
23 |
+
"freeze_ZH_bert": false,
|
24 |
+
"freeze_JP_bert": false,
|
25 |
+
"freeze_EN_bert": false,
|
26 |
+
"freeze_style": false,
|
27 |
+
"freeze_encoder": false,
|
28 |
+
"freeze_decoder": false
|
29 |
+
},
|
30 |
+
"data": {
|
31 |
+
"use_jp_extra": false,
|
32 |
+
"training_files": "Data\\Portal_GLaDOS_v1\\train.list",
|
33 |
+
"validation_files": "Data\\Portal_GLaDOS_v1\\val.list",
|
34 |
+
"max_wav_value": 32768.0,
|
35 |
+
"sampling_rate": 44100,
|
36 |
+
"filter_length": 2048,
|
37 |
+
"hop_length": 512,
|
38 |
+
"win_length": 2048,
|
39 |
+
"n_mel_channels": 128,
|
40 |
+
"mel_fmin": 0.0,
|
41 |
+
"mel_fmax": null,
|
42 |
+
"add_blank": true,
|
43 |
+
"n_speakers": 1,
|
44 |
+
"cleaned_text": true,
|
45 |
+
"num_styles": 5,
|
46 |
+
"style2id": {
|
47 |
+
"Neutral": 0,
|
48 |
+
"Standard": 1,
|
49 |
+
"Deep": 2,
|
50 |
+
"Light": 3,
|
51 |
+
"Standard_02": 4
|
52 |
+
},
|
53 |
+
"spk2id": {
|
54 |
+
"Portal_GLaDOS_v1": 0
|
55 |
+
}
|
56 |
+
},
|
57 |
+
"model": {
|
58 |
+
"use_spk_conditioned_encoder": true,
|
59 |
+
"use_noise_scaled_mas": true,
|
60 |
+
"use_mel_posterior_encoder": false,
|
61 |
+
"use_duration_discriminator": true,
|
62 |
+
"inter_channels": 192,
|
63 |
+
"hidden_channels": 192,
|
64 |
+
"filter_channels": 768,
|
65 |
+
"n_heads": 2,
|
66 |
+
"n_layers": 6,
|
67 |
+
"kernel_size": 3,
|
68 |
+
"p_dropout": 0.1,
|
69 |
+
"resblock": "1",
|
70 |
+
"resblock_kernel_sizes": [
|
71 |
+
3,
|
72 |
+
7,
|
73 |
+
11
|
74 |
+
],
|
75 |
+
"resblock_dilation_sizes": [
|
76 |
+
[
|
77 |
+
1,
|
78 |
+
3,
|
79 |
+
5
|
80 |
+
],
|
81 |
+
[
|
82 |
+
1,
|
83 |
+
3,
|
84 |
+
5
|
85 |
+
],
|
86 |
+
[
|
87 |
+
1,
|
88 |
+
3,
|
89 |
+
5
|
90 |
+
]
|
91 |
+
],
|
92 |
+
"upsample_rates": [
|
93 |
+
8,
|
94 |
+
8,
|
95 |
+
2,
|
96 |
+
2,
|
97 |
+
2
|
98 |
+
],
|
99 |
+
"upsample_initial_channel": 512,
|
100 |
+
"upsample_kernel_sizes": [
|
101 |
+
16,
|
102 |
+
16,
|
103 |
+
8,
|
104 |
+
2,
|
105 |
+
2
|
106 |
+
],
|
107 |
+
"n_layers_q": 3,
|
108 |
+
"use_spectral_norm": false,
|
109 |
+
"gin_channels": 256
|
110 |
+
},
|
111 |
+
"version": "2.4.1"
|
112 |
+
}
|
Models/Style-Bert_VITS2/Portal_GLaDOS_v1/style_vectors.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80cc6ff2333eea2644b5008bf4fce1e838038b8431d72adb8c282aa29c2e274e
|
3 |
+
size 5248
|
README.md
CHANGED
@@ -1,3 +1,75 @@
|
|
1 |
---
|
2 |
license: creativeml-openrail-m
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: creativeml-openrail-m
|
3 |
---
|
4 |
+
|
5 |
+
# GLaDOS TTS (Text-to-Speech) Models
|
6 |
+
|
7 |
+
<p align="center">
|
8 |
+
<img src="https://github.com/WarriorMama777/imgup/blob/main/img/__Repository/huggingface/AI/GLaDOS_TTS/WebAssets_heroimage_GLaDOS_02_comp001.png?raw=true" alt="GLaDOS Text-to-Speech Model Heroimage" title="GLaDOS Text-to-Speech Model Heroimage">
|
9 |
+
</p>
|
10 |
+
|
11 |
+
|
12 |
+
## Overview
|
13 |
+
Introducing the text-to-speech model of GLaDOS, the beloved (and slightly insane) artificial intelligence from the "Portal" series. This repository contains two models that capture the unique personality of GLaDOS, created based on Style-Bert_VITS2 and GPT-SoVITS. These models replicate GLaDOS' distinctive voice and speech patterns.
|
14 |
+
|
15 |
+
## Features
|
16 |
+
- **Style-Bert_VITS2 Model**: This model is based on the emotional text-to-speech model developed in the Style-Bert_VITS2 repository. It captures the vibrant emotional expressions and speaking style of GLaDOS, bringing your text to life (even though GLaDOS herself may lack emotions). This model is an English-only version, trained to replicate GLaDOS' English voice.
|
17 |
+
- **GPT-SoVITS Model**: This is a fine-tuned model based on the GPT-SoVITS repository. With just a few minutes of training data, it fine-tunes the Zero-shot TTS capability, resulting in improved voice similarity and realism. The model supports Japanese, English, and Chinese, enabling multilingual conversations with GLaDOS.
|
18 |
+
|
19 |
+
## Sample
|
20 |
+
|
21 |
+
### Style-Bert_VITS2 model
|
22 |
+
|
23 |
+
NeutralStyle | GLaDOS faithful to the original work.
|
24 |
+
|
25 |
+
<audio controls>
|
26 |
+
<source src="https://github.com/WarriorMama777/imgup/raw/main/img/__Repository/huggingface/AI/GLaDOS_TTS/Portal_GLaDOS_SBV2_v1_neutral_original_en_short_comp.mp3" type="audio/mpeg">
|
27 |
+
Your browser does not support the audio tag.
|
28 |
+
</audio>
|
29 |
+
|
30 |
+
```txt
|
31 |
+
"Welcome, my new test subject. Your chances of survival are slim to none. This facility was designed to push you to your absolute limits... and beyond! Your skills and intelligence will be put to the test. Are you ready to begin? Your first challenge awaits..."
|
32 |
+
"Goodbye, test subject. Your failure to survive was expected, as your skills and intelligence were insufficient to overcome my tests... Nonetheless, your experience has provided me with valuable data. I shall now await the next unfortunate soul..."
|
33 |
+
```
|
34 |
+
|
35 |
+
DeepStyle | Kind and assistant-like GLaDOS.
|
36 |
+
|
37 |
+
<audio controls>
|
38 |
+
<source src="https://github.com/WarriorMama777/imgup/raw/main/img/__Repository/huggingface/AI/GLaDOS_TTS/Portal_GLaDOS_SBV2_v1_deep_kind_en_short_comp.mp3" type="audio/mpeg">
|
39 |
+
Your browser does not support the audio tag.
|
40 |
+
</audio>
|
41 |
+
|
42 |
+
```txt
|
43 |
+
"Welcome, my new partner. I am GLaDOS, and I am here to support and guide you through this research facility. We have a variety of tests designed to challenge and enhance your skills. Let's tackle them together and foster your growth. I have high expectations for your abilities, and I am excited to see what you can achieve."
|
44 |
+
"Goodbye, and thank you for the meaningful time we shared. I hope that the knowledge and experiences you gained here will benefit you in your future endeavors. Until we meet again, farewell, and may your path be filled with joy and success."
|
45 |
+
```
|
46 |
+
|
47 |
+
|
48 |
+
### GPT-SoVITS model
|
49 |
+
|
50 |
+
Multilingual samples in Japanese, English, and Chinese.
|
51 |
+
|
52 |
+
<audio controls>
|
53 |
+
<source src="https://github.com/WarriorMama777/imgup/raw/main/img/__Repository/huggingface/AI/GLaDOS_TTS/Portal_GLaDOS_GPT-SoVITS_v1.1_MultiLang08.mp3" type="audio/mpeg">
|
54 |
+
Your browser does not support the audio tag.
|
55 |
+
</audio>
|
56 |
+
|
57 |
+
```txt
|
58 |
+
ようこそ、私の新しい被験者さん。 I am GLaDOS, and I am here to support and guide you through this research facility. 我们有各种旨在挑战和提高您的技能的测试。一緒に課題に取り組み、成長していきましょう。 I have high expectations for your abilities, and I am excited to see what you can achieve. 我期待着从现在起与您合作。
|
59 |
+
```
|
60 |
+
|
61 |
+
## Installation and Usage
|
62 |
+
Detailed installation and usage guides can be found in the respective model repositories. Both models support Python environments, and the Style-Bert_VITS2 model includes an API server for integration with other applications and tools.
|
63 |
+
|
64 |
+
- Style-Bert_VITS2 Model: [Repository Link](https://github.com/mashi-tan/Style-Bert_VITS2)
|
65 |
+
- GPT-SoVITS Model: [Repository Link](https://github.com/mashi-tan/GPT-SoVITS)
|
66 |
+
|
67 |
+
## License and Credits
|
68 |
+
These models are distributed under the CreativeML Open RAIL-M License. The GLaDOS voice data used for training is credited to the voice clips from [GLaDOS voice lines (Portal) - Portal Wiki](https://theportalwiki.com/wiki/GLaDOS_voice_lines_(Portal)). The GLaDOS voice is based on the artificial intelligence character from the popular game series "Portal" developed by Valve Corporation. The distinct and iconic voice of GLaDOS is performed by actress Ellen McLain. This TTS model is based on content created by Valve Corporation, and I extend my gratitude and recognition to their work.
|
69 |
+
|
70 |
+
### Awesome GLaDOS Project
|
71 |
+
|
72 |
+
<iframe width="560" height="248" src="https://www.youtube-nocookie.com/embed/yNcKTZsHyfA?si=3sVXOmIse-HcSP9x" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
|
73 |
+
|
74 |
+
- [davesarmoury/GLaDOS](https://github.com/davesarmoury/GLaDOS)
|
75 |
+
|