Skip to content

Commit 0ad44da

Browse files
authored
fixes and information updates (#509)
* derank extension_bark_legacy
* attempt install all by default
* Add fixes for decorators to work with non-'text' inputs.
* Clean up .env generator and remove the Bark environment variables from settings.
* Add Audio book extension definitions for future use (extensions not available yet).
* update readme
1 parent bf29d91 commit 0ad44da

File tree

11 files changed

+139
-173
lines changed

11 files changed

+139
-173
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ extensions.external.json
4343
/data/models/kimi-audio/
4444
/data/models/piper/
4545
/data/models/ace_step/
46+
/data/models/bark/
47+
4648
/data/api/presets/
4749

4850
# Ignore temporary files

README.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
[![Discord](https://img.shields.io/discord/1258772280071295018?label=discord&logo=discord&logoColor=white)](https://discord.gg/V8BKTVRtJ9)
2121
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rsxdalv/tts-webui/blob/main/notebooks/google_colab.ipynb)
2222
[![GitHub forks](https://img.shields.io/github/forks/rsxdalv/tts-webui?style=social)](https://github.com/rsxdalv/tts-webui/network/members)
23+
[![YouTube](https://img.shields.io/badge/YouTube-%23FF0000.svg?logo=YouTube&logoColor=white)](https://www.youtube.com/@TTS-WebUI)
2324

2425
</div>
2526

@@ -29,7 +30,7 @@
2930

3031
</div>
3132

32-
| [![Watch the video](https://img.youtube.com/vi/Y8J717tr9t0/sddefault.jpg)](https://youtu.be/Y8J717tr9t0) | [![Watch the video](https://img.youtube.com/vi/ScN2ypewABc/sddefault.jpg)](https://youtu.be/ScN2ypewABc) | [![Watch the video](https://img.youtube.com/vi/JXojhFjZ39k/sddefault.jpg)](https://youtu.be/JXojhFjZ39k) |
33+
| [![Watch the video](https://img.youtube.com/vi/JXojhFjZ39k/sddefault.jpg)](https://youtu.be/JXojhFjZ39k) | [![Watch the video](https://img.youtube.com/vi/ScN2ypewABc/sddefault.jpg)](https://youtu.be/ScN2ypewABc) | [![Watch the video](https://img.youtube.com/vi/HFtrCnczZQI/sddefault.jpg)](https://youtu.be/HFtrCnczZQI) |
3334
| :------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------: |
3435

3536
<div align="center">
@@ -85,6 +86,14 @@
8586

8687
## Changelog
8788

89+
May 26:
90+
* Add fixes for decorators to work with non-'text' inputs.
91+
* Clean up .env generator and remove the Bark environment variables from settings.
92+
* Add Audio book extension definitions for future use (extensions not available yet).
93+
* Fix SeamlessM4T Audio to Audio tab.
94+
* Update ACE-Step extension.
95+
* Improve Kokoro TTS API.
96+
8897
May 14:
8998
* Prepare for Python 3.11 and 3.12 support.
9099

extensions.json

Lines changed: 76 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -112,22 +112,6 @@
112112
"extension_website": "https://github.com/rsxdalv/extension_bark",
113113
"extension_platform_version": "0.0.1"
114114
},
115-
{
116-
"package_name": "extension_bark_voice_clone",
117-
"recommended": true,
118-
"name": "Bark Voice Clone",
119-
"version": "0.0.1",
120-
"requirements": "git+https://github.com/rsxdalv/extension_bark_voice_clone@main",
121-
"description": "Bark Voice Clone allows cloning voices for use with Bark TTS",
122-
"extension_type": "interface",
123-
"extension_class": "text-to-speech",
124-
"author": "GitMylo",
125-
"extension_author": "rsxdalv",
126-
"license": "MIT",
127-
"website": "https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer",
128-
"extension_website": "https://github.com/rsxdalv/extension_bark_voice_clone",
129-
"extension_platform_version": "0.0.1"
130-
},
131115
{
132116
"package_name": "extension_stable_audio",
133117
"recommended": true,
@@ -268,21 +252,6 @@
268252
"extension_website": "https://github.com/rsxdalv/extension_riffusion",
269253
"extension_platform_version": "0.0.1"
270254
},
271-
{
272-
"package_name": "extension_bark_legacy",
273-
"name": "Bark Legacy",
274-
"version": "0.0.1",
275-
"requirements": "git+https://github.com/rsxdalv/extension_bark_legacy@main",
276-
"description": "This is the legacy UI of Bark from TTS-WebUI",
277-
"extension_type": "interface",
278-
"extension_class": "text-to-speech",
279-
"author": "rsxdalv",
280-
"extension_author": "rsxdalv",
281-
"license": "MIT",
282-
"website": "https://github.com/rsxdalv/extension_bark_legacy",
283-
"extension_website": "https://github.com/rsxdalv/extension_bark_legacy",
284-
"extension_platform_version": "0.0.1"
285-
},
286255
{
287256
"package_name": "extension_audiocraft_mac",
288257
"name": "MusicGen (Mac)",
@@ -672,6 +641,82 @@
672641
"website": "https://github.com/rsxdalv/extension_rvc_training",
673642
"extension_website": "https://github.com/rsxdalv/extension_rvc_training",
674643
"extension_platform_version": "0.0.1"
644+
},
645+
{
646+
"package_name": "extension_bark_legacy",
647+
"name": "Bark Legacy",
648+
"version": "0.0.1",
649+
"requirements": "git+https://github.com/rsxdalv/extension_bark_legacy@main",
650+
"description": "This is the legacy UI of Bark from TTS-WebUI",
651+
"extension_type": "interface",
652+
"extension_class": "text-to-speech",
653+
"author": "rsxdalv",
654+
"extension_author": "rsxdalv",
655+
"license": "MIT",
656+
"website": "https://github.com/rsxdalv/extension_bark_legacy",
657+
"extension_website": "https://github.com/rsxdalv/extension_bark_legacy",
658+
"extension_platform_version": "0.0.1"
659+
},
660+
{
661+
"package_name": "extension_bark_voice_clone",
662+
"recommended": true,
663+
"name": "Bark Voice Clone",
664+
"version": "0.0.1",
665+
"requirements": "git+https://github.com/rsxdalv/extension_bark_voice_clone@main",
666+
"description": "Bark Voice Clone allows cloning voices for use with Bark TTS",
667+
"extension_type": "interface",
668+
"extension_class": "tools",
669+
"author": "GitMylo",
670+
"extension_author": "rsxdalv",
671+
"license": "MIT",
672+
"website": "https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer",
673+
"extension_website": "https://github.com/rsxdalv/extension_bark_voice_clone",
674+
"extension_platform_version": "0.0.1"
675+
},
676+
{
677+
"package_name": "extension_ebook2audiobook",
678+
"name": "Ebook2Audiobook (Not available yet)",
679+
"version": "0.0.1",
680+
"requirements": "git+https://github.com/rsxdalv/extension_ebook2audiobook@main",
681+
"description": "Ebook2Audiobook allows converting ebooks to audiobooks",
682+
"extension_type": "interface",
683+
"extension_class": "tools",
684+
"author": "rsxdalv",
685+
"extension_author": "rsxdalv",
686+
"license": "MIT",
687+
"website": "https://github.com/rsxdalv/extension_ebook2audiobook",
688+
"extension_website": "https://github.com/rsxdalv/extension_ebook2audiobook",
689+
"extension_platform_version": "0.0.1"
690+
},
691+
{
692+
"package_name": "extension_epub2tts",
693+
"name": "EPub2TTS (Not available yet)",
694+
"version": "0.0.1",
695+
"requirements": "git+https://github.com/rsxdalv/extension_epub2tts@main",
696+
"description": "EPub2TTS allows converting ebooks to audiobooks",
697+
"extension_type": "interface",
698+
"extension_class": "tools",
699+
"author": "rsxdalv",
700+
"extension_author": "rsxdalv",
701+
"license": "MIT",
702+
"website": "https://github.com/rsxdalv/extension_epub2tts",
703+
"extension_website": "https://github.com/rsxdalv/extension_epub2tts",
704+
"extension_platform_version": "0.0.1"
705+
},
706+
{
707+
"package_name": "extension_audiobook_generator",
708+
"name": "Audiobook Generator (Not available yet)",
709+
"version": "0.0.1",
710+
"requirements": "git+https://github.com/rsxdalv/extension_audiobook_generator@main",
711+
"description": "Audiobook Generator allows converting ebooks to audiobooks",
712+
"extension_type": "interface",
713+
"extension_class": "tools",
714+
"author": "rsxdalv",
715+
"extension_author": "rsxdalv",
716+
"license": "MIT",
717+
"website": "https://github.com/rsxdalv/extension_audiobook_generator",
718+
"extension_website": "https://github.com/rsxdalv/extension_audiobook_generator",
719+
"extension_platform_version": "0.0.1"
675720
}
676721
],
677722
"decorators": [

extensions/builtin/extension_decorator_save_ffmpeg/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ def callback_save_generation_musicgen(
113113
print("Saving generation to", filename)
114114

115115
input_data = audio_array.tobytes()
116+
metadata["text"] = metadata.get("text", "")
116117
metadata["text"] = _double_escape_quotes(metadata["text"])
117118
metadata["text"] = _double_escape_newlines(metadata["text"])
118119
metadata_str = json.dumps(metadata, ensure_ascii=False)
@@ -216,6 +217,7 @@ def _attach_generation_meta(full_generation, arg1, metadata):
216217

217218
_attach_generation_meta(full_generation, "semantic_prompt", metadata)
218219
_attach_generation_meta(full_generation, "coarse_prompt", metadata)
220+
metadata["text"] = metadata.get("text", "")
219221
metadata["text"] = _double_escape_quotes(metadata["text"])
220222
metadata["text"] = _double_escape_newlines(metadata["text"])
221223

installer_scripts/js/initializeApp.js

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,12 @@ async function pip_install_all(fi = false) {
157157
"Dependencies are already up to date, skipping pip installs..."
158158
);
159159

160-
const pip_install_all_choice = await menu(
161-
["Yes", "No"],
162-
`Attempt single pip install of all dependencies (potentially faster)?
163-
(use arrow keys to move, enter to select)`
164-
);
160+
// const pip_install_all_choice = await menu(
161+
// ["Yes", "No"],
162+
// `Attempt single pip install of all dependencies (potentially faster)?
163+
// (use arrow keys to move, enter to select)`
164+
// );
165+
const pip_install_all_choice = "Yes";
165166

166167
if (pip_install_all_choice === "Yes") {
167168
try {

react-ui/src/pages/text-to-speech/bark/bark_settings.tsx

Lines changed: 1 addition & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -83,29 +83,6 @@ const BarkSettingsPage = () => {
8383
console.error("Error:", error);
8484
}
8585
}
86-
87-
const env_keys = [
88-
"env_suno_use_small_models",
89-
"env_suno_enable_mps",
90-
"env_suno_offload_cpu",
91-
];
92-
const isEnv = env_keys.includes(name);
93-
94-
if (isEnv) {
95-
try {
96-
await fetch("/api/gradio/save_env_variables_bark", {
97-
method: "POST",
98-
body: JSON.stringify(
99-
Object.fromEntries(
100-
env_keys.map((key) => [key, newBarkSettingsParams[key]])
101-
)
102-
),
103-
});
104-
setBarkSettingsParams(newBarkSettingsParams);
105-
} catch (error) {
106-
console.error("Error:", error);
107-
}
108-
}
10986
};
11087

11188
return (
@@ -177,39 +154,10 @@ const BarkSettingsPage = () => {
177154
/>
178155
</div>
179156

180-
<div className="flex flex-col gap-y-2 cell">
181-
<div className="flex gap-x-2 items-center">
182-
<label className="text-md">Environment (requires restart):</label>
183-
</div>
184-
<SwitchWithLabel
185-
label="Use small models"
186-
name="env_suno_use_small_models"
187-
value={barkSettingsParams.env_suno_use_small_models}
188-
onChange={handleChange}
189-
/>
190-
<SwitchWithLabel
191-
label="Enable MPS"
192-
name="env_suno_enable_mps"
193-
value={barkSettingsParams.env_suno_enable_mps}
194-
onChange={handleChange}
195-
/>
196-
<SwitchWithLabel
197-
label="Offload GPU models to CPU"
198-
name="env_suno_offload_cpu"
199-
value={barkSettingsParams.env_suno_offload_cpu}
200-
onChange={handleChange}
201-
/>
202-
</div>
203-
204157
<Button
205158
variant="outline"
206159
onClick={() => {
207-
setBarkSettingsParams({
208-
...barkSettingsParams,
209-
env_suno_use_small_models: false,
210-
env_suno_enable_mps: false,
211-
env_suno_offload_cpu: false,
212-
});
160+
setBarkSettingsParams({ ...barkSettingsParams });
213161
}}
214162
>
215163
Reset to defaults

react-ui/src/tabs/BarkSettingsParams.tsx

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,6 @@ import router from "next/router";
44
export const barkSettingsId = "barkSettingsParams.v4";
55

66
export type BarkSettingsParams = {
7-
env_suno_use_small_models: boolean;
8-
env_suno_enable_mps: boolean;
9-
env_suno_offload_cpu: boolean;
10-
117
text_use_gpu: boolean;
128
text_use_small: boolean;
139
coarse_use_gpu: boolean;
@@ -19,10 +15,6 @@ export type BarkSettingsParams = {
1915
};
2016

2117
export const initialBarkSettingsParams: BarkSettingsParams = {
22-
env_suno_use_small_models: false,
23-
env_suno_enable_mps: false,
24-
env_suno_offload_cpu: false,
25-
2618
text_use_gpu: false,
2719
text_use_small: false,
2820
coarse_use_gpu: false,

tts_webui/decorators/__init__.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
from .decorator_add_base_filename import decorator_add_base_filename
2-
from .decorator_add_date import decorator_add_date
3-
from .decorator_add_model_type import decorator_add_model_type
4-
from .decorator_apply_torch_seed import decorator_apply_torch_seed
5-
from .decorator_log_generation import decorator_log_generation
1+
from .decorator_add_base_filename import decorator_add_base_filename, decorator_add_base_filename_generator, format_date_for_file
2+
from .decorator_add_date import decorator_add_date, decorator_add_date_generator
3+
from .decorator_add_model_type import decorator_add_model_type, decorator_add_model_type_generator
4+
from .decorator_apply_torch_seed import decorator_apply_torch_seed, decorator_apply_torch_seed_generator
5+
from .decorator_log_generation import decorator_log_generation, decorator_log_generation_generator
66
from .decorator_save_metadata import decorator_save_metadata
77
from .decorator_save_musicgen_npz import decorator_save_musicgen_npz
8-
from .decorator_save_wav import decorator_save_wav
8+
from .decorator_save_wav import decorator_save_wav, decorator_save_wav_generator
99
from .gradio_dict_decorator import dictionarize
10-
from .log_function_time import log_function_time
10+
from .log_function_time import log_function_time, log_generator_time

tts_webui/decorators/decorator_add_base_filename.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def format_date_for_file(date: datetime):
1414

1515

1616
def _create_base_filename(kwargs, result_dict):
17-
prompt = kwargs["text"]
17+
prompt = kwargs.get("text", "")
1818
is_long = result_dict.get("long", False)
1919
base_filename = format_filename(
2020
title=prompt_to_title(prompt) + ("_long" if is_long else ""),

tts_webui/utils/log_generation.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,18 @@ def custom_repr(value):
1111

1212

1313
def StringifyParams(x):
    """Render the mapping ``x`` as a multi-line, constructor-style string.

    Entries whose key is ``"outputs"``, ``"_type"`` or ``"text"`` are left
    out. Every remaining entry is formatted as ``key=custom_repr(value)``,
    the entries are joined with a comma and a newline, and the result is
    wrapped in ``<name>(...)`` where ``<name>`` is whatever
    ``_get_typed_dict_name(x)`` returns.
    """
    # Keys that are deliberately omitted from the rendered parameter list.
    hidden_keys = ("outputs", "_type", "text")

    rendered = []
    for key, value in x.items():
        if key in hidden_keys:
            continue
        rendered.append(f"{key}={custom_repr(value)}")

    body = ",\n ".join(rendered)
    return f"{_get_typed_dict_name(x)}(\n {body}\n)"
1627

1728

0 commit comments

Comments
 (0)