src/kokoro_model.cpp: 26 changes (16 additions & 10 deletions)
@@ -30,15 +30,18 @@ static struct ggml_tensor * build_albert_norm(ggml_context * ctx, ggml_tensor *
return cur;
}

-static struct ggml_tensor * build_lstm(ggml_context * ctx, ggml_tensor * input, lstm* rnn, uint32_t sequence_length) {
+static struct ggml_tensor * build_lstm_run(ggml_context * ctx, ggml_cgraph * gf, ggml_tensor * input, ggml_tensor * h_0, ggml_tensor * c_0, std::vector<ggml_tensor*> weights, std::vector<ggml_tensor*> biases, uint32_t sequence_length, bool reversed = false);
+
+static struct ggml_tensor * build_lstm(ggml_context * ctx, ggml_tensor * input, lstm* rnn, uint32_t sequence_length, ggml_cgraph * gf) {
struct ggml_tensor * resp = input;
struct ggml_tensor * reverse_resp = input;

// iterate over cells first so that at each pass to the next cell we have a fully formed vector (this improves performance as well as allocation for stacked lstms)
for (int c = 0; c < rnn->cells.size(); c++) {
-resp = build_lstm_run(ctx, resp, rnn->hidden[c], rnn->states[c], rnn->cells[c]->weights, rnn->cells[c]->biases, sequence_length);
+ggml_build_forward_expand(gf, resp);
+resp = build_lstm_run(ctx, gf, resp, rnn->hidden[c], rnn->states[c], rnn->cells[c]->weights, rnn->cells[c]->biases, sequence_length);
if (rnn->bidirectional) {
-reverse_resp = build_lstm_run(ctx, reverse_resp, rnn->hidden[c], rnn->states[c], rnn->cells[c]->reverse_weights, rnn->cells[c]->reverse_biases, sequence_length, true);
+reverse_resp = build_lstm_run(ctx, gf, reverse_resp, rnn->hidden[c], rnn->states[c], rnn->cells[c]->reverse_weights, rnn->cells[c]->reverse_biases, sequence_length, true);
}
}
if (rnn->bidirectional) {
@@ -47,7 +50,7 @@ static struct ggml_tensor * build_lstm(ggml_context * ctx, ggml_tensor * input,
return resp;
}

-static struct ggml_tensor * build_lstm_run(ggml_context * ctx, ggml_tensor * input, ggml_tensor * h_0, ggml_tensor * c_0, std::vector<ggml_tensor*> weights, std::vector<ggml_tensor*> biases, uint32_t sequence_length, bool reversed) {
+static struct ggml_tensor * build_lstm_run(ggml_context * ctx, ggml_cgraph * gf, ggml_tensor * input, ggml_tensor * h_0, ggml_tensor * c_0, std::vector<ggml_tensor*> weights, std::vector<ggml_tensor*> biases, uint32_t sequence_length, bool reversed) {
struct ggml_tensor * I = ggml_add(ctx, ggml_mul_mat(ctx, weights[0], input), biases[0]);
struct ggml_tensor * F = ggml_add(ctx, ggml_mul_mat(ctx, weights[2], input), biases[2]);
struct ggml_tensor * G = ggml_add(ctx, ggml_mul_mat(ctx, weights[4], input), biases[4]);
@@ -77,6 +80,7 @@ static struct ggml_tensor * build_lstm_run(ggml_context * ctx, ggml_tensor * inp
} else {
outputs = reversed ? ggml_concat(ctx, h_0, outputs, 1) : ggml_concat(ctx, outputs, h_0, 1);
}
+ggml_build_forward_expand(gf, outputs);
}
return outputs;
}
@@ -222,6 +226,7 @@ static struct ggml_tensor * build_generator(ggml_context * ctx, kokoro_model * m
}
}
cur = ggml_cont(ctx, ggml_transpose(ctx, ggml_div(ctx, cur, model->n_kernels_tensor)));
+ggml_build_forward_expand(gf, cur);
}

cur = ggml_leaky_relu(ctx, cur, 0.01f, false);
@@ -235,7 +240,6 @@ static struct ggml_tensor * build_generator(ggml_context * ctx, kokoro_model * m
cur = ggml_concat(ctx, spec, phase, 3); // istft expects the magnitude and phase concatenated after the batch;
cur = istft(ctx, ggml_cont(ctx, ggml_transpose(ctx, cur)), window_sq_sum, generator->window, model->true_n_fft, model->stft_hop, true, true);
ggml_set_name(cur, "after_res_gen");
-ggml_build_forward_expand(gf, cur);
return cur;
}

@@ -1004,6 +1008,7 @@ struct ggml_cgraph * kokoro_duration_runner::build_kokoro_duration_graph(kokoro_
cur = ggml_add(ctx, cur, residualffn);
cur = build_albert_norm(ctx, cur, model->layers[l]->layer_output_norm_weight, model->layers[l]->layer_output_norm_bias);
}
+ggml_build_forward_expand(gf, cur);
}

// duration / prosody prediction
@@ -1014,7 +1019,7 @@ struct ggml_cgraph * kokoro_duration_runner::build_kokoro_duration_graph(kokoro_
cur = ggml_concat(ctx, cur, ggml_repeat(ctx, style_half, ggml_new_tensor_2d(ctx, GGML_TYPE_F32, style_half->ne[0], cur->ne[1])), 0);

for (auto l : model->prosody_pred->layers) {
-cur = build_lstm(ctx, cur, l->rnn, batch.n_tokens);
+cur = build_lstm(ctx, cur, l->rnn, batch.n_tokens, gf);

struct ggml_tensor * gamma = ggml_add(ctx, ggml_mul_mat(ctx, l->ada_norm_gamma_weight, style_half), l->ada_norm_gamma_bias);
struct ggml_tensor * beta = ggml_add(ctx, ggml_mul_mat(ctx, l->ada_norm_beta_weight, style_half), l->ada_norm_beta_bias);
@@ -1025,15 +1030,14 @@ struct ggml_cgraph * kokoro_duration_runner::build_kokoro_duration_graph(kokoro_
// An optimal remedy to this would be to increment the gamma bias above by one when preparing the gguf file for the model.
cur = ggml_add(ctx, ggml_add(ctx, cur, ggml_mul(ctx, cur, gamma)), beta);
cur = ggml_concat(ctx, cur, ggml_repeat(ctx, style_half, ggml_new_tensor_2d(ctx, GGML_TYPE_F32, style_half->ne[0], cur->ne[1])), 0);
-ggml_build_forward_expand(gf, cur);
}

struct ggml_tensor * d = ggml_cont(ctx, cur);
ggml_set_name(d, "duration_hidden_states");
ggml_build_forward_expand(gf, d);

struct ggml_tensor * len;
-cur = build_lstm(ctx, cur, model->prosody_pred->duration_proj_lstm, batch.n_tokens);
+cur = build_lstm(ctx, cur, model->prosody_pred->duration_proj_lstm, batch.n_tokens, gf);
cur = ggml_sigmoid(ctx, ggml_add(ctx, ggml_mul_mat(ctx, model->prosody_pred->duration_proj, cur), model->prosody_pred->duration_proj_bias));
// If we were to support speed we would add a constant tensor for the speed and divide here.
len = ggml_round(ctx, ggml_sum_rows(ctx, cur));
@@ -1163,7 +1167,7 @@ struct ggml_cgraph * kokoro_runner::build_kokoro_graph(kokoro_ubatch & batch) {
cur = ggml_mul_mat(ctx, ggml_cont(ctx, ggml_transpose(ctx, kctx->duration_mask)), ggml_cont(ctx, ggml_transpose(ctx, kctx->duration_pred)));
cur = ggml_cont(ctx, ggml_transpose(ctx, cur));

-cur = build_lstm(ctx, cur, model->prosody_pred->shared_lstm, cur->ne[1]);
+cur = build_lstm(ctx, cur, model->prosody_pred->shared_lstm, cur->ne[1], gf);


struct ggml_tensor * f0_curve = cur;
@@ -1202,7 +1206,7 @@ struct ggml_cgraph * kokoro_runner::build_kokoro_graph(kokoro_ubatch & batch) {
cur = ggml_leaky_relu(ctx, cur, 0.2f, false);
}

-cur = build_lstm(ctx, cur, model->text_encoder->out_lstm, kctx->sequence_length);
+cur = build_lstm(ctx, cur, model->text_encoder->out_lstm, kctx->sequence_length, gf);
asr = ggml_mul_mat(ctx, ggml_cont(ctx, ggml_transpose(ctx, cur)), ggml_cont(ctx, ggml_transpose(ctx, kctx->duration_mask)));
}

@@ -1217,6 +1221,7 @@ struct ggml_cgraph * kokoro_runner::build_kokoro_graph(kokoro_ubatch & batch) {
n_base = ggml_add(ctx, ggml_conv_1d(ctx, model->decoder->n_conv, n, 2, 1, 1), model->decoder->n_conv_bias);
cur = ggml_concat(ctx, ggml_concat(ctx, ggml_cont(ctx, ggml_transpose(ctx, asr)), f0, 1), n_base, 1);
cur = build_ada_residual_conv(ctx, cur, model->decoder->encoder_block, style_half2, model->sqrt_tensor);
+ggml_build_forward_expand(gf, cur);

asr_res = ggml_mul_mat(ctx, model->decoder->asr_conv, asr);
asr_res = ggml_add(ctx, asr_res, ggml_transpose(ctx, model->decoder->asr_conv_bias));
@@ -1225,6 +1230,7 @@ struct ggml_cgraph * kokoro_runner::build_kokoro_graph(kokoro_ubatch & batch) {
for (auto l : model->decoder->decoder_blocks) {
cur = ggml_concat(ctx, ggml_concat(ctx, ggml_concat(ctx, cur, asr_res, 1), f0, 1), n_base, 1 );
cur = build_ada_residual_conv(ctx, cur, l, style_half2, model->sqrt_tensor);
ggml_build_forward_expand(gf, cur);
}
cur = ggml_cont(ctx, ggml_transpose(ctx, cur));
}
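A note on the recurring pattern in this file (not part of the patch): the change threads ggml_cgraph * gf into the LSTM helpers and calls ggml_build_forward_expand on intermediate tensors as soon as they are produced, rather than expanding only the final output. Below is a minimal sketch of that pattern, assuming nothing beyond the public ggml graph API; the loop body op is a stand-in for one recurrent step, not the real LSTM math:

#include "ggml.h"

// Hedged sketch: ggml_build_forward_expand adds `cur` and all of its
// not-yet-visited dependencies to gf, so expanding inside a sequential loop
// registers each step's nodes in execution order as the graph is built.
static struct ggml_tensor * build_steps(struct ggml_context * ctx,
                                        struct ggml_cgraph * gf,
                                        struct ggml_tensor * cur,
                                        int n_steps) {
    for (int i = 0; i < n_steps; i++) {
        cur = ggml_tanh(ctx, cur);           // stand-in for one recurrent step
        ggml_build_forward_expand(gf, cur);  // register this step's nodes now
    }
    return cur;
}

With the callers owning this bookkeeping, expands that became redundant, such as the one after istft at the end of build_generator, are dropped.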
src/kokoro_model.h: 2 changes (0 additions & 2 deletions)
@@ -339,8 +339,6 @@ struct kokoro_duration_context : runner_context {
static struct ggml_tensor * build_albert_attn_mask(ggml_context * ctx, struct kokoro_duration_context *kctx, const kokoro_ubatch & batch);
static struct ggml_tensor * build_albert_inputs(ggml_context * ctx, kokoro_model * model, ggml_tensor * input_tokens, ggml_tensor * positions, ggml_tensor * token_types);
static struct ggml_tensor * build_albert_norm(ggml_context * ctx, ggml_tensor * cur, ggml_tensor * weight, ggml_tensor * bias);
-static struct ggml_tensor * build_lstm(ggml_context * ctx, ggml_tensor * input, lstm* rnn, uint32_t sequence_length);
-static struct ggml_tensor * build_lstm_run(ggml_context * ctx, ggml_tensor * input, ggml_tensor * h_0, ggml_tensor * c_0, std::vector<ggml_tensor*> weights, std::vector<ggml_tensor*> biases, uint32_t sequence_length, bool reversed = false);
static struct ggml_tensor * build_ada_residual_conv(ggml_context * ctx, struct ggml_tensor * x, ada_residual_conv_block * block, struct ggml_tensor * style, struct ggml_tensor * sqrt_tensor);
static struct ggml_tensor * build_kokoro_generator_res_block(ggml_context * ctx, struct ggml_tensor * x, struct ggml_tensor * style, kokoro_generator_residual_block * block);
static struct ggml_tensor * build_noise_block(ggml_context * ctx, kokoro_noise_residual_block * block, struct ggml_tensor * x, struct ggml_tensor * style);
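For context on the two declarations removed above (they are now file-local to src/kokoro_model.cpp, where build_lstm_run gains a forward declaration): build_lstm_run walks one direction of an LSTM across the sequence. The visible diff lines compute the input projections of three gates, with weights[0]/biases[0] feeding the input gate, weights[2]/biases[2] the forget gate, and weights[4]/biases[4] the cell candidate. A single timestep might look like the hedged sketch below; pairing the odd weight indices with the hidden state and using index 6 for the output gate are assumptions, since those lines are collapsed in the diff:

#include "ggml.h"
#include <vector>

// Hedged sketch of one LSTM timestep in ggml. Even weight indices are the
// input-to-hidden projections visible in the diff; the hidden-to-hidden
// pairing (odd indices) and the output-gate index are assumptions.
static struct ggml_tensor * lstm_step(struct ggml_context * ctx,
                                      struct ggml_tensor * x_t,  // input at step t
                                      struct ggml_tensor * h,    // previous hidden state
                                      struct ggml_tensor * c,    // previous cell state
                                      std::vector<ggml_tensor *> & w,
                                      std::vector<ggml_tensor *> & b,
                                      struct ggml_tensor ** c_out) {
    struct ggml_tensor * i = ggml_sigmoid(ctx, ggml_add(ctx,
        ggml_add(ctx, ggml_mul_mat(ctx, w[0], x_t), b[0]), ggml_mul_mat(ctx, w[1], h)));
    struct ggml_tensor * f = ggml_sigmoid(ctx, ggml_add(ctx,
        ggml_add(ctx, ggml_mul_mat(ctx, w[2], x_t), b[2]), ggml_mul_mat(ctx, w[3], h)));
    struct ggml_tensor * g = ggml_tanh(ctx, ggml_add(ctx,
        ggml_add(ctx, ggml_mul_mat(ctx, w[4], x_t), b[4]), ggml_mul_mat(ctx, w[5], h)));
    struct ggml_tensor * o = ggml_sigmoid(ctx, ggml_add(ctx,
        ggml_add(ctx, ggml_mul_mat(ctx, w[6], x_t), b[6]), ggml_mul_mat(ctx, w[7], h)));
    *c_out = ggml_add(ctx, ggml_mul(ctx, f, c), ggml_mul(ctx, i, g)); // c_t = f * c + i * g
    return ggml_mul(ctx, o, ggml_tanh(ctx, *c_out));                  // h_t = o * tanh(c_t)
}

In a bidirectional layer, build_lstm runs this recurrence once forward and once reversed over the same input (the reversed pass using the cell's reverse_weights and reverse_biases, as in the lines above) and concatenates the two directions' outputs.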