Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 3 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,26 +1,7 @@
## About

This is a demonstration for Kaldi on Android
This demo implements offline speech recognition and speaker identification for mobile applications using Kaldi and Vosk libraries.

## Usage
## Documentation

Simply import the project into Android Studio and run. It will listen for the audio and dump the transcription.

To use this library in your application simply modify the demo according to your needs - add kaldi-android aar
to dependencies, update the model and modify Java UI code according to your needs.

## Development

This is just a demo project, the main setup to compile kaldi-android
library AAR is available at [vosk-api](http://github.com/alphacep/vosk-api). Check
compilation instructions there as well as development plans.

## Languages

Models for different languages (English, Chinese, Russian) are available in
[Releases](https://github.com/alphacep/kaldi-android-demo/releases) section. To use the model unpack it into
```kaldi-android-demo/models/src/main/assets/sync/model-android```. More languages will be available soon.

## Updating grammar and language model

To run on Android, the model has to be sufficiently small; we recommend checking the model sizes in the demo to figure out how large your model should be. If you want to update the grammar or the acoustic model, check the [vosk-api documentation](https://github.com/alphacep/vosk-api/blob/master/doc/models.md).
For documentation and instructions see [Vosk Website](https://alphacephei.com/vosk/android)
2 changes: 0 additions & 2 deletions aars/build.gradle

This file was deleted.

Binary file removed aars/kaldi-android-5.2.aar
Binary file not shown.
13 changes: 10 additions & 3 deletions app/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,28 @@ apply plugin: 'com.android.application'

repositories {
google()
maven {
url "https://dl.bintray.com/alphacep/vosk"
}
}

android {
compileSdkVersion 29
defaultConfig {
applicationId "org.kaldi.demo"
applicationId "org.vosk.demo"
minSdkVersion 21
targetSdkVersion 29
versionCode 1
versionName "1.1"
ndk {
abiFilters 'armeabi-v7a', 'arm64-v8a', 'x86_64', 'x86'
}
}
}

dependencies {
implementation project(':aars')
implementation project(':models')
implementation 'com.alphacep:vosk-android:0.3.10'
implementation 'androidx.appcompat:appcompat:1.1.0'

implementation project(':models')
}
58 changes: 35 additions & 23 deletions app/src/main/java/org/kaldi/demo/KaldiActivity.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import androidx.annotation.NonNull;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;

import android.text.method.ScrollingMovementMethod;
import android.util.Log;
import android.view.View;
import android.widget.Button;
Expand All @@ -31,7 +33,8 @@
import org.kaldi.KaldiRecognizer;
import org.kaldi.Model;
import org.kaldi.RecognitionListener;
import org.kaldi.SpeechRecognizer;
import org.kaldi.SpeechService;
import org.kaldi.Vosk;

import java.io.File;
import java.io.IOException;
Expand All @@ -41,20 +44,18 @@
public class KaldiActivity extends Activity implements
RecognitionListener {

static {
System.loadLibrary("kaldi_jni");
}

static private final int STATE_START = 0;
static private final int STATE_READY = 1;
static private final int STATE_FILE = 2;
static private final int STATE_MIC = 3;
static private final int STATE_DONE = 2;
static private final int STATE_FILE = 3;
static private final int STATE_MIC = 4;

/* Used to handle permission request */
private static final int PERMISSIONS_REQUEST_RECORD_AUDIO = 1;


private Model model;
private SpeechRecognizer recognizer;
private SpeechService speechService;
TextView resultView;

@Override
Expand Down Expand Up @@ -103,7 +104,10 @@ protected Exception doInBackground(Void... params) {
try {
Assets assets = new Assets(activityReference.get());
File assetDir = assets.syncAssets();
Log.d("!!!!", assetDir.toString());
Log.d("KaldiDemo", "Sync files in the folder " + assetDir.toString());

Vosk.SetLogLevel(0);

activityReference.get().model = new Model(assetDir.toString() + "/model-android");
} catch (IOException e) {
return e;
Expand Down Expand Up @@ -136,7 +140,7 @@ protected String doInBackground(Void... params) {
long startTime = System.currentTimeMillis();
StringBuilder result = new StringBuilder();
try {
rec = new KaldiRecognizer(activityReference.get().model, 16000.f);
rec = new KaldiRecognizer(activityReference.get().model, 16000.f, "oh zero one two three four five six seven eight nine");

InputStream ais = activityReference.get().getAssets().open("10001-90210-01803.wav");
if (ais.skip(44) != 44) {
Expand Down Expand Up @@ -185,9 +189,9 @@ public void onRequestPermissionsResult(int requestCode,
public void onDestroy() {
super.onDestroy();

if (recognizer != null) {
recognizer.cancel();
recognizer.shutdown();
if (speechService != null) {
speechService.cancel();
speechService.shutdown();
}
}

Expand All @@ -209,15 +213,16 @@ public void onError(Exception e) {

@Override
public void onTimeout() {
recognizer.cancel();
recognizer = null;
speechService.cancel();
speechService = null;
setUiState(STATE_READY);
}

private void setUiState(int state) {
switch (state) {
case STATE_START:
resultView.setText(R.string.preparing);
resultView.setMovementMethod(new ScrollingMovementMethod());
findViewById(R.id.recognize_file).setEnabled(false);
findViewById(R.id.recognize_mic).setEnabled(false);
break;
Expand All @@ -227,13 +232,19 @@ private void setUiState(int state) {
findViewById(R.id.recognize_file).setEnabled(true);
findViewById(R.id.recognize_mic).setEnabled(true);
break;
case STATE_DONE:
((Button) findViewById(R.id.recognize_mic)).setText(R.string.recognize_microphone);
findViewById(R.id.recognize_file).setEnabled(true);
findViewById(R.id.recognize_mic).setEnabled(true);
break;
case STATE_FILE:
resultView.append(getString(R.string.starting));
resultView.setText(getString(R.string.starting));
findViewById(R.id.recognize_mic).setEnabled(false);
findViewById(R.id.recognize_file).setEnabled(false);
break;
case STATE_MIC:
((Button) findViewById(R.id.recognize_mic)).setText(R.string.stop_microphone);
resultView.setText(getString(R.string.say_something));
findViewById(R.id.recognize_file).setEnabled(false);
findViewById(R.id.recognize_mic).setEnabled(true);
break;
Expand All @@ -253,16 +264,17 @@ public void recognizeFile() {
}

public void recognizeMicrophone() {
if (recognizer != null) {
setUiState(STATE_READY);
recognizer.cancel();
recognizer = null;
if (speechService != null) {
setUiState(STATE_DONE);
speechService.cancel();
speechService = null;
} else {
setUiState(STATE_MIC);
try {
recognizer = new SpeechRecognizer(model);
recognizer.addListener(this);
recognizer.startListening();
KaldiRecognizer rec = new KaldiRecognizer(model, 16000.0f);
speechService = new SpeechService(rec, 16000.0f);
speechService.addListener(this);
speechService.startListening();
} catch (IOException e) {
setErrorState(e.getMessage());
}
Expand Down
3 changes: 2 additions & 1 deletion app/src/main/res/values/strings.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>

<string name="app_name">Kaldi Demo</string>
<string name="app_name">Vosk Demo</string>
<string name="recognize_file">Recognize File</string>
<string name="recognize_microphone">Recognize Microphone</string>
<string name="stop_microphone">Stop Microphone</string>
Expand All @@ -10,5 +10,6 @@
<string name="failed">Failed to init recognizer %s\n</string>
<string name="elapsed">Result %s elapsed %d milliseconds\n</string>
<string name="starting">Starting\n</string>
<string name="say_something">Say something\n</string>

</resources>
2 changes: 2 additions & 0 deletions models/src/main/assets/sync/model-android/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
US English model for android

10 changes: 10 additions & 0 deletions models/src/main/assets/sync/model-android/conf/model.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
--min-active=200
--max-active=3000
--beam=10.0
--lattice-beam=2.0
--acoustic-scale=1.0
--frame-subsampling-factor=3
--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10
--endpoint.rule2.min-trailing-silence=0.5
--endpoint.rule3.min-trailing-silence=1.0
--endpoint.rule4.min-trailing-silence=2.0
2 changes: 1 addition & 1 deletion settings.gradle
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
include ':models', ':aars', ':app'
include ':models', ':app'