frontend itf

zh794390558 · zh794390558 · commit a9f4ce47a34b · 2022-04-01T10:24:16.000Z
diff --git a/speechx/examples/feat/linear_spectrogram_main.cc b/speechx/examples/feat/linear_spectrogram_main.cc
@@ -20,7 +20,7 @@
 #include "frontend/audio_cache.h"
 #include "frontend/data_cache.h"
 #include "frontend/feature_cache.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 #include "frontend/normalizer.h"
 #include "kaldi/feat/wave-reader.h"
 #include "kaldi/util/kaldi-io.h"
@@ -170,13 +170,13 @@ int main(int argc, char* argv[]) {
     // feature pipeline: wave cache --> decibel_normalizer --> hanning
     // window -->linear_spectrogram --> global cmvn -> feat cache
 
-    // std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(new
+    // std::unique_ptr<ppspeech::FrontendInterface> data_source(new
     // ppspeech::DataCache());
-    std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(
+    std::unique_ptr<ppspeech::FrontendInterface> data_source(
         new ppspeech::AudioCache());
 
     ppspeech::DecibelNormalizerOptions db_norm_opt;
-    std::unique_ptr<ppspeech::FeatureExtractorInterface> db_norm(
+    std::unique_ptr<ppspeech::FrontendInterface> db_norm(
         new ppspeech::DecibelNormalizer(db_norm_opt, std::move(data_source)));
 
     ppspeech::LinearSpectrogramOptions opt;
@@ -185,10 +185,10 @@ int main(int argc, char* argv[]) {
     LOG(INFO) << "frame length (ms): " << opt.frame_opts.frame_length_ms;
     LOG(INFO) << "frame shift (ms): " << opt.frame_opts.frame_shift_ms;
 
-    std::unique_ptr<ppspeech::FeatureExtractorInterface> linear_spectrogram(
+    std::unique_ptr<ppspeech::FrontendInterface> linear_spectrogram(
         new ppspeech::LinearSpectrogram(opt, std::move(db_norm)));
 
-    std::unique_ptr<ppspeech::FeatureExtractorInterface> cmvn(
+    std::unique_ptr<ppspeech::FrontendInterface> cmvn(
         new ppspeech::CMVN(FLAGS_cmvn_write_path,
                            std::move(linear_spectrogram)));
 
diff --git a/speechx/speechx/frontend/audio_cache.h b/speechx/speechx/frontend/audio_cache.h
@@ -16,12 +16,12 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 
 namespace ppspeech {
 
 // waves cache
-class AudioCache : public FeatureExtractorInterface {
+class AudioCache : public FrontendInterface {
   public:
     explicit AudioCache(int buffer_size = kint16max);
 
diff --git a/speechx/speechx/frontend/data_cache.h b/speechx/speechx/frontend/data_cache.h
@@ -17,13 +17,13 @@
 
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 
 
 namespace ppspeech {
 // A data source for testing different frontend module.
 // It accepts waves or feats.
-class DataCache : public FeatureExtractorInterface {
+class DataCache : public FrontendInterface {
   public:
     explicit DataCache() { finished_ = false; }
 
diff --git a/speechx/speechx/frontend/fbank.h b/speechx/speechx/frontend/fbank.h
@@ -20,10 +20,10 @@
 
 namespace ppspeech {
 
-class FbankExtractor : FeatureExtractorInterface {
+class FbankExtractor : FrontendInterface {
   public:
     explicit FbankExtractor(const FbankOptions& opts,
-                            share_ptr<FeatureExtractorInterface> pre_extractor);
+                            share_ptr<FrontendInterface> pre_extractor);
     virtual void AcceptWaveform(
         const kaldi::Vector<kaldi::BaseFloat>& input) = 0;
     virtual void Read(kaldi::Vector<kaldi::BaseFloat>* feat) = 0;
diff --git a/speechx/speechx/frontend/feature_cache.cc b/speechx/speechx/frontend/feature_cache.cc
@@ -24,7 +24,7 @@ using kaldi::SubVector;
 using std::unique_ptr;
 
 FeatureCache::FeatureCache(
-    int max_size, unique_ptr<FeatureExtractorInterface> base_extractor) {
+    int max_size, unique_ptr<FrontendInterface> base_extractor) {
     max_size_ = max_size;
     base_extractor_ = std::move(base_extractor);
 }
diff --git a/speechx/speechx/frontend/feature_cache.h b/speechx/speechx/frontend/feature_cache.h
@@ -15,15 +15,15 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 
 namespace ppspeech {
 
-class FeatureCache : public FeatureExtractorInterface {
+class FeatureCache : public FrontendInterface {
   public:
     explicit FeatureCache(
         int32 max_size = kint16max,
-        std::unique_ptr<FeatureExtractorInterface> base_extractor = NULL);
+        std::unique_ptr<FrontendInterface> base_extractor = NULL);
 
     // Feed feats or waves
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
@@ -53,7 +53,7 @@ class FeatureCache : public FeatureExtractorInterface {
     bool Compute();
 
     size_t max_size_;
-    std::unique_ptr<FeatureExtractorInterface> base_extractor_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
 
     std::mutex mutex_;
     std::queue<kaldi::Vector<BaseFloat>> cache_;
diff --git a/speechx/speechx/frontend/feature_extractor_controller.h b/speechx/speechx/frontend/feature_extractor_controller.h
diff --git a/speechx/speechx/frontend/feature_extractor_controller_impl.h b/speechx/speechx/frontend/feature_extractor_controller_impl.h
diff --git a/speechx/speechx/frontend/frontend_itf.h b/speechx/speechx/frontend/frontend_itf.h
@@ -19,7 +19,7 @@
 
 namespace ppspeech {
 
-class FeatureExtractorInterface {
+class FrontendInterface {
   public:
     // Feed inputs: features(2D saved in 1D) or waveforms(1D).
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) = 0;
diff --git a/speechx/speechx/frontend/linear_spectrogram.cc b/speechx/speechx/frontend/linear_spectrogram.cc
@@ -27,7 +27,7 @@ using std::vector;
 
 LinearSpectrogram::LinearSpectrogram(
     const LinearSpectrogramOptions& opts,
-    std::unique_ptr<FeatureExtractorInterface> base_extractor) {
+    std::unique_ptr<FrontendInterface> base_extractor) {
     opts_ = opts;
     base_extractor_ = std::move(base_extractor);
     int32 window_size = opts.frame_opts.WindowSize();
diff --git a/speechx/speechx/frontend/linear_spectrogram.h b/speechx/speechx/frontend/linear_spectrogram.h
@@ -16,7 +16,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 #include "kaldi/feat/feature-window.h"
 
 namespace ppspeech {
@@ -35,11 +35,11 @@ struct LinearSpectrogramOptions {
     }
 };
 
-class LinearSpectrogram : public FeatureExtractorInterface {
+class LinearSpectrogram : public FrontendInterface {
   public:
     explicit LinearSpectrogram(
         const LinearSpectrogramOptions& opts,
-        std::unique_ptr<FeatureExtractorInterface> base_extractor);
+        std::unique_ptr<FrontendInterface> base_extractor);
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
     virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
     // the dim_ is the dim of single frame feature
@@ -61,7 +61,7 @@ class LinearSpectrogram : public FeatureExtractorInterface {
     std::vector<kaldi::BaseFloat> hanning_window_;
     kaldi::BaseFloat hanning_window_energy_;
     LinearSpectrogramOptions opts_;
-    std::unique_ptr<FeatureExtractorInterface> base_extractor_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
     int chunk_sample_size_;
     DISALLOW_COPY_AND_ASSIGN(LinearSpectrogram);
 };
diff --git a/speechx/speechx/frontend/normalizer.cc b/speechx/speechx/frontend/normalizer.cc
@@ -28,7 +28,7 @@ using std::unique_ptr;
 
 DecibelNormalizer::DecibelNormalizer(
     const DecibelNormalizerOptions& opts,
-    std::unique_ptr<FeatureExtractorInterface> base_extractor) {
+    std::unique_ptr<FrontendInterface> base_extractor) {
     base_extractor_ = std::move(base_extractor);
     opts_ = opts;
     dim_ = 1;
@@ -92,7 +92,7 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* waves) const {
 }
 
 CMVN::CMVN(std::string cmvn_file,
-           unique_ptr<FeatureExtractorInterface> base_extractor)
+           unique_ptr<FrontendInterface> base_extractor)
     : var_norm_(true) {
     base_extractor_ = std::move(base_extractor);
     bool binary;
diff --git a/speechx/speechx/frontend/normalizer.h b/speechx/speechx/frontend/normalizer.h
@@ -16,7 +16,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/options-itf.h"
 
@@ -40,11 +40,11 @@ struct DecibelNormalizerOptions {
     }
 };
 
-class DecibelNormalizer : public FeatureExtractorInterface {
+class DecibelNormalizer : public FrontendInterface {
   public:
     explicit DecibelNormalizer(
         const DecibelNormalizerOptions& opts,
-        std::unique_ptr<FeatureExtractorInterface> base_extractor);
+        std::unique_ptr<FrontendInterface> base_extractor);
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& waves);
     virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
     // noramlize audio, the dim is 1.
@@ -57,15 +57,15 @@ class DecibelNormalizer : public FeatureExtractorInterface {
     bool Compute(kaldi::VectorBase<kaldi::BaseFloat>* waves) const;
     DecibelNormalizerOptions opts_;
     size_t dim_;
-    std::unique_ptr<FeatureExtractorInterface> base_extractor_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
     kaldi::Vector<kaldi::BaseFloat> waveform_;
 };
 
 
-class CMVN : public FeatureExtractorInterface {
+class CMVN : public FrontendInterface {
   public:
     explicit CMVN(std::string cmvn_file,
-                  std::unique_ptr<FeatureExtractorInterface> base_extractor);
+                  std::unique_ptr<FrontendInterface> base_extractor);
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
 
     // the length of feats = feature_row * feature_dim,
@@ -81,7 +81,7 @@ class CMVN : public FeatureExtractorInterface {
     void Compute(kaldi::VectorBase<kaldi::BaseFloat>* feats) const;
     void ApplyCMVN(kaldi::MatrixBase<BaseFloat>* feats);
     kaldi::Matrix<double> stats_;
-    std::unique_ptr<FeatureExtractorInterface> base_extractor_;
+    std::unique_ptr<FrontendInterface> base_extractor_;
     size_t dim_;
     bool var_norm_;
 };
diff --git a/speechx/speechx/nnet/decodable.cc b/speechx/speechx/nnet/decodable.cc
@@ -22,7 +22,7 @@ using std::vector;
 using kaldi::Vector;
 
 Decodable::Decodable(const std::shared_ptr<NnetInterface>& nnet,
-                     const std::shared_ptr<FeatureExtractorInterface>& frontend)
+                     const std::shared_ptr<FrontendInterface>& frontend)
     : frontend_(frontend), nnet_(nnet), frame_offset_(0), frames_ready_(0) {}
 
 void Decodable::Acceptlikelihood(const Matrix<BaseFloat>& likelihood) {
diff --git a/speechx/speechx/nnet/decodable.h b/speechx/speechx/nnet/decodable.h
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 #include "base/common.h"
-#include "frontend/feature_extractor_interface.h"
+#include "frontend/frontend_itf.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "nnet/decodable-itf.h"
 #include "nnet/nnet_interface.h"
@@ -26,7 +26,7 @@ class Decodable : public kaldi::DecodableInterface {
   public:
     explicit Decodable(
         const std::shared_ptr<NnetInterface>& nnet,
-        const std::shared_ptr<FeatureExtractorInterface>& frontend);
+        const std::shared_ptr<FrontendInterface>& frontend);
     // void Init(DecodableOpts config);
     virtual kaldi::BaseFloat LogLikelihood(int32 frame, int32 index);
     virtual bool IsLastFrame(int32 frame) const;
@@ -41,7 +41,7 @@ class Decodable : public kaldi::DecodableInterface {
 
   private:
     bool AdvanceChunk();
-    std::shared_ptr<FeatureExtractorInterface> frontend_;
+    std::shared_ptr<FrontendInterface> frontend_;
     std::shared_ptr<NnetInterface> nnet_;
     kaldi::Matrix<kaldi::BaseFloat> nnet_cache_;
     // std::vector<std::vector<kaldi::BaseFloat>> nnet_cache_;

Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@ using kaldi::SubVector;`
`24`	`24`	`using std::unique_ptr;`
`25`	`25`
`26`	`26`	`FeatureCache::FeatureCache(`
`27`		`- int max_size, unique_ptr<FeatureExtractorInterface> base_extractor) {`
	`27`	`+ int max_size, unique_ptr<FrontendInterface> base_extractor) {`
`28`	`28`	`max_size_ = max_size;`
`29`	`29`	`base_extractor_ = std::move(base_extractor);`
`30`	`30`	`}`