jurihock · jurihock · Dec 10, 2023 · Dec 7, 2023 · Dec 7, 2023 · Dec 8, 2023
diff --git a/MANUAL.md b/MANUAL.md
@@ -9,20 +9,21 @@ Following customizations are currently available:
 <img src="assets/screenshot.png" width="500">
 
 | # | Parameter | Function |
-| - | ------- | -------- |
-| 1 | Bypass | Enable to skip the signal processing and pass the input signal directly to the output. |
-| 2 | Normalize | Enable to match the processed output loudness level to the input level. |
-| 3 | Timbre quefrency | Cutoff _quefrency_ of the [cepstral](https://en.wikipedia.org/wiki/Cepstrum) low-pass _lifter_ in milliseconds used for timbre extraction. If you set it to zero, the pitch independent timbre processing disappears. |
-| 4 | Timbre shift | Number of semitones to shift the timbre independent of the pitch when `3` is enabled. |
-| 5-9 | Pitch shift | Number of semitones to repeatedly shift the pitch. Depending on `3` it will have an equal effect on the timbre. |
-| 10 | Pitch stages | Total amount of pitch shifts that will be applied. |
-| 11 | STFT overlap | Degree of the internal audio segment [overlap](https://api.semanticscholar.org/CorpusID:15841658). |
-| 12 | DFT size | Number of frequency bins to be used for spectral processing. |
+|:-:| ------- | -------- |
+| `1` | Bypass | Enable to skip the signal processing and pass the input signal directly to the output. |
+| `2` | Normalize | Enable to match the processed output loudness level to the input level. |
+| `3` | Timbre quefrency | Cutoff _quefrency_ of the [cepstral](https://en.wikipedia.org/wiki/Cepstrum) low-pass _lifter_ in milliseconds used for timbre extraction. If you set it to zero, the pitch independent timbre processing disappears. |
+| `4` | Timbre shift | Number of semitones to shift the timbre independent of the pitch when `3` is enabled. |
+| <nobr>`5`-`9`</nobr> | Pitch shift | Number of semitones to repeatedly shift the pitch. Depending on `3` it will have an equal effect on the timbre. |
+| `10` | Pitch stages | Total amount of pitch shifts that will be applied. |
+| `11` | STFT overlap | Degree of the internal audio segment [overlap](https://api.semanticscholar.org/CorpusID:15841658). |
+| `12` | DFT size | Number of frequency bins to be used for spectral processing. |
+| `13` | Low latency | Reduce the output latency to the lowest possible value based on the preset block size. |
 
 ## Automation
 
 - All parameters are adjustable at runtime, the changes take effect immediately.
-- Manually adjusting parameters `11` and `12` causes a complete reset of the internal signal processing. Permanently changing these two parameters at runtime is not recommended.
+- Manually adjusting parameters `11`, `12`, and `13` causes a complete reset of the internal signal processing. Permanently changing these parameters at runtime is not recommended.
 - Additionally, parameters `11` and `12` are automatically adapted for extremely small and large block sizes. This usually happens only once at startup or on each manual change of `11` and `12`.
 
 ## Routing
@@ -33,7 +34,9 @@ However, only the first input channel (usually _left_) will be captured and pass
 ## Latency
 
 The output latency _generally_ depends on the preset block size and the DFT size `12`.
-Smaller blocks may have impact on the overal quality of the signal processing.
+In low latency mode `13`, smaller blocks can also affect the overall quality of the signal processing.
+
+Furthermore, the low latency mode `13` requires a constant block size. Some plugin hosts may change the block size erratically while the processing is running. In this case, the low latency mode is not applicable.
 
 ## Performance
 

diff --git a/Makefile b/Makefile
@@ -39,6 +39,7 @@ unplug:
 	@rm -rf ~/Library/Audio/Plug-Ins/Components/$(PLUGIN).component
 
 reset: unplug
+	@rm -rf ~/Library/Application\ Support/Audacity/plugin*.cfg
 	@rm -rf ~/Library/Caches/AudioUnitCache
 	@sudo killall -9 AudioComponentRegistrar
 	@auval -a
diff --git a/assets/screenshot.png b/assets/screenshot.png
diff --git a/src/StftPitchShiftPlugin/Core.cpp b/src/StftPitchShiftPlugin/Core.cpp
@@ -1,21 +1,15 @@
 #include <StftPitchShiftPlugin/Core.h>
 
-Core::Core(const double samplerate, const int blocksize, const int dftsize, const int overlap)
+Core::Core(const double samplerate, const int blocksize, const int dftsize, const int overlap) :
+  samplerate(samplerate), blocksize(blocksize), dftsize(dftsize), overlap(overlap),
+  analysis_window_size(static_cast<size_t>(dftsize + dftsize)),
+  synthesis_window_size(static_cast<size_t>(blocksize))
 {
-  const auto analysis_window_size  = static_cast<size_t>(dftsize + dftsize);
-  const auto synthesis_window_size = static_cast<size_t>(blocksize);
-
   const auto winsize = std::make_tuple(analysis_window_size, synthesis_window_size);
   const auto hopsize = synthesis_window_size / static_cast<size_t>(overlap);
 
   const auto fft = std::make_shared<FFT>();
 
-  config.analysis_window_size = analysis_window_size;
-  config.synthesis_window_size = synthesis_window_size;
-
-  buffer.input.resize(analysis_window_size + synthesis_window_size);
-  buffer.output.resize(analysis_window_size + synthesis_window_size);
-
   stft = std::make_unique<stftpitchshift::STFT<double>>(fft, winsize, hopsize);
   core = std::make_unique<stftpitchshift::StftPitchShiftCore<double>>(fft, winsize, hopsize, samplerate);
 }
@@ -44,51 +38,10 @@ void Core::pitch(std::vector<double> values)
   core->factors(values);
 }
 
-bool Core::compatible(const int blocksize) const
-{
-  return static_cast<size_t>(blocksize) == config.synthesis_window_size;
-}
-
-void Core::process(const std::span<const float> input, const std::span<float> output)
+void Core::stft_pitch_shift(const std::span<const double> input, const std::span<double> output) const
 {
-  const auto analysis_window_size  = config.analysis_window_size;
-  const auto synthesis_window_size = config.synthesis_window_size;
-
-  // shift input buffer
-  std::copy(
-    buffer.input.begin() + synthesis_window_size,
-    buffer.input.end(),
-    buffer.input.begin());
-
-  // copy new input samples
-  std::transform(
-    input.begin(),
-    input.end(),
-    buffer.input.begin() + analysis_window_size,
-    transform<float, double>);
-
-  // apply pitch shifting within the built-in STFT routine
-  (*stft)(buffer.input, buffer.output, [&](std::span<std::complex<double>> dft)
+  (*stft)(input, output, [&](std::span<std::complex<double>> dft)
   {
     core->shiftpitch(dft);
   });
-
-  // copy new output samples back
-  std::transform(
-    buffer.output.begin() - synthesis_window_size + analysis_window_size,
-    buffer.output.end() - synthesis_window_size,
-    output.begin(),
-    transform<double, float>);
-
-  // shift output buffer
-  std::copy(
-    buffer.output.begin() + synthesis_window_size,
-    buffer.output.end(),
-    buffer.output.begin());
-
-  // prepare for the next callback
-  std::fill(
-    buffer.output.begin() + analysis_window_size,
-    buffer.output.end(),
-    0);
 }
diff --git a/src/StftPitchShiftPlugin/Core.h b/src/StftPitchShiftPlugin/Core.h
@@ -5,30 +5,35 @@
 #include <StftPitchShift/STFT.h>
 #include <StftPitchShift/StftPitchShiftCore.h>
 
-class Core final
+class Core
 {
 
 public:
 
   Core(const double samplerate, const int blocksize, const int dftsize, const int overlap);
-  ~Core();
+  virtual ~Core();
 
   void normalize(bool value);
   void quefrency(double value);
   void timbre(double value);
   void pitch(std::vector<double> values);
 
-  bool compatible(const int blocksize) const;
+  virtual int latency() const = 0;
+  virtual bool compatible(const int blocksize) const = 0;
+  virtual void process(const std::span<const float> input, const std::span<float> output) = 0;
 
-  void process(const std::span<const float> input, const std::span<float> output);
+protected:
 
-private:
+  const double samplerate;
+  const int blocksize;
+  const int dftsize;
+  const int overlap;
+  const size_t analysis_window_size;
+  const size_t synthesis_window_size;
 
-  template<typename X, typename Y>
-  static Y transform(const X x) { return static_cast<Y>(x); }
+  void stft_pitch_shift(const std::span<const double> input, const std::span<double> output) const;
 
-  struct { size_t analysis_window_size, synthesis_window_size; } config;
-  struct { std::vector<double> input, output; } buffer;
+private:
 
   std::unique_ptr<stftpitchshift::STFT<double>> stft;
   std::unique_ptr<stftpitchshift::StftPitchShiftCore<double>> core;

diff --git a/src/StftPitchShiftPlugin/Core/DelayedCore.cpp b/src/StftPitchShiftPlugin/Core/DelayedCore.cpp
@@ -0,0 +1,69 @@
+#include <StftPitchShiftPlugin/Core/DelayedCore.h>
+
+// The underlying InstantCore always works on blocks of twice the DFT size;
+// the block size given here is the one of the host and is stored for latency().
+DelayedCore::DelayedCore(const double samplerate, const int blocksize, const int dftsize, const int overlap) :
+  InstantCore(samplerate, 2 * dftsize, dftsize, overlap),
+  host_block_size(blocksize), samples(0)
+{
+  const auto queue_size = analysis_window_size + synthesis_window_size;
+  buffer.input.resize(queue_size);
+  buffer.output.resize(queue_size);
+}
+
+// No custom teardown is required: the sample queues are std::vector members
+// which release their storage on their own.
+DelayedCore::~DelayedCore() = default;
+
+// Latency of this core: six times the DFT size minus the host block size
+// that was passed to the constructor.
+// NOTE(review): the derivation of the factor 6 is not visible here - confirm.
+int DelayedCore::latency() const { return (6 * dftsize) - host_block_size; }
+
+// Tells whether the given host block size can be handled by this core:
+// any block that is not longer than the synthesis window is accepted.
+// The block size is converted to size_t before the comparison.
+bool DelayedCore::compatible(const int blocksize) const { return synthesis_window_size >= static_cast<size_t>(blocksize); }
+
+// Feeds host blocks of varying size (see compatible()) to InstantCore.
+//
+// The input block is appended to the input queue. As soon as at least one
+// synthesis window of samples is pending, the oldest pending window is run
+// through InstantCore::process and its result is stored in the output queue.
+// Finally input.size() samples are taken from the front of the output queue;
+// the output span is expected to provide room for at least that many.
+void DelayedCore::process(const std::span<const float> input, const std::span<float> output)
+{
+  const auto incoming = input.size();
+  const auto window = synthesis_window_size;
+
+  // make room at the back of the input queue
+  std::copy(buffer.input.begin() + incoming, buffer.input.end(), buffer.input.begin());
+
+  // append the new input samples
+  std::copy(input.begin(), input.end(), buffer.input.end() - incoming);
+
+  // account for the samples just queued
+  samples += incoming;
+
+  // start processing as soon as enough samples are buffered
+  if (samples >= window)
+  {
+    // both regions start `samples` positions before the end of their queue
+    float* const source = buffer.input.data() + buffer.input.size() - samples;
+    float* const target = buffer.output.data() + buffer.output.size() - samples;
+
+    InstantCore::process(
+      std::span(source, window),
+      std::span(target, window));
+
+    // the remainder stays pending for one of the next calls
+    samples %= window;
+  }
+
+  // hand the front of the output queue back to the caller
+  std::copy(buffer.output.begin(), buffer.output.begin() + incoming, output.begin());
+
+  // drop the samples just handed back
+  std::copy(buffer.output.begin() + incoming, buffer.output.end(), buffer.output.begin());
+}
diff --git a/src/StftPitchShiftPlugin/Core/DelayedCore.h b/src/StftPitchShiftPlugin/Core/DelayedCore.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <StftPitchShiftPlugin/Core/InstantCore.h>
+
+// Core variant which queues host blocks that may be shorter than the
+// synthesis window and runs the underlying InstantCore as soon as
+// enough samples are buffered.
+class DelayedCore : public InstantCore
+{
+public:
+
+  DelayedCore(const double samplerate, const int blocksize, const int dftsize, const int overlap);
+  ~DelayedCore();
+
+  int latency() const override;
+  bool compatible(const int blocksize) const override;
+  void process(const std::span<const float> input, const std::span<float> output) override;
+
+private:
+
+  const int host_block_size; // block size that was passed to the constructor
+  struct { std::vector<float> input, output; } buffer; // input and output sample queues
+  size_t samples; // number of queued input samples which are not processed yet
+
+};
diff --git a/src/StftPitchShiftPlugin/Core/InstantCore.cpp b/src/StftPitchShiftPlugin/Core/InstantCore.cpp
@@ -0,0 +1,62 @@
+#include <StftPitchShiftPlugin/Core/InstantCore.h>
+
+// Both work buffers span one analysis window followed by one synthesis window.
+InstantCore::InstantCore(const double samplerate, const int blocksize, const int dftsize, const int overlap) :
+  Core(samplerate, blocksize, dftsize, overlap)
+{
+  const auto window_span = analysis_window_size + synthesis_window_size;
+  buffer.input.resize(window_span);
+  buffer.output.resize(window_span);
+}
+
+// No custom teardown is required: the work buffers are std::vector members
+// which release their storage on their own.
+InstantCore::~InstantCore() = default;
+
+// Latency of this core, which is the synthesis window size converted to
+// int. The synthesis window size in turn is the block size that was passed
+// to the constructor.
+int InstantCore::latency() const { return static_cast<int>(synthesis_window_size); }
+
+// Tells whether the given host block size can be handled by this core:
+// only a block of exactly the synthesis window size is accepted.
+// The block size is converted to size_t before the comparison.
+bool InstantCore::compatible(const int blocksize) const { return synthesis_window_size == static_cast<size_t>(blocksize); }
+
+// Runs one processing step on a single block of samples.
+//
+// The input is appended to the internal input buffer, the whole buffer is
+// pitch shifted, and one synthesis window of result samples is written to
+// the output. Both spans are expected to hold synthesis_window_size samples
+// (see compatible()); their sizes are not checked here.
+void InstantCore::process(const std::span<const float> input, const std::span<float> output)
+{
+  // Start of the output region handed back to the caller. It is computed as
+  // one forward offset, because begin() - synthesis_window_size would form an
+  // iterator before begin(), which is undefined behavior and gets rejected
+  // by checked iterator implementations. Assumes that the analysis window is
+  // not shorter than the synthesis window.
+  const auto ready = analysis_window_size - synthesis_window_size;
+
+  // shift input buffer
+  std::copy(buffer.input.begin() + synthesis_window_size, buffer.input.end(), buffer.input.begin());
+
+  // copy new input samples
+  std::transform(input.begin(), input.end(), buffer.input.begin() + analysis_window_size, transform<float, double>);
+
+  // apply pitch shifting within the built-in STFT routine
+  stft_pitch_shift(buffer.input, buffer.output);
+
+  // copy new output samples back
+  std::transform(
+    buffer.output.begin() + ready,
+    buffer.output.end() - synthesis_window_size,
+    output.begin(),
+    transform<double, float>);
+
+  // shift output buffer
+  std::copy(buffer.output.begin() + synthesis_window_size, buffer.output.end(), buffer.output.begin());
+
+  // prepare for the next callback
+  std::fill(buffer.output.begin() + analysis_window_size, buffer.output.end(), 0);
+}
diff --git a/src/StftPitchShiftPlugin/Core/InstantCore.h b/src/StftPitchShiftPlugin/Core/InstantCore.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <StftPitchShiftPlugin/Core.h>
+
+// Core variant which runs the pitch shifting on every process() call and
+// therefore only accepts blocks of exactly the synthesis window size.
+class InstantCore : public Core
+{
+public:
+  InstantCore(const double samplerate, const int blocksize, const int dftsize, const int overlap);
+  ~InstantCore();
+
+  int latency() const override;
+  bool compatible(const int blocksize) const override;
+  void process(const std::span<const float> input, const std::span<float> output) override;
+
+private:
+  // sample type conversion, passed to std::transform as the unary operation
+  template<typename From, typename To>
+  static To transform(const From value) { return static_cast<To>(value); }
+
+  // double precision work buffers, sized by the constructor
+  struct { std::vector<double> input, output; } buffer;
+};
diff --git a/src/StftPitchShiftPlugin/Editor.cpp b/src/StftPitchShiftPlugin/Editor.cpp
@@ -6,7 +6,10 @@ Editor::Editor(juce::AudioProcessor& process) :
   GenericAudioProcessorEditor(process),
   process(process)
 {
-  setSize(500, 500);
+  const int w = 550;
+  const int h = 550;
+
+  setSize(w, h);
 }
 
 Editor::~Editor()