From a6efff8b02986daf6d3660c4f33c5f39cf3f3830 Mon Sep 17 00:00:00 2001
From: fearlessTobi <thm.frey@gmail.com>
Date: Thu, 23 Aug 2018 14:33:03 +0200
Subject: [PATCH 1/8] Add audio stretching support

---
 .gitmodules                                |  3 +++
 externals/CMakeLists.txt                   |  3 +++
 externals/soundtouch                       |  1 +
 src/audio_core/CMakeLists.txt              |  1 +
 src/audio_core/cubeb_sink.cpp              |  7 +++++++
 src/audio_core/null_sink.h                 |  4 ++++
 src/audio_core/sink_stream.h               |  2 ++
 src/audio_core/stream.cpp                  |  1 +
 src/core/settings.h                        |  1 +
 src/core/telemetry_session.cpp             |  3 +++
 src/yuzu/configuration/config.cpp          |  3 +++
 src/yuzu/configuration/configure_audio.cpp |  3 +++
 src/yuzu/configuration/configure_audio.ui  | 10 ++++++++++
 src/yuzu_cmd/config.cpp                    |  2 ++
 src/yuzu_cmd/default_ini.h                 |  6 ++++++
 15 files changed, 50 insertions(+)
 create mode 160000 externals/soundtouch
diff --git a/.gitmodules b/.gitmodules
index 4f4e8690b..e73ca99e3 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -31,3 +31,6 @@
 [submodule "opus"]
     path = externals/opus
     url = https://github.com/ogniK5377/opus.git
+[submodule "soundtouch"]
+	path = externals/soundtouch
+	url = https://github.com/citra-emu/ext-soundtouch.git
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index b6eb36f20..600c45f0f 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -47,6 +47,9 @@ target_include_directories(microprofile INTERFACE ./microprofile)
 add_library(unicorn-headers INTERFACE)
 target_include_directories(unicorn-headers INTERFACE ./unicorn/include)
 
+# SoundTouch
+add_subdirectory(soundtouch)
+
 # Xbyak
 if (ARCHITECTURE_x86_64)
     # Defined before "dynarmic" above
diff --git a/externals/soundtouch b/externals/soundtouch
new file mode 160000
index 000000000..060181eaf
--- /dev/null
+++ b/externals/soundtouch
@@ -0,0 +1 @@
+Subproject commit 060181eaf273180d3a7e87349895bd0cb6ccbf4a
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index 82e4850f7..de5c291ce 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -24,6 +24,7 @@ add_library(audio_core STATIC
 create_target_directory_groups(audio_core)
 
 target_link_libraries(audio_core PUBLIC common core)
+target_link_libraries(audio_core PRIVATE SoundTouch)
 
 if(ENABLE_CUBEB)
     target_link_libraries(audio_core PRIVATE cubeb)
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 5a1177d0c..0f77fd162 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -85,6 +85,13 @@ public:
         }
     }
 
+    size_t SamplesInQueue(u32 num_channels) const {
+        if (!ctx)
+            return 0;
+
+        return queue.size() / num_channels;
+    }
+
     u32 GetNumChannels() const {
         return num_channels;
     }
diff --git a/src/audio_core/null_sink.h b/src/audio_core/null_sink.h
index f235d93e5..fbb1bc225 100644
--- a/src/audio_core/null_sink.h
+++ b/src/audio_core/null_sink.h
@@ -21,6 +21,10 @@ public:
 private:
     struct NullSinkStreamImpl final : SinkStream {
         void EnqueueSamples(u32 /*num_channels*/, const std::vector<s16>& /*samples*/) override {}
+
+        size_t SamplesInQueue(u32 /*num_channels*/) const override {
+            return 0;
+        }
     } null_sink_stream;
 };
 
diff --git a/src/audio_core/sink_stream.h b/src/audio_core/sink_stream.h
index 41b6736d8..743a743a3 100644
--- a/src/audio_core/sink_stream.h
+++ b/src/audio_core/sink_stream.h
@@ -25,6 +25,8 @@ public:
      * @param samples Samples in interleaved stereo PCM16 format.
      */
     virtual void EnqueueSamples(u32 num_channels, const std::vector<s16>& samples) = 0;
+
+    virtual std::size_t SamplesInQueue(u32 num_channels) const = 0;
 };
 
 using SinkStreamPtr = std::unique_ptr<SinkStream>;
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index dbae75d8c..49c6efc85 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -90,6 +90,7 @@ void Stream::PlayNextBuffer() {
     queued_buffers.pop();
 
     VolumeAdjustSamples(active_buffer->Samples());
+
     sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
 
     CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
diff --git a/src/core/settings.h b/src/core/settings.h
index 5bf1863e6..c25f8ba70 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -146,6 +146,7 @@ struct Values {
 
     // Audio
     std::string sink_id;
+    bool enable_audio_stretching;
     std::string audio_device_id;
     float volume;
 
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 3730e85b8..b0df154ca 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -120,6 +120,9 @@ TelemetrySession::TelemetrySession() {
     Telemetry::AppendOSInfo(field_collection);
 
     // Log user configuration information
+    AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id);
+    AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching",
+             Settings::values.enable_audio_stretching);
     AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.use_cpu_jit);
     AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore",
              Settings::values.use_multi_core);
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index c43e79e78..d229225b4 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -95,6 +95,8 @@ void Config::ReadValues() {
 
     qt_config->beginGroup("Audio");
     Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString();
+    Settings::values.enable_audio_stretching =
+        qt_config->value("enable_audio_stretching", true).toBool();
     Settings::values.audio_device_id =
         qt_config->value("output_device", "auto").toString().toStdString();
     Settings::values.volume = qt_config->value("volume", 1).toFloat();
@@ -230,6 +232,7 @@ void Config::SaveValues() {
 
     qt_config->beginGroup("Audio");
     qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id));
+    qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching);
     qt_config->setValue("output_device", QString::fromStdString(Settings::values.audio_device_id));
     qt_config->setValue("volume", Settings::values.volume);
     qt_config->endGroup();
diff --git a/src/yuzu/configuration/configure_audio.cpp b/src/yuzu/configuration/configure_audio.cpp
index fbb813f6c..6ea59f2a3 100644
--- a/src/yuzu/configuration/configure_audio.cpp
+++ b/src/yuzu/configuration/configure_audio.cpp
@@ -46,6 +46,8 @@ void ConfigureAudio::setConfiguration() {
     }
     ui->output_sink_combo_box->setCurrentIndex(new_sink_index);
 
+    ui->toggle_audio_stretching->setChecked(Settings::values.enable_audio_stretching);
+
     // The device list cannot be pre-populated (nor listed) until the output sink is known.
     updateAudioDevices(new_sink_index);
 
@@ -67,6 +69,7 @@ void ConfigureAudio::applyConfiguration() {
     Settings::values.sink_id =
         ui->output_sink_combo_box->itemText(ui->output_sink_combo_box->currentIndex())
             .toStdString();
+    Settings::values.enable_audio_stretching = ui->toggle_audio_stretching->isChecked();
     Settings::values.audio_device_id =
         ui->audio_device_combo_box->itemText(ui->audio_device_combo_box->currentIndex())
             .toStdString();
diff --git a/src/yuzu/configuration/configure_audio.ui b/src/yuzu/configuration/configure_audio.ui
index ef67890dc..a29a0e265 100644
--- a/src/yuzu/configuration/configure_audio.ui
+++ b/src/yuzu/configuration/configure_audio.ui
@@ -31,6 +31,16 @@
         </item>
        </layout>
       </item>
+       <item>
+         <widget class="QCheckBox" name="toggle_audio_stretching">
+           <property name="toolTip">
+             <string>This post-processing effect adjusts audio speed to match emulation speed and helps prevent audio stutter. This however increases audio latency.</string>
+           </property>
+           <property name="text">
+             <string>Enable audio stretching</string>
+           </property>
+         </widget>
+       </item>
       <item>
        <layout class="QHBoxLayout">
         <item>
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index f00b5a66b..991abda2e 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -108,6 +108,8 @@ void Config::ReadValues() {
 
     // Audio
     Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
+    Settings::values.enable_audio_stretching =
+        sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
     Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
     Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1);
 
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 6ed9e7962..002a4ec15 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -150,6 +150,12 @@ swap_screen =
 # auto (default): Auto-select, null: No audio output, cubeb: Cubeb audio engine (if available)
 output_engine =
 
+# Whether or not to enable the audio-stretching post-processing effect.
+# This effect adjusts audio speed to match emulation speed and helps prevent audio stutter,
+# at the cost of increasing audio latency.
+# 0: No, 1 (default): Yes
+enable_audio_stretching =
+
 # Which audio device to use.
 # auto (default): Auto-select
 output_device =

From 112351d557a41ed214eeef3a94ad0b7644466fbc Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Sat, 8 Sep 2018 14:28:39 +0100
Subject: [PATCH 2/8] common: Implement a ring buffer

---
 src/common/CMakeLists.txt        |   1 +
 src/common/ring_buffer.h         | 111 ++++++++++++++++++++++++++
 src/tests/CMakeLists.txt         |   1 +
 src/tests/common/ring_buffer.cpp | 130 +++++++++++++++++++++++++++++++
 4 files changed, 243 insertions(+)
 create mode 100644 src/common/ring_buffer.h
 create mode 100644 src/tests/common/ring_buffer.cpp

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index f41946cc6..6a3f1fe08 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -71,6 +71,7 @@ add_library(common STATIC
     param_package.cpp
     param_package.h
     quaternion.h
+    ring_buffer.h
     scm_rev.cpp
     scm_rev.h
     scope_exit.h
diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h
new file mode 100644
index 000000000..30d934a38
--- /dev/null
+++ b/src/common/ring_buffer.h
@@ -0,0 +1,112 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cstddef>
+#include <cstring>
+#include <limits>
+#include <type_traits>
+#include <vector>
+#include "common/common_types.h"
+
+namespace Common {
+
+/// SPSC (single-producer, single-consumer) ring buffer
+/// @tparam T            Element type
+/// @tparam capacity     Number of slots in ring buffer (must be a non-zero power of two)
+/// @tparam granularity  Slot size in terms of number of elements
+template <typename T, size_t capacity, size_t granularity = 1>
+class RingBuffer {
+    /// A "slot" is made of `granularity` elements of `T`.
+    static constexpr size_t slot_size = granularity * sizeof(T);
+    // T must be safely memcpy-able and have a trivial default constructor.
+    static_assert(std::is_trivial_v<T>);
+    // Ensure capacity is sensible: non-zero, and small enough for the index arithmetic below.
+    static_assert(capacity > 0 && capacity < std::numeric_limits<size_t>::max() / 2 / granularity);
+    static_assert((capacity & (capacity - 1)) == 0, "capacity must be a power of two");
+    // Ensure lock-free.
+    static_assert(std::atomic<size_t>::is_always_lock_free);
+
+public:
+    /// Pushes slots into the ring buffer
+    /// @param new_slots   Pointer to the slots to push
+    /// @param slot_count  Number of slots to push
+    /// @returns The number of slots actually pushed
+    size_t Push(const void* new_slots, size_t slot_count) {
+        const size_t write_index = m_write_index.load();
+        const size_t slots_free = capacity + m_read_index.load() - write_index;
+        const size_t push_count = std::min(slot_count, slots_free);
+
+        const size_t pos = write_index % capacity;
+        const size_t first_copy = std::min(capacity - pos, push_count);
+        const size_t second_copy = push_count - first_copy;
+
+        const char* in = static_cast<const char*>(new_slots);
+        std::memcpy(m_data.data() + pos * granularity, in, first_copy * slot_size);
+        in += first_copy * slot_size;
+        std::memcpy(m_data.data(), in, second_copy * slot_size);
+
+        m_write_index.store(write_index + push_count);
+
+        return push_count;
+    }
+
+    size_t Push(const std::vector<T>& input) {
+        return Push(input.data(), input.size() / granularity); // elements -> whole slots
+    }
+
+    /// Pops slots from the ring buffer
+    /// @param output     Where to store the popped slots
+    /// @param max_slots  Maximum number of slots to pop
+    /// @returns The number of slots actually popped
+    size_t Pop(void* output, size_t max_slots = ~size_t(0)) {
+        const size_t read_index = m_read_index.load();
+        const size_t slots_filled = m_write_index.load() - read_index;
+        const size_t pop_count = std::min(slots_filled, max_slots);
+
+        const size_t pos = read_index % capacity;
+        const size_t first_copy = std::min(capacity - pos, pop_count);
+        const size_t second_copy = pop_count - first_copy;
+
+        char* out = static_cast<char*>(output);
+        std::memcpy(out, m_data.data() + pos * granularity, first_copy * slot_size);
+        out += first_copy * slot_size;
+        std::memcpy(out, m_data.data(), second_copy * slot_size);
+
+        m_read_index.store(read_index + pop_count);
+
+        return pop_count;
+    }
+
+    std::vector<T> Pop(size_t max_slots = ~size_t(0)) {
+        std::vector<T> out(std::min(max_slots, capacity) * granularity);
+        const size_t count = Pop(out.data(), out.size() / granularity);
+        out.resize(count * granularity);
+        return out;
+    }
+
+    /// @returns Number of slots used
+    size_t Size() const {
+        return m_write_index.load() - m_read_index.load();
+    }
+
+    /// @returns Maximum size of ring buffer
+    constexpr size_t Capacity() const {
+        return capacity;
+    }
+
+private:
+    // It is important to align the below variables for performance reasons:
+    // Having them on the same cache-line would result in false-sharing between them.
+    alignas(128) std::atomic<size_t> m_read_index{0};
+    alignas(128) std::atomic<size_t> m_write_index{0};
+
+    std::array<T, granularity * capacity> m_data;
+};
+
+} // namespace Common
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 4d74bb395..4e75a72ec 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_executable(tests
     common/param_package.cpp
+    common/ring_buffer.cpp
     core/arm/arm_test_common.cpp
     core/arm/arm_test_common.h
     core/core_timing.cpp
diff --git a/src/tests/common/ring_buffer.cpp b/src/tests/common/ring_buffer.cpp
new file mode 100644
index 000000000..f3fe57839
--- /dev/null
+++ b/src/tests/common/ring_buffer.cpp
@@ -0,0 +1,130 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <numeric>
+#include <thread>
+#include <vector>
+#include <catch2/catch.hpp>
+#include "common/ring_buffer.h"
+
+namespace Common {
+
+TEST_CASE("RingBuffer: Basic Tests", "[common]") {
+    RingBuffer<char, 4, 1> buf;
+
+    // Pushing values into a ring buffer with space should succeed.
+    for (size_t i = 0; i < 4; i++) {
+        const char elem = static_cast<char>(i);
+        const size_t count = buf.Push(&elem, 1);
+        REQUIRE(count == 1);
+    }
+
+    REQUIRE(buf.Size() == 4);
+
+    // Pushing values into a full ring buffer should fail.
+    {
+        const char elem = static_cast<char>(42);
+        const size_t count = buf.Push(&elem, 1);
+        REQUIRE(count == 0);
+    }
+
+    REQUIRE(buf.Size() == 4);
+
+    // Popping multiple values from a ring buffer with values should succeed.
+    {
+        const std::vector<char> popped = buf.Pop(2);
+        REQUIRE(popped.size() == 2);
+        REQUIRE(popped[0] == 0);
+        REQUIRE(popped[1] == 1);
+    }
+
+    REQUIRE(buf.Size() == 2);
+
+    // Popping a single value from a ring buffer with values should succeed.
+    {
+        const std::vector<char> popped = buf.Pop(1);
+        REQUIRE(popped.size() == 1);
+        REQUIRE(popped[0] == 2);
+    }
+
+    REQUIRE(buf.Size() == 1);
+
+    // Pushing more values than space available should partially succeed.
+    {
+        std::vector<char> to_push(6);
+        std::iota(to_push.begin(), to_push.end(), 88);
+        const size_t count = buf.Push(to_push);
+        REQUIRE(count == 3);
+    }
+
+    REQUIRE(buf.Size() == 4);
+
+    // Doing an unlimited pop should pop all values.
+    {
+        const std::vector<char> popped = buf.Pop();
+        REQUIRE(popped.size() == 4);
+        REQUIRE(popped[0] == 3);
+        REQUIRE(popped[1] == 88);
+        REQUIRE(popped[2] == 89);
+        REQUIRE(popped[3] == 90);
+    }
+
+    REQUIRE(buf.Size() == 0);
+}
+
+TEST_CASE("RingBuffer: Threaded Test", "[common]") {
+    RingBuffer<char, 4, 2> buf;
+    const char seed = 42;
+    const size_t count = 1000000;
+    size_t full = 0;
+    size_t empty = 0;
+
+    const auto next_value = [](std::array<char, 2>& value) {
+        value[0] += 1;
+        value[1] += 2;
+    };
+
+    std::thread producer{[&] {
+        std::array<char, 2> value = {seed, seed};
+        size_t i = 0;
+        while (i < count) {
+            if (const size_t c = buf.Push(&value[0], 1); c > 0) {
+                REQUIRE(c == 1);
+                i++;
+                next_value(value);
+            } else {
+                full++;
+                std::this_thread::yield();
+            }
+        }
+    }};
+
+    std::thread consumer{[&] {
+        std::array<char, 2> value = {seed, seed};
+        size_t i = 0;
+        while (i < count) {
+            if (const std::vector<char> v = buf.Pop(1); v.size() > 0) {
+                REQUIRE(v.size() == 2);
+                REQUIRE(v[0] == value[0]);
+                REQUIRE(v[1] == value[1]);
+                i++;
+                next_value(value);
+            } else {
+                empty++;
+                std::this_thread::yield();
+            }
+        }
+    }};
+
+    producer.join();
+    consumer.join();
+
+    REQUIRE(buf.Size() == 0);
+    printf("RingBuffer: Threaded Test: full: %zu, empty: %zu\n", full, empty);
+}
+
+} // namespace Common

From 6d9dd1dc6dacbba9907e7c4e92e2d9d111ef44f4 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Sat, 8 Sep 2018 14:55:11 +0100
Subject: [PATCH 3/8] cubeb_sink: Use RingBuffer

---
 src/audio_core/cubeb_sink.cpp | 66 ++++++++++++++---------------------
 1 file changed, 26 insertions(+), 40 deletions(-)

diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 0f77fd162..552bcd051 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -4,18 +4,17 @@
 
 #include <algorithm>
 #include <cstring>
-#include <mutex>
-
 #include "audio_core/cubeb_sink.h"
 #include "audio_core/stream.h"
 #include "common/logging/log.h"
+#include "common/ring_buffer.h"
 
 namespace AudioCore {
 
-class SinkStreamImpl final : public SinkStream {
+class CubebSinkStream final : public SinkStream {
 public:
-    SinkStreamImpl(cubeb* ctx, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
-                   const std::string& name)
+    CubebSinkStream(cubeb* ctx, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
+                    const std::string& name)
         : ctx{ctx}, num_channels{num_channels_} {
 
         if (num_channels == 6) {
@@ -38,7 +37,7 @@ public:
 
         if (cubeb_stream_init(ctx, &stream_backend, name.c_str(), nullptr, nullptr, output_device,
                               &params, std::max(512u, minimum_latency),
-                              &SinkStreamImpl::DataCallback, &SinkStreamImpl::StateCallback,
+                              &CubebSinkStream::DataCallback, &CubebSinkStream::StateCallback,
                               this) != CUBEB_OK) {
             LOG_CRITICAL(Audio_Sink, "Error initializing cubeb stream");
             return;
@@ -50,7 +49,7 @@ public:
         }
     }
 
-    ~SinkStreamImpl() {
+    ~CubebSinkStream() {
         if (!ctx) {
             return;
         }
@@ -63,33 +62,27 @@ public:
     }
 
     void EnqueueSamples(u32 num_channels, const std::vector<s16>& samples) override {
-        if (!ctx) {
-            return;
-        }
-
-        std::lock_guard lock{queue_mutex};
-
-        queue.reserve(queue.size() + samples.size() * GetNumChannels());
-
         if (is_6_channel) {
             // Downsample 6 channels to 2
             const size_t sample_count_copy_size = samples.size() * 2;
-            queue.reserve(sample_count_copy_size);
+            std::vector<s16> buf;
+            buf.reserve(sample_count_copy_size);
             for (size_t i = 0; i < samples.size(); i += num_channels) {
-                queue.push_back(samples[i]);
-                queue.push_back(samples[i + 1]);
+                buf.push_back(samples[i]);
+                buf.push_back(samples[i + 1]);
             }
-        } else {
-            // Copy as-is
-            std::copy(samples.begin(), samples.end(), std::back_inserter(queue));
+            queue.Push(buf);
+            return;
         }
+
+        queue.Push(samples);
     }
 
-    size_t SamplesInQueue(u32 num_channels) const {
+    size_t SamplesInQueue(u32 num_channels) const override {
         if (!ctx)
             return 0;
 
-        return queue.size() / num_channels;
+        return queue.Size() / num_channels;
     }
 
     u32 GetNumChannels() const {
@@ -104,8 +97,7 @@ private:
     u32 num_channels{};
     bool is_6_channel{};
 
-    std::mutex queue_mutex;
-    std::vector<s16> queue;
+    Common::RingBuffer<s16, 0x10000> queue;
 
     static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
                              void* output_buffer, long num_frames);
@@ -151,38 +143,32 @@ CubebSink::~CubebSink() {
 SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
                                          const std::string& name) {
     sink_streams.push_back(
-        std::make_unique<SinkStreamImpl>(ctx, sample_rate, num_channels, output_device, name));
+        std::make_unique<CubebSinkStream>(ctx, sample_rate, num_channels, output_device, name));
     return *sink_streams.back();
 }
 
-long SinkStreamImpl::DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
+long CubebSinkStream::DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
                                   void* output_buffer, long num_frames) {
-    SinkStreamImpl* impl = static_cast<SinkStreamImpl*>(user_data);
+    CubebSinkStream* impl = static_cast<CubebSinkStream*>(user_data);
     u8* buffer = reinterpret_cast<u8*>(output_buffer);
 
     if (!impl) {
         return {};
     }
 
-    std::lock_guard lock{impl->queue_mutex};
+    const size_t max_samples_to_write = impl->GetNumChannels() * num_frames;
+    const size_t samples_written = impl->queue.Pop(buffer, max_samples_to_write);
 
-    const size_t frames_to_write{
-        std::min(impl->queue.size() / impl->GetNumChannels(), static_cast<size_t>(num_frames))};
-
-    memcpy(buffer, impl->queue.data(), frames_to_write * sizeof(s16) * impl->GetNumChannels());
-    impl->queue.erase(impl->queue.begin(),
-                      impl->queue.begin() + frames_to_write * impl->GetNumChannels());
-
-    if (frames_to_write < num_frames) {
+    if (samples_written < max_samples_to_write) {
         // Fill the rest of the frames with silence
-        memset(buffer + frames_to_write * sizeof(s16) * impl->GetNumChannels(), 0,
-               (num_frames - frames_to_write) * sizeof(s16) * impl->GetNumChannels());
+        std::memset(buffer + samples_written * sizeof(s16), 0,
+                    (max_samples_to_write - samples_written) * sizeof(s16));
     }
 
     return num_frames;
 }
 
-void SinkStreamImpl::StateCallback(cubeb_stream* stream, void* user_data, cubeb_state state) {}
+void CubebSinkStream::StateCallback(cubeb_stream* stream, void* user_data, cubeb_state state) {}
 
 std::vector<std::string> ListCubebSinkDevices() {
     std::vector<std::string> device_list;

From 7e697ab7ff729aee3d88eb18cd130132786444ac Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Sat, 8 Sep 2018 16:15:40 +0100
Subject: [PATCH 4/8] cubeb_sink: Hold last available value instead of writing
 zeros

This reduces clicking in output audio should we underrun.
---
 src/audio_core/cubeb_sink.cpp | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 552bcd051..3c129122f 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -89,6 +89,10 @@ public:
         return num_channels;
     }
 
+    u32 GetNumChannelsInQueue() const {
+        return num_channels == 1 ? 1 : 2;
+    }
+
 private:
     std::vector<std::string> device_list;
 
@@ -98,6 +102,7 @@ private:
     bool is_6_channel{};
 
     Common::RingBuffer<s16, 0x10000> queue;
+    std::array<s16, 2> last_frame;
 
     static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
                              void* output_buffer, long num_frames);
@@ -156,13 +161,18 @@ long CubebSinkStream::DataCallback(cubeb_stream* stream, void* user_data, const
         return {};
     }
 
-    const size_t max_samples_to_write = impl->GetNumChannels() * num_frames;
+    const size_t num_channels = impl->GetNumChannelsInQueue();
+    const size_t max_samples_to_write = num_channels * num_frames;
     const size_t samples_written = impl->queue.Pop(buffer, max_samples_to_write);
 
-    if (samples_written < max_samples_to_write) {
-        // Fill the rest of the frames with silence
-        std::memset(buffer + samples_written * sizeof(s16), 0,
-                    (max_samples_to_write - samples_written) * sizeof(s16));
+    if (samples_written >= num_channels) {
+        std::memcpy(&impl->last_frame[0], buffer + (samples_written - num_channels) * sizeof(s16),
+                    num_channels * sizeof(s16));
+    }
+
+    // Fill the rest of the frames with last_frame
+    for (size_t i = samples_written; i < max_samples_to_write; i += num_channels) {
+        std::memcpy(buffer + i * sizeof(s16), &impl->last_frame[0], num_channels * sizeof(s16));
     }
 
     return num_frames;

From e51bd49f87052d0706a016fab88d18ffef05b8b1 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Sat, 8 Sep 2018 16:48:13 +0100
Subject: [PATCH 5/8] audio_core: Add audio stretcher

---
 src/audio_core/CMakeLists.txt   |  2 ++
 src/audio_core/time_stretch.cpp | 64 +++++++++++++++++++++++++++++++++
 src/audio_core/time_stretch.h   | 35 ++++++++++++++++++
 3 files changed, 101 insertions(+)
 create mode 100644 src/audio_core/time_stretch.cpp
 create mode 100644 src/audio_core/time_stretch.h

diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index de5c291ce..c381dbe1d 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -17,6 +17,8 @@ add_library(audio_core STATIC
     sink_stream.h
     stream.cpp
     stream.h
+    time_stretch.cpp
+    time_stretch.h
 
     $<$<BOOL:${ENABLE_CUBEB}>:cubeb_sink.cpp cubeb_sink.h>
 )
diff --git a/src/audio_core/time_stretch.cpp b/src/audio_core/time_stretch.cpp
new file mode 100644
index 000000000..17e128323
--- /dev/null
+++ b/src/audio_core/time_stretch.cpp
@@ -0,0 +1,64 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include "audio_core/time_stretch.h"
+#include "common/logging/log.h"
+
+namespace AudioCore {
+
+TimeStretcher::TimeStretcher(u32 sample_rate, u32 channel_count)
+    : m_sample_rate(sample_rate), m_channel_count(channel_count) {
+    m_sound_touch.setChannels(channel_count);
+    m_sound_touch.setSampleRate(sample_rate);
+    m_sound_touch.setPitch(1.0);
+    m_sound_touch.setTempo(1.0);
+}
+
+void TimeStretcher::Clear() {
+    m_sound_touch.clear();
+}
+
+size_t TimeStretcher::Process(const s16* in, size_t num_in, s16* out, size_t num_out) {
+    const double time_delta = static_cast<double>(num_out) / m_sample_rate; // seconds
+
+    // We were given actual_samples number of samples, and num_samples were requested from us.
+    double current_ratio = static_cast<double>(num_in) / static_cast<double>(num_out);
+
+    const double max_latency = 0.3; // seconds
+    const double max_backlog = m_sample_rate * max_latency / 1000.0 / m_stretch_ratio;
+    const double backlog_fullness = m_sound_touch.numSamples() / max_backlog;
+    if (backlog_fullness > 5.0) {
+        // Too many samples in backlog: Don't push anymore on
+        num_in = 0;
+    }
+
+    // We ideally want the backlog to be about 50% full.
+    // This gives some headroom both ways to prevent underflow and overflow.
+    // We tweak current_ratio to encourage this.
+    constexpr double tweak_time_scale = 0.05; // seconds
+    const double tweak_correction = (backlog_fullness - 0.5) * (time_delta / tweak_time_scale);
+    current_ratio *= std::pow(1.0 + 2.0 * tweak_correction, tweak_correction < 0 ? 3.0 : 1.0);
+
+    // This low-pass filter smoothes out variance in the calculated stretch ratio.
+    // The time-scale determines how responsive this filter is.
+    constexpr double lpf_time_scale = 2.0; // seconds
+    const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale);
+    m_stretch_ratio += lpf_gain * (current_ratio - m_stretch_ratio);
+
+    // Place a lower limit of 10% speed.  When a game boots up, there will be
+    // many silence samples.  These do not need to be timestretched.
+    m_stretch_ratio = std::max(m_stretch_ratio, 0.1);
+    m_sound_touch.setTempo(m_stretch_ratio);
+
+    LOG_DEBUG(Audio, "Audio Stretching: samples:{}/{} ratio:{} backlog:{} gain: {}", num_in, num_out,
+              m_stretch_ratio, backlog_fullness, lpf_gain);
+
+    m_sound_touch.putSamples(in, num_in);
+    return m_sound_touch.receiveSamples(out, num_out);
+}
+
+} // namespace AudioCore
diff --git a/src/audio_core/time_stretch.h b/src/audio_core/time_stretch.h
new file mode 100644
index 000000000..cdead34a2
--- /dev/null
+++ b/src/audio_core/time_stretch.h
@@ -0,0 +1,35 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <SoundTouch.h>
+#include "common/common_types.h"
+
+namespace AudioCore {
+
+class TimeStretcher {
+public:
+    TimeStretcher(u32 sample_rate, u32 channel_count);
+
+    /// @param in       Input sample buffer
+    /// @param num_in   Number of input frames in `in`
+    /// @param out      Output sample buffer
+    /// @param num_out  Desired number of output frames in `out`
+    /// @returns Actual number of frames written to `out`
+    size_t Process(const s16* in, size_t num_in, s16* out, size_t num_out);
+
+    void Clear();
+
+private:
+    u32 m_sample_rate;
+    u32 m_channel_count;
+    std::array<s16, 2> m_last_stretched_sample = {};
+    soundtouch::SoundTouch m_sound_touch;
+    double m_stretch_ratio = 1.0;
+};
+
+} // namespace AudioCore

From 1aa195a9c0416c986c8224d9dc66d9d5e45401a0 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Sat, 8 Sep 2018 16:49:04 +0100
Subject: [PATCH 6/8] cubeb_sink: Perform audio stretching

---
 src/audio_core/cubeb_sink.cpp   | 37 ++++++++++++++++++---------------
 src/audio_core/time_stretch.cpp | 12 +++++------
 src/audio_core/time_stretch.h   |  1 -
 3 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 3c129122f..7982306b3 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -6,8 +6,10 @@
 #include <cstring>
 #include "audio_core/cubeb_sink.h"
 #include "audio_core/stream.h"
+#include "audio_core/time_stretch.h"
 #include "common/logging/log.h"
 #include "common/ring_buffer.h"
+#include "core/settings.h"
 
 namespace AudioCore {
 
@@ -15,14 +17,8 @@ class CubebSinkStream final : public SinkStream {
 public:
     CubebSinkStream(cubeb* ctx, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
                     const std::string& name)
-        : ctx{ctx}, num_channels{num_channels_} {
-
-        if (num_channels == 6) {
-            // 6-channel audio does not seem to work with cubeb + SDL, so we downsample this to 2
-            // channel for now
-            is_6_channel = true;
-            num_channels = 2;
-        }
+        : ctx{ctx}, is_6_channel{num_channels_ == 6}, num_channels{std::min(num_channels_, 2u)},
+          time_stretch{sample_rate, num_channels} {
 
         cubeb_stream_params params{};
         params.rate = sample_rate;
@@ -89,10 +85,6 @@ public:
         return num_channels;
     }
 
-    u32 GetNumChannelsInQueue() const {
-        return num_channels == 1 ? 1 : 2;
-    }
-
 private:
     std::vector<std::string> device_list;
 
@@ -103,6 +95,7 @@ private:
 
     Common::RingBuffer<s16, 0x10000> queue;
     std::array<s16, 2> last_frame;
+    TimeStretcher time_stretch;
 
     static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
                              void* output_buffer, long num_frames);
@@ -153,7 +146,7 @@ SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
 }
 
 long CubebSinkStream::DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
-                                  void* output_buffer, long num_frames) {
+                                   void* output_buffer, long num_frames) {
     CubebSinkStream* impl = static_cast<CubebSinkStream*>(user_data);
     u8* buffer = reinterpret_cast<u8*>(output_buffer);
 
@@ -161,9 +154,19 @@ long CubebSinkStream::DataCallback(cubeb_stream* stream, void* user_data, const
         return {};
     }
 
-    const size_t num_channels = impl->GetNumChannelsInQueue();
-    const size_t max_samples_to_write = num_channels * num_frames;
-    const size_t samples_written = impl->queue.Pop(buffer, max_samples_to_write);
+    const size_t num_channels = impl->GetNumChannels();
+    const size_t samples_to_write = num_channels * num_frames;
+    size_t samples_written;
+
+    if (Settings::values.enable_audio_stretching) {
+        const std::vector<s16> in{impl->queue.Pop()};
+        const size_t num_in{in.size() / num_channels};
+        s16* const out{reinterpret_cast<s16*>(buffer)};
+        const size_t out_frames = impl->time_stretch.Process(in.data(), num_in, out, num_frames);
+        samples_written = out_frames * num_channels;
+    } else {
+        samples_written = impl->queue.Pop(buffer, samples_to_write);
+    }
 
     if (samples_written >= num_channels) {
         std::memcpy(&impl->last_frame[0], buffer + (samples_written - num_channels) * sizeof(s16),
@@ -171,7 +174,7 @@ long CubebSinkStream::DataCallback(cubeb_stream* stream, void* user_data, const
     }
 
     // Fill the rest of the frames with last_frame
-    for (size_t i = samples_written; i < max_samples_to_write; i += num_channels) {
+    for (size_t i = samples_written; i < samples_to_write; i += num_channels) {
         std::memcpy(buffer + i * sizeof(s16), &impl->last_frame[0], num_channels * sizeof(s16));
     }
 
diff --git a/src/audio_core/time_stretch.cpp b/src/audio_core/time_stretch.cpp
index 17e128323..d2e3391c1 100644
--- a/src/audio_core/time_stretch.cpp
+++ b/src/audio_core/time_stretch.cpp
@@ -28,8 +28,8 @@ size_t TimeStretcher::Process(const s16* in, size_t num_in, s16* out, size_t num
-    // We were given actual_samples number of samples, and num_samples were requested from us.
+    // We were given num_in frames of input, and num_out frames were requested from us.
     double current_ratio = static_cast<double>(num_in) / static_cast<double>(num_out);
 
-    const double max_latency = 0.3; // seconds
-    const double max_backlog = m_sample_rate * max_latency / 1000.0 / m_stretch_ratio;
+    const double max_latency = 1.0; // seconds
+    const double max_backlog = m_sample_rate * max_latency;
     const double backlog_fullness = m_sound_touch.numSamples() / max_backlog;
     if (backlog_fullness > 5.0) {
         // Too many samples in backlog: Don't push anymore on
@@ -49,13 +49,13 @@ size_t TimeStretcher::Process(const s16* in, size_t num_in, s16* out, size_t num
     const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale);
     m_stretch_ratio += lpf_gain * (current_ratio - m_stretch_ratio);
 
-    // Place a lower limit of 10% speed.  When a game boots up, there will be
+    // Place a lower limit of 5% speed.  When a game boots up, there will be
     // many silence samples.  These do not need to be timestretched.
-    m_stretch_ratio = std::max(m_stretch_ratio, 0.1);
+    m_stretch_ratio = std::max(m_stretch_ratio, 0.05);
     m_sound_touch.setTempo(m_stretch_ratio);
 
-    LOG_DEBUG(Audio, "Audio Stretching: samples:{}/{} ratio:{} backlog:{} gain: {}", num_in, num_out,
-              m_stretch_ratio, backlog_fullness, lpf_gain);
+    LOG_DEBUG(Audio, "{:5}/{:5} ratio:{:0.6f} backlog:{:0.6f}", num_in, num_out, m_stretch_ratio,
+              backlog_fullness);
 
     m_sound_touch.putSamples(in, num_in);
     return m_sound_touch.receiveSamples(out, num_out);
diff --git a/src/audio_core/time_stretch.h b/src/audio_core/time_stretch.h
index cdead34a2..0322b8b78 100644
--- a/src/audio_core/time_stretch.h
+++ b/src/audio_core/time_stretch.h
@@ -27,7 +27,6 @@ public:
 private:
     u32 m_sample_rate;
     u32 m_channel_count;
-    std::array<s16, 2> m_last_stretched_sample = {};
     soundtouch::SoundTouch m_sound_touch;
     double m_stretch_ratio = 1.0;
 };

From 55af5bda5574a34716680b23aab6482d340a00ed Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Sun, 9 Sep 2018 09:16:48 +0100
Subject: [PATCH 7/8] cubeb_sink: Downsample arbitrary number of channels

---
 src/audio_core/cubeb_sink.cpp | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 7982306b3..067dc98d2 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -17,8 +17,8 @@ class CubebSinkStream final : public SinkStream {
 public:
     CubebSinkStream(cubeb* ctx, u32 sample_rate, u32 num_channels_, cubeb_devid output_device,
                     const std::string& name)
-        : ctx{ctx}, is_6_channel{num_channels_ == 6}, num_channels{std::min(num_channels_, 2u)},
-          time_stretch{sample_rate, num_channels} {
+        : ctx{ctx}, num_channels{std::min(num_channels_, 2u)}, time_stretch{sample_rate,
+                                                                            num_channels} {
 
         cubeb_stream_params params{};
         params.rate = sample_rate;
@@ -57,15 +57,15 @@ public:
         cubeb_stream_destroy(stream_backend);
     }
 
-    void EnqueueSamples(u32 num_channels, const std::vector<s16>& samples) override {
-        if (is_6_channel) {
-            // Downsample 6 channels to 2
-            const size_t sample_count_copy_size = samples.size() * 2;
+    void EnqueueSamples(u32 source_num_channels, const std::vector<s16>& samples) override {
+        if (source_num_channels > num_channels) {
+            // Drop extra channels: keep the first num_channels samples of each source frame
             std::vector<s16> buf;
-            buf.reserve(sample_count_copy_size);
-            for (size_t i = 0; i < samples.size(); i += num_channels) {
-                buf.push_back(samples[i]);
-                buf.push_back(samples[i + 1]);
+            buf.reserve(samples.size() * num_channels / source_num_channels);
+            for (size_t i = 0; i < samples.size(); i += source_num_channels) {
+                for (size_t ch = 0; ch < num_channels; ch++) {
+                    buf.push_back(samples[i + ch]);
+                }
             }
             queue.Push(buf);
             return;
@@ -91,7 +91,6 @@ private:
     cubeb* ctx{};
     cubeb_stream* stream_backend{};
     u32 num_channels{};
-    bool is_6_channel{};
 
     Common::RingBuffer<s16, 0x10000> queue;
     std::array<s16, 2> last_frame;

From 957ddab6796cb6f644c60993c3035d8bd9c0a398 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Wed, 12 Sep 2018 18:07:16 +0100
Subject: [PATCH 8/8] audio_core: Flush stream when not playing anything

---
 src/audio_core/cubeb_sink.cpp   | 11 +++++++++++
 src/audio_core/null_sink.h      |  2 ++
 src/audio_core/sink_stream.h    |  2 ++
 src/audio_core/stream.cpp       |  2 ++
 src/audio_core/time_stretch.cpp |  4 ++++
 src/audio_core/time_stretch.h   |  2 ++
 6 files changed, 23 insertions(+)

diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 067dc98d2..79155a7a0 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <atomic>
 #include <cstring>
 #include "audio_core/cubeb_sink.h"
 #include "audio_core/stream.h"
@@ -81,6 +82,10 @@ public:
         return queue.Size() / num_channels;
     }
 
+    void Flush() override {
+        should_flush = true;
+    }
+
     u32 GetNumChannels() const {
         return num_channels;
     }
@@ -94,6 +99,7 @@ private:
 
     Common::RingBuffer<s16, 0x10000> queue;
     std::array<s16, 2> last_frame;
+    std::atomic<bool> should_flush{};
     TimeStretcher time_stretch;
 
     static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
@@ -163,6 +169,11 @@ long CubebSinkStream::DataCallback(cubeb_stream* stream, void* user_data, const
         s16* const out{reinterpret_cast<s16*>(buffer)};
         const size_t out_frames = impl->time_stretch.Process(in.data(), num_in, out, num_frames);
         samples_written = out_frames * num_channels;
+
+        // Consume the flush request atomically so one raised mid-callback is not lost.
+        if (impl->should_flush.exchange(false)) {
+            impl->time_stretch.Flush();
+        }
     } else {
         samples_written = impl->queue.Pop(buffer, samples_to_write);
     }
diff --git a/src/audio_core/null_sink.h b/src/audio_core/null_sink.h
index fbb1bc225..2ed0c83b6 100644
--- a/src/audio_core/null_sink.h
+++ b/src/audio_core/null_sink.h
@@ -25,6 +25,8 @@ private:
         size_t SamplesInQueue(u32 /*num_channels*/) const override {
             return 0;
         }
+
+        void Flush() override {}
     } null_sink_stream;
 };
 
diff --git a/src/audio_core/sink_stream.h b/src/audio_core/sink_stream.h
index 743a743a3..4309ad094 100644
--- a/src/audio_core/sink_stream.h
+++ b/src/audio_core/sink_stream.h
@@ -27,6 +27,8 @@ public:
     virtual void EnqueueSamples(u32 num_channels, const std::vector<s16>& samples) = 0;
 
     virtual std::size_t SamplesInQueue(u32 num_channels) const = 0;
+
+    virtual void Flush() = 0;
 };
 
 using SinkStreamPtr = std::unique_ptr<SinkStream>;
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 49c6efc85..84dcdd98d 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -73,6 +73,7 @@ static void VolumeAdjustSamples(std::vector<s16>& samples) {
 void Stream::PlayNextBuffer() {
     if (!IsPlaying()) {
         // Ensure we are in playing state before playing the next buffer
+        sink_stream.Flush();
         return;
     }
 
@@ -83,6 +84,7 @@ void Stream::PlayNextBuffer() {
 
     if (queued_buffers.empty()) {
         // No queued buffers - we are effectively paused
+        sink_stream.Flush();
         return;
     }
 
diff --git a/src/audio_core/time_stretch.cpp b/src/audio_core/time_stretch.cpp
index d2e3391c1..da094c46b 100644
--- a/src/audio_core/time_stretch.cpp
+++ b/src/audio_core/time_stretch.cpp
@@ -22,6 +22,10 @@ void TimeStretcher::Clear() {
     m_sound_touch.clear();
 }
 
+void TimeStretcher::Flush() {
+    m_sound_touch.flush();
+}
+
 size_t TimeStretcher::Process(const s16* in, size_t num_in, s16* out, size_t num_out) {
     const double time_delta = static_cast<double>(num_out) / m_sample_rate; // seconds
 
diff --git a/src/audio_core/time_stretch.h b/src/audio_core/time_stretch.h
index 0322b8b78..7e39e695e 100644
--- a/src/audio_core/time_stretch.h
+++ b/src/audio_core/time_stretch.h
@@ -24,6 +24,8 @@ public:
 
     void Clear();
 
+    void Flush();
+
 private:
     u32 m_sample_rate;
     u32 m_channel_count;