From 1da000b00cff25df6a09afe7b3d708aab695a1be Mon Sep 17 00:00:00 2001 From: tildearrow Date: Tue, 5 Sep 2023 04:38:57 -0500 Subject: [PATCH] GUI: per-chan osc multi-threading! --- CMakeLists.txt | 1 + src/engine/workPool.cpp | 134 ++++++++++++++++++++++++- src/engine/workPool.h | 43 +++++++- src/gui/chanOsc.cpp | 216 ++++++++++++++++++++++------------------ src/gui/gui.cpp | 8 ++ src/gui/gui.h | 10 +- src/gui/settings.cpp | 30 ++++++ 7 files changed, 337 insertions(+), 105 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d9234f4..8aa144dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -550,6 +550,7 @@ src/engine/blip_buf.c src/engine/brrUtils.c src/engine/safeReader.cpp src/engine/safeWriter.cpp +src/engine/workPool.cpp src/engine/cmdStream.cpp src/engine/cmdStreamOps.cpp src/engine/config.cpp diff --git a/src/engine/workPool.cpp b/src/engine/workPool.cpp index 6c294a7d..d7c1f6b2 100644 --- a/src/engine/workPool.cpp +++ b/src/engine/workPool.cpp @@ -17,4 +17,136 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ - #include "workPool.h" \ No newline at end of file +#include "workPool.h" +#include "../ta-log.h" +#include + +void* _workThread(void* inst) { + ((DivWorkThread*)inst)->run(); + return NULL; +} + +void DivWorkThread::run() { + std::unique_lock unique(selfLock); + DivPendingTask task; + + logV("running work thread"); + + while (true) { + lock.lock(); + if (tasks.empty()) { + lock.unlock(); + isBusy=false; + parent->notify.notify_one(); + if (terminate) { + break; + } + notify.wait(unique); + continue; + } else { + task=tasks.front(); + tasks.pop(); + lock.unlock(); + + task.func(task.funcArg); + + parent->busyCount--; + parent->notify.notify_one(); + } + } +} + +bool DivWorkThread::assign(const std::function& what, void* arg) { + lock.lock(); + if (tasks.size()>=30) { + lock.unlock(); + return false; + } + tasks.push(DivPendingTask(what,arg)); + parent->busyCount++; + parent->notify.notify_one(); + isBusy=true; + lock.unlock(); + notify.notify_one(); + return true; +} + +void DivWorkThread::wait() { + if (!isBusy) return; +} + +bool DivWorkThread::busy() { + return isBusy; +} + +void DivWorkThread::finish() { + lock.lock(); + terminate=true; + lock.unlock(); + notify.notify_one(); + thread->join(); +} + +void DivWorkThread::init(DivWorkPool* p) { + parent=p; + thread=new std::thread(_workThread,this); +} + +void DivWorkPool::push(const std::function& what, void* arg) { + //logV("submitting work"); + // if no work threads, just execute + if (!threaded) { + what(arg); + return; + } + + if (pos>=count) pos=0; + + for (unsigned int tryCount=0; tryCount unique(selfLock); + while (busyCount!=0) { + notify.wait_for(unique,std::chrono::milliseconds(100)); + } +} + +DivWorkPool::DivWorkPool(unsigned int threads): + threaded(threads>0), + count(threads), + pos(0), + busyCount(0) { + if (threaded) { + workThreads=new DivWorkThread[threads]; + for (unsigned int i=0; i #include +#include #include #include +#include "fixedQueue.h" + +class DivWorkPool; + +struct DivPendingTask { + std::function func; + void* funcArg; + DivPendingTask(std::function f, void* arg): + func(f), + funcArg(arg) {} + DivPendingTask(): + func(NULL), + funcArg(NULL) {} +}; + struct DivWorkThread { + DivWorkPool* parent; std::mutex lock; + std::mutex selfLock; std::thread* thread; std::condition_variable notify; - bool busy, terminate; + FixedQueue tasks; + std::atomic isBusy; + bool terminate; void run(); + bool assign(const std::function& what, void* arg); + void wait(); + bool busy(); + void finish(); + + void init(DivWorkPool* p); DivWorkThread(): - busy(false) {} + parent(NULL), + isBusy(false), + terminate(false) {} }; /** @@ -41,13 +69,20 @@ struct DivWorkThread { * it is highly recommended to use `new` when allocating a DivWorkPool. */ class DivWorkPool { + bool threaded; + std::mutex selfLock; + unsigned int count; + unsigned int pos; DivWorkThread* workThreads; public: + std::condition_variable notify; + std::atomic busyCount; + /** * push a new job to this work pool. * if all work threads are busy, this will block until one is free. */ - bool push(); + void push(const std::function& what, void* arg); /** * check whether this work pool is busy. @@ -57,7 +92,7 @@ class DivWorkPool { /** * wait for all work threads to finish. */ - bool wait(); + void wait(); DivWorkPool(unsigned int threads=0); ~DivWorkPool(); diff --git a/src/gui/chanOsc.cpp b/src/gui/chanOsc.cpp index 31b0cc0b..b01a165b 100644 --- a/src/gui/chanOsc.cpp +++ b/src/gui/chanOsc.cpp @@ -367,6 +367,12 @@ void FurnaceGUI::drawChanOsc() { ImGuiStyle& style=ImGui::GetStyle(); ImVec2 waveform[1024]; + // check work thread + if (chanOscWorkPool==NULL) { + logV("creating chan osc work pool"); + chanOscWorkPool=new DivWorkPool(settings.chanOscThreads); + } + // fill buffers for (int i=0; igetOscBuffer(i); @@ -379,137 +385,144 @@ void FurnaceGUI::drawChanOsc() { // process for (size_t i=0; irelatedBuf=oscBufs[i]; + fft_->relatedCh=oscChans[i]; + + if (fft_->relatedBuf!=NULL) { // prepare if (centerSettingReset) { - buf->readNeedle=buf->needle; + fft_->relatedBuf->readNeedle=fft_->relatedBuf->needle; } - int displaySize=(float)(buf->rate)*(chanOscWindowSize/1000.0f); - // check FFT status existence - if (!fft->ready) { - logD("creating FFT plan for channel %d",ch); - fft->inBuf=(double*)fftw_malloc(FURNACE_FFT_SIZE*sizeof(double)); - fft->outBuf=(fftw_complex*)fftw_malloc(FURNACE_FFT_SIZE*sizeof(fftw_complex)); - fft->corrBuf=(double*)fftw_malloc(FURNACE_FFT_SIZE*sizeof(double)); - fft->plan=fftw_plan_dft_r2c_1d(FURNACE_FFT_SIZE,fft->inBuf,fft->outBuf,FFTW_ESTIMATE); - fft->planI=fftw_plan_dft_c2r_1d(FURNACE_FFT_SIZE,fft->outBuf,fft->corrBuf,FFTW_ESTIMATE); - if (fft->plan==NULL) { + if (!fft_->ready) { + logD("creating FFT plan for channel %d",fft_->relatedCh); + fft_->inBuf=(double*)fftw_malloc(FURNACE_FFT_SIZE*sizeof(double)); + fft_->outBuf=(fftw_complex*)fftw_malloc(FURNACE_FFT_SIZE*sizeof(fftw_complex)); + fft_->corrBuf=(double*)fftw_malloc(FURNACE_FFT_SIZE*sizeof(double)); + fft_->plan=fftw_plan_dft_r2c_1d(FURNACE_FFT_SIZE,fft_->inBuf,fft_->outBuf,FFTW_ESTIMATE); + fft_->planI=fftw_plan_dft_c2r_1d(FURNACE_FFT_SIZE,fft_->outBuf,fft_->corrBuf,FFTW_ESTIMATE); + if (fft_->plan==NULL) { logE("failed to create plan!"); - } else if (fft->planI==NULL) { + } else if (fft_->planI==NULL) { logE("failed to create inverse plan!"); - } else if (fft->inBuf==NULL || fft->outBuf==NULL || fft->corrBuf==NULL) { + } else if (fft_->inBuf==NULL || fft_->outBuf==NULL || fft_->corrBuf==NULL) { logE("failed to create FFT buffers"); } else { - fft->ready=true; + fft_->ready=true; } } - if (fft->ready && e->isRunning()) { - // the STRATEGY - // 1. FFT of windowed signal - // 2. inverse FFT of auto-correlation - // 3. find size of one period - // 4. DFT of the fundamental of ONE PERIOD - // 5. now we can get phase information - // - // I have a feeling this could be simplified to two FFTs or even one... - // if you know how, please tell me + if (fft_->ready && e->isRunning()) { + chanOscWorkPool->push([this](void* fft_v) { + ChanOscStatus* fft=(ChanOscStatus*)fft_v; + DivDispatchOscBuffer* buf=fft->relatedBuf; + int ch=fft->relatedCh; - // initialization - double phase=0.0; - fft->loudEnough=false; - fft->needle=buf->needle; + // the STRATEGY + // 1. FFT of windowed signal + // 2. inverse FFT of auto-correlation + // 3. find size of one period + // 4. DFT of the fundamental of ONE PERIOD + // 5. now we can get phase information + // + // I have a feeling this could be simplified to two FFTs or even one... + // if you know how, please tell me - // first FFT - for (int j=0; jinBuf[j]=(double)buf->data[(unsigned short)(fft->needle-displaySize*2+((j*displaySize*2)/(FURNACE_FFT_SIZE)))]/32768.0; - if (fft->inBuf[j]>0.001 || fft->inBuf[j]<-0.001) fft->loudEnough=true; - fft->inBuf[j]*=0.55-0.45*cos(M_PI*(double)j/(double)(FURNACE_FFT_SIZE>>1)); - } + // initialization + double phase=0.0; + int displaySize=(float)(buf->rate)*(chanOscWindowSize/1000.0f); + fft->loudEnough=false; + fft->needle=buf->needle; - // only proceed if not quiet - if (fft->loudEnough) { - fftw_execute(fft->plan); - - // auto-correlation and second FFT + // first FFT for (int j=0; joutBuf[j][0]/=FURNACE_FFT_SIZE; - fft->outBuf[j][1]/=FURNACE_FFT_SIZE; - fft->outBuf[j][0]=fft->outBuf[j][0]*fft->outBuf[j][0]+fft->outBuf[j][1]*fft->outBuf[j][1]; - fft->outBuf[j][1]=0; - } - fft->outBuf[0][0]=0; - fft->outBuf[0][1]=0; - fft->outBuf[1][0]=0; - fft->outBuf[1][1]=0; - fftw_execute(fft->planI); - - // window - for (int j=0; j<(FURNACE_FFT_SIZE>>1); j++) { - fft->corrBuf[j]*=1.0-((double)j/(double)(FURNACE_FFT_SIZE<<1)); + fft->inBuf[j]=(double)buf->data[(unsigned short)(fft->needle-displaySize*2+((j*displaySize*2)/(FURNACE_FFT_SIZE)))]/32768.0; + if (fft->inBuf[j]>0.001 || fft->inBuf[j]<-0.001) fft->loudEnough=true; + fft->inBuf[j]*=0.55-0.45*cos(M_PI*(double)j/(double)(FURNACE_FFT_SIZE>>1)); } - // find size of period - double waveLenCandL=DBL_MAX; - double waveLenCandH=DBL_MIN; - fft->waveLen=FURNACE_FFT_SIZE-1; - fft->waveLenBottom=0; - fft->waveLenTop=0; + // only proceed if not quiet + if (fft->loudEnough) { + fftw_execute(fft->plan); - // find lowest point - for (int j=(FURNACE_FFT_SIZE>>2); j>2; j--) { - if (fft->corrBuf[j]corrBuf[j]; - fft->waveLenBottom=j; + // auto-correlation and second FFT + for (int j=0; joutBuf[j][0]/=FURNACE_FFT_SIZE; + fft->outBuf[j][1]/=FURNACE_FFT_SIZE; + fft->outBuf[j][0]=fft->outBuf[j][0]*fft->outBuf[j][0]+fft->outBuf[j][1]*fft->outBuf[j][1]; + fft->outBuf[j][1]=0; } - } - - // find highest point - for (int j=(FURNACE_FFT_SIZE>>1)-1; j>fft->waveLenBottom; j--) { - if (fft->corrBuf[j]>waveLenCandH) { - waveLenCandH=fft->corrBuf[j]; - fft->waveLen=j; - } - } - fft->waveLenTop=fft->waveLen; + fft->outBuf[0][0]=0; + fft->outBuf[0][1]=0; + fft->outBuf[1][0]=0; + fft->outBuf[1][1]=0; + fftw_execute(fft->planI); - // did we find the period size? - if (fft->waveLen<(FURNACE_FFT_SIZE-32)) { - // we got pitch - chanOscPitch[ch]=pow(1.0-(fft->waveLen/(double)(FURNACE_FFT_SIZE>>1)),4.0); + // window + for (int j=0; j<(FURNACE_FFT_SIZE>>1); j++) { + fft->corrBuf[j]*=1.0-((double)j/(double)(FURNACE_FFT_SIZE<<1)); + } + + // find size of period + double waveLenCandL=DBL_MAX; + double waveLenCandH=DBL_MIN; + fft->waveLen=FURNACE_FFT_SIZE-1; + fft->waveLenBottom=0; + fft->waveLenTop=0; + + // find lowest point + for (int j=(FURNACE_FFT_SIZE>>2); j>2; j--) { + if (fft->corrBuf[j]corrBuf[j]; + fft->waveLenBottom=j; + } + } - fft->waveLen*=(double)displaySize*2.0/(double)FURNACE_FFT_SIZE; - - // DFT of one period (x_1) - double dft[2]; - dft[0]=0.0; - dft[1]=0.0; - for (int j=fft->needle-1-(displaySize>>1)-(int)fft->waveLen, k=0; kwaveLen; j++, k++) { - double one=((double)buf->data[j&0xffff]/32768.0); - double two=(double)k*(-2.0*M_PI)/fft->waveLen; - dft[0]+=one*cos(two); - dft[1]+=one*sin(two); + // find highest point + for (int j=(FURNACE_FFT_SIZE>>1)-1; j>fft->waveLenBottom; j--) { + if (fft->corrBuf[j]>waveLenCandH) { + waveLenCandH=fft->corrBuf[j]; + fft->waveLen=j; + } } + fft->waveLenTop=fft->waveLen; - // calculate and lock into phase - phase=(0.5+(atan2(dft[1],dft[0])/(2.0*M_PI))); + // did we find the period size? + if (fft->waveLen<(FURNACE_FFT_SIZE-32)) { + // we got pitch + chanOscPitch[ch]=pow(1.0-(fft->waveLen/(double)(FURNACE_FFT_SIZE>>1)),4.0); + + fft->waveLen*=(double)displaySize*2.0/(double)FURNACE_FFT_SIZE; - if (chanOscWaveCorr) { - fft->needle-=phase*fft->waveLen; + // DFT of one period (x_1) + double dft[2]; + dft[0]=0.0; + dft[1]=0.0; + for (int j=fft->needle-1-(displaySize>>1)-(int)fft->waveLen, k=0; kwaveLen; j++, k++) { + double one=((double)buf->data[j&0xffff]/32768.0); + double two=(double)k*(-2.0*M_PI)/fft->waveLen; + dft[0]+=one*cos(two); + dft[1]+=one*sin(two); + } + + // calculate and lock into phase + phase=(0.5+(atan2(dft[1],dft[0])/(2.0*M_PI))); + + if (chanOscWaveCorr) { + fft->needle-=phase*fft->waveLen; + } } } - } - fft->needle-=displaySize; + fft->needle-=displaySize; + },fft_); } } } + chanOscWorkPool->wait(); // 0: none // 1: sqrt(chans) @@ -644,7 +657,12 @@ void FurnaceGUI::drawChanOsc() { } ImGui::PushClipRect(inRect.Min,inRect.Max,false); + ImDrawListFlags prevFlags=dl->Flags; + //if (!settings.oscAntiAlias) { + dl->Flags&=~(ImDrawListFlags_AntiAliasedLines|ImDrawListFlags_AntiAliasedLinesUseTex); + //} dl->AddPolyline(waveform,precision,color,ImDrawFlags_None,dpiScale); + dl->Flags=prevFlags; if (!chanOscTextFormat.empty()) { String text; diff --git a/src/gui/gui.cpp b/src/gui/gui.cpp index 7673a19c..ed43482e 100644 --- a/src/gui/gui.cpp +++ b/src/gui/gui.cpp @@ -6685,6 +6685,9 @@ bool FurnaceGUI::init() { } #endif + cpuCores=SDL_GetCPUCount(); + if (cpuCores<1) cpuCores=1; + logI("done!"); return true; } @@ -6857,6 +6860,10 @@ bool FurnaceGUI::finish() { backupTask.get(); } + if (chanOscWorkPool!=NULL) { + delete chanOscWorkPool; + } + return true; } @@ -7281,6 +7288,7 @@ FurnaceGUI::FurnaceGUI(): chanOscTextColor(1.0f,1.0f,1.0f,0.75f), chanOscGrad(64,64), chanOscGradTex(NULL), + chanOscWorkPool(NULL), followLog(true), #ifdef IS_MOBILE pianoOctaves(7), diff --git a/src/gui/gui.h b/src/gui/gui.h index afba9099..cdf67630 100644 --- a/src/gui/gui.h +++ b/src/gui/gui.h @@ -21,6 +21,7 @@ #define _FUR_GUI_H #include "../engine/engine.h" +#include "../engine/workPool.h" #include "../engine/waveSynth.h" #include "imgui.h" #include "imgui_impl_sdl2.h" @@ -1346,6 +1347,7 @@ class FurnaceGUI { int mobileEditPage; int wheelCalmDown; int shallDetectScale; + int cpuCores; float mobileMenuPos, autoButtonSize, mobileEditAnim; ImVec2 mobileEditButtonPos, mobileEditButtonSize; const int* curSysSection; @@ -1572,6 +1574,7 @@ class FurnaceGUI { int insIconsStyle; int classicChipOptions; int wasapiEx; + int chanOscThreads; unsigned int maxUndoSteps; String mainFontPath; String headFontPath; @@ -1747,6 +1750,7 @@ class FurnaceGUI { insIconsStyle(1), classicChipOptions(0), wasapiEx(0), + chanOscThreads(0), maxUndoSteps(100), mainFontPath(""), headFontPath(""), @@ -2047,6 +2051,7 @@ class FurnaceGUI { ImVec4 chanOscColor, chanOscTextColor; Gradient2D chanOscGrad; FurnaceGUITexture* chanOscGradTex; + DivWorkPool* chanOscWorkPool; float chanOscLP0[DIV_MAX_CHANS]; float chanOscLP1[DIV_MAX_CHANS]; float chanOscVol[DIV_MAX_CHANS]; @@ -2058,10 +2063,11 @@ class FurnaceGUI { double* inBuf; fftw_complex* outBuf; double* corrBuf; + DivDispatchOscBuffer* relatedBuf; size_t inBufPos; double inBufPosFrac; double waveLen; - int waveLenBottom, waveLenTop; + int waveLenBottom, waveLenTop, relatedCh; unsigned short needle; bool ready, loudEnough; fftw_plan plan; @@ -2070,11 +2076,13 @@ class FurnaceGUI { inBuf(NULL), outBuf(NULL), corrBuf(NULL), + relatedBuf(NULL), inBufPos(0), inBufPosFrac(0.0f), waveLen(0.0), waveLenBottom(0), waveLenTop(0), + relatedCh(0), needle(0), ready(false), loudEnough(false), diff --git a/src/gui/settings.cpp b/src/gui/settings.cpp index 18690514..31463778 100644 --- a/src/gui/settings.cpp +++ b/src/gui/settings.cpp @@ -400,6 +400,27 @@ void FurnaceGUI::drawSettings() { ImGui::SetTooltip("may cause issues with high-polling-rate mice when previewing notes."); } + pushWarningColor(settings.chanOscThreads>cpuCores,settings.chanOscThreads>(cpuCores*2)); + if (ImGui::InputInt("Per-channel oscilloscope threads",&settings.chanOscThreads)) { + if (settings.chanOscThreads<0) settings.chanOscThreads=0; + if (settings.chanOscThreads>(cpuCores*3)) settings.chanOscThreads=cpuCores*3; + if (settings.chanOscThreads>256) settings.chanOscThreads=256; + } + if (settings.chanOscThreads>=(cpuCores*3)) { + if (ImGui::IsItemHovered()) { + ImGui::SetTooltip("you're being silly, aren't you? that's enough."); + } + } else if (settings.chanOscThreads>(cpuCores*2)) { + if (ImGui::IsItemHovered()) { + ImGui::SetTooltip("what are you doing? stop!"); + } + } else if (settings.chanOscThreads>cpuCores) { + if (ImGui::IsItemHovered()) { + ImGui::SetTooltip("it is a bad idea to set this number higher than your CPU core count (%d)!",cpuCores); + } + } + popWarningColor(); + // SUBSECTION FILE CONFIG_SUBSECTION("File"); @@ -3262,6 +3283,7 @@ void FurnaceGUI::syncSettings() { settings.insIconsStyle=e->getConfInt("insIconsStyle",1); settings.classicChipOptions=e->getConfInt("classicChipOptions",0); settings.wasapiEx=e->getConfInt("wasapiEx",0); + settings.chanOscThreads=e->getConfInt("chanOscThreads",0); clampSetting(settings.mainFontSize,2,96); clampSetting(settings.headFontSize,2,96); @@ -3410,6 +3432,7 @@ void FurnaceGUI::syncSettings() { clampSetting(settings.insIconsStyle,0,2); clampSetting(settings.classicChipOptions,0,1); clampSetting(settings.wasapiEx,0,1); + clampSetting(settings.chanOscThreads,0,256); if (settings.exportLoops<0.0) settings.exportLoops=0.0; if (settings.exportFadeOut<0.0) settings.exportFadeOut=0.0; @@ -3665,6 +3688,7 @@ void FurnaceGUI::commitSettings() { e->setConf("insIconsStyle",settings.insIconsStyle); e->setConf("classicChipOptions",settings.classicChipOptions); e->setConf("wasapiEx",settings.wasapiEx); + e->setConf("chanOscThreads",settings.chanOscThreads); // colors for (int i=0; i