GUI: per-chan osc multi-threading!

This commit is contained in:
tildearrow 2023-09-05 04:38:57 -05:00
parent c99899a002
commit 1da000b00c
7 changed files with 337 additions and 105 deletions

View file

@ -550,6 +550,7 @@ src/engine/blip_buf.c
src/engine/brrUtils.c
src/engine/safeReader.cpp
src/engine/safeWriter.cpp
src/engine/workPool.cpp
src/engine/cmdStream.cpp
src/engine/cmdStreamOps.cpp
src/engine/config.cpp

View file

@ -17,4 +17,136 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "workPool.h"
#include "workPool.h"
#include "../ta-log.h"
#include <thread>
void* _workThread(void* inst) {
((DivWorkThread*)inst)->run();
return NULL;
}
// worker loop: pops queued tasks and runs them until finish() requests
// termination and the queue is drained.
//
// the condition variable waits on `lock`, the same mutex that assign() and
// finish() hold while changing `tasks`/`terminate`. checking the queue and
// going to sleep are therefore one atomic step, so a notification sent
// between the check and the wait cannot be lost. for the same reason
// `isBusy` and `terminate` are only touched here while `lock` is held.
void DivWorkThread::run() {
  std::unique_lock<std::mutex> guard(lock);
  DivPendingTask task;

  logV("running work thread");

  while (true) {
    if (tasks.empty()) {
      // nothing to do: report idle to the pool
      isBusy=false;
      parent->notify.notify_one();
      if (terminate) {
        break;
      }
      // releases `lock` while sleeping and re-acquires it on wake-up.
      // spurious wake-ups are harmless since the loop re-checks the queue.
      notify.wait(guard);
      continue;
    }

    task=tasks.front();
    tasks.pop();

    // run the task without holding the queue lock so that assign() can
    // keep queueing work meanwhile
    guard.unlock();
    task.func(task.funcArg);

    parent->busyCount--;
    parent->notify.notify_one();
    guard.lock();
  }
}
// queue a task on this work thread.
// returns false (and queues nothing) when 30 or more tasks are already
// pending; otherwise bumps the pool's busy counter, marks this thread as
// busy, wakes the worker and returns true.
bool DivWorkThread::assign(const std::function<void(void*)>& what, void* arg) {
  {
    std::lock_guard<std::mutex> guard(lock);
    if (!(tasks.size()<30)) {
      return false;
    }

    tasks.push(DivPendingTask(what,arg));
    parent->busyCount++;
    parent->notify.notify_one();
    isBusy=true;
  }

  // wake the worker only after the queue lock has been released
  notify.notify_one();
  return true;
}
// NOTE(review): despite its name, this function does not block. it returns
// immediately whether or not the thread is busy (the early return is the
// only statement). presumably a placeholder; DivWorkPool::wait() is what
// actually waits for outstanding work.
void DivWorkThread::wait() {
if (!isBusy) return;
}
// report the current value of the busy flag (set by assign(), cleared by
// run() when the queue is found empty).
bool DivWorkThread::busy() {
  const bool state=isBusy;
  return state;
}
void DivWorkThread::finish() {
lock.lock();
terminate=true;
lock.unlock();
notify.notify_one();
thread->join();
}
// bind this work thread to its owning pool and start the worker, which
// runs DivWorkThread::run() through the _workThread trampoline.
void DivWorkThread::init(DivWorkPool* p) {
  parent=p;

  std::thread* worker=new std::thread(_workThread,this);
  thread=worker;
}
// submit a job to the pool.
// - if the pool has no work threads, the job runs immediately on the caller.
// - otherwise each work thread is offered the job once, round-robin starting
//   at `pos`; the first one to accept it takes it.
// - if every thread refuses (queues full), the job runs on the caller.
void DivWorkPool::push(const std::function<void(void*)>& what, void* arg) {
  //logV("submitting work");
  // if no work threads, just execute
  if (!threaded) {
    what(arg);
    return;
  }

  for (unsigned int tryCount=0; tryCount<count; tryCount++) {
    // wrap on every attempt. wrapping only once before the loop lets `pos`
    // run past the end of workThreads after a refusal.
    if (pos>=count) pos=0;
    if (workThreads[pos++].assign(what,arg)) return;
  }

  // all threads are busy
  logV("all busy");
  what(arg);
}
// true if any work thread reports itself as busy.
// a pool without work threads is never busy.
bool DivWorkPool::busy() {
  if (threaded) {
    for (unsigned int idx=0; idx<count; idx++) {
      if (workThreads[idx].busy()) {
        return true;
      }
    }
  }
  return false;
}
// block until the pool's busy counter reaches zero.
// the wait is re-armed every 100ms, so the counter is polled even if no
// notification arrives. returns at once for a pool without work threads.
void DivWorkPool::wait() {
  if (threaded) {
    std::unique_lock<std::mutex> guard(selfLock);
    const std::chrono::milliseconds pollInterval(100);
    while (busyCount!=0) {
      notify.wait_for(guard,pollInterval);
    }
  }
}
// create a pool with the given number of work threads.
// with zero threads no workers are allocated and the pool is not threaded.
DivWorkPool::DivWorkPool(unsigned int threads):
  threaded(threads>0),
  count(threads),
  pos(0),
  busyCount(0) {
  if (!threaded) {
    workThreads=NULL;
    return;
  }

  // allocate the workers, then start each one bound to this pool
  workThreads=new DivWorkThread[threads];
  for (unsigned int idx=0; idx<count; idx++) {
    workThreads[idx].init(this);
  }
}
// stop every work thread (one after another) and release the worker array.
// nothing to do for a pool without work threads.
DivWorkPool::~DivWorkPool() {
  if (!threaded) return;

  for (unsigned int idx=0; idx<count; idx++) {
    workThreads[idx].finish();
  }
  delete[] workThreads;
}

View file

@ -22,18 +22,46 @@
#include <thread>
#include <mutex>
#include <atomic>
#include <functional>
#include <condition_variable>
#include "fixedQueue.h"
class DivWorkPool;
// a unit of queued work: a callable plus the opaque argument to call it with.
struct DivPendingTask {
  // function to run
  std::function<void(void*)> func;
  // argument handed to func
  void* funcArg;

  // empty task: no callable, no argument
  DivPendingTask():
    func(NULL),
    funcArg(NULL) {}

  // task which calls f(arg)
  DivPendingTask(std::function<void(void*)> f, void* arg):
    func(f),
    funcArg(arg) {}
};
// one worker of a DivWorkPool: owns a thread and a bounded queue of tasks.
// NOTE(review): this span looks like a diff rendering which interleaves
// pre-change and post-change lines (`bool busy, terminate;` next to
// `isBusy`/`terminate`, and two constructor heads) - confirm against the
// actual header before relying on it.
struct DivWorkThread {
// owning pool; run()/assign() update its busyCount and signal its notify
DivWorkPool* parent;
// taken by run(), assign() and finish() around `tasks` and `terminate`
std::mutex lock;
// mutex that run() pairs with `notify` when sleeping
std::mutex selfLock;
// created by init(), joined by finish()
std::thread* thread;
// signalled by assign() and finish() to wake the worker
std::condition_variable notify;
bool busy, terminate;
// pending tasks. assign() refuses new work once 30 are queued.
FixedQueue<DivPendingTask,32> tasks;
// set by assign(), cleared by run() when the queue is found empty
std::atomic<bool> isBusy;
// set by finish() to make run() exit once the queue is empty
bool terminate;
void run();
bool assign(const std::function<void(void*)>& what, void* arg);
void wait();
bool busy();
void finish();
void init(DivWorkPool* p);
DivWorkThread():
busy(false) {}
parent(NULL),
isBusy(false),
terminate(false) {}
};
/**
@ -41,13 +69,20 @@ struct DivWorkThread {
* it is highly recommended to use `new` when allocating a DivWorkPool.
*/
class DivWorkPool {
bool threaded;
std::mutex selfLock;
unsigned int count;
unsigned int pos;
DivWorkThread* workThreads;
public:
std::condition_variable notify;
std::atomic<int> busyCount;
/**
* push a new job to this work pool.
* if there are no work threads, or none of them accepts the job, it is
* executed immediately on the calling thread.
*/
bool push();
void push(const std::function<void(void*)>& what, void* arg);
/**
* check whether this work pool is busy.
@ -57,7 +92,7 @@ class DivWorkPool {
/**
* wait for all work threads to finish.
*/
bool wait();
void wait();
DivWorkPool(unsigned int threads=0);
~DivWorkPool();

View file

@ -367,6 +367,12 @@ void FurnaceGUI::drawChanOsc() {
ImGuiStyle& style=ImGui::GetStyle();
ImVec2 waveform[1024];
// check work thread
if (chanOscWorkPool==NULL) {
logV("creating chan osc work pool");
chanOscWorkPool=new DivWorkPool(settings.chanOscThreads);
}
// fill buffers
for (int i=0; i<chans; i++) {
DivDispatchOscBuffer* buf=e->getOscBuffer(i);
@ -379,137 +385,144 @@ void FurnaceGUI::drawChanOsc() {
// process
for (size_t i=0; i<oscBufs.size(); i++) {
DivDispatchOscBuffer* buf=oscBufs[i];
ChanOscStatus* fft=oscFFTs[i];
int ch=oscChans[i];
ChanOscStatus* fft_=oscFFTs[i];
if (buf!=NULL) {
fft_->relatedBuf=oscBufs[i];
fft_->relatedCh=oscChans[i];
if (fft_->relatedBuf!=NULL) {
// prepare
if (centerSettingReset) {
buf->readNeedle=buf->needle;
fft_->relatedBuf->readNeedle=fft_->relatedBuf->needle;
}
int displaySize=(float)(buf->rate)*(chanOscWindowSize/1000.0f);
// check FFT status existence
if (!fft->ready) {
logD("creating FFT plan for channel %d",ch);
fft->inBuf=(double*)fftw_malloc(FURNACE_FFT_SIZE*sizeof(double));
fft->outBuf=(fftw_complex*)fftw_malloc(FURNACE_FFT_SIZE*sizeof(fftw_complex));
fft->corrBuf=(double*)fftw_malloc(FURNACE_FFT_SIZE*sizeof(double));
fft->plan=fftw_plan_dft_r2c_1d(FURNACE_FFT_SIZE,fft->inBuf,fft->outBuf,FFTW_ESTIMATE);
fft->planI=fftw_plan_dft_c2r_1d(FURNACE_FFT_SIZE,fft->outBuf,fft->corrBuf,FFTW_ESTIMATE);
if (fft->plan==NULL) {
if (!fft_->ready) {
logD("creating FFT plan for channel %d",fft_->relatedCh);
fft_->inBuf=(double*)fftw_malloc(FURNACE_FFT_SIZE*sizeof(double));
fft_->outBuf=(fftw_complex*)fftw_malloc(FURNACE_FFT_SIZE*sizeof(fftw_complex));
fft_->corrBuf=(double*)fftw_malloc(FURNACE_FFT_SIZE*sizeof(double));
fft_->plan=fftw_plan_dft_r2c_1d(FURNACE_FFT_SIZE,fft_->inBuf,fft_->outBuf,FFTW_ESTIMATE);
fft_->planI=fftw_plan_dft_c2r_1d(FURNACE_FFT_SIZE,fft_->outBuf,fft_->corrBuf,FFTW_ESTIMATE);
if (fft_->plan==NULL) {
logE("failed to create plan!");
} else if (fft->planI==NULL) {
} else if (fft_->planI==NULL) {
logE("failed to create inverse plan!");
} else if (fft->inBuf==NULL || fft->outBuf==NULL || fft->corrBuf==NULL) {
} else if (fft_->inBuf==NULL || fft_->outBuf==NULL || fft_->corrBuf==NULL) {
logE("failed to create FFT buffers");
} else {
fft->ready=true;
fft_->ready=true;
}
}
if (fft->ready && e->isRunning()) {
// the STRATEGY
// 1. FFT of windowed signal
// 2. inverse FFT of auto-correlation
// 3. find size of one period
// 4. DFT of the fundamental of ONE PERIOD
// 5. now we can get phase information
//
// I have a feeling this could be simplified to two FFTs or even one...
// if you know how, please tell me
if (fft_->ready && e->isRunning()) {
chanOscWorkPool->push([this](void* fft_v) {
ChanOscStatus* fft=(ChanOscStatus*)fft_v;
DivDispatchOscBuffer* buf=fft->relatedBuf;
int ch=fft->relatedCh;
// initialization
double phase=0.0;
fft->loudEnough=false;
fft->needle=buf->needle;
// the STRATEGY
// 1. FFT of windowed signal
// 2. inverse FFT of auto-correlation
// 3. find size of one period
// 4. DFT of the fundamental of ONE PERIOD
// 5. now we can get phase information
//
// I have a feeling this could be simplified to two FFTs or even one...
// if you know how, please tell me
// first FFT
for (int j=0; j<FURNACE_FFT_SIZE; j++) {
fft->inBuf[j]=(double)buf->data[(unsigned short)(fft->needle-displaySize*2+((j*displaySize*2)/(FURNACE_FFT_SIZE)))]/32768.0;
if (fft->inBuf[j]>0.001 || fft->inBuf[j]<-0.001) fft->loudEnough=true;
fft->inBuf[j]*=0.55-0.45*cos(M_PI*(double)j/(double)(FURNACE_FFT_SIZE>>1));
}
// initialization
double phase=0.0;
int displaySize=(float)(buf->rate)*(chanOscWindowSize/1000.0f);
fft->loudEnough=false;
fft->needle=buf->needle;
// only proceed if not quiet
if (fft->loudEnough) {
fftw_execute(fft->plan);
// auto-correlation and second FFT
// first FFT
for (int j=0; j<FURNACE_FFT_SIZE; j++) {
fft->outBuf[j][0]/=FURNACE_FFT_SIZE;
fft->outBuf[j][1]/=FURNACE_FFT_SIZE;
fft->outBuf[j][0]=fft->outBuf[j][0]*fft->outBuf[j][0]+fft->outBuf[j][1]*fft->outBuf[j][1];
fft->outBuf[j][1]=0;
}
fft->outBuf[0][0]=0;
fft->outBuf[0][1]=0;
fft->outBuf[1][0]=0;
fft->outBuf[1][1]=0;
fftw_execute(fft->planI);
// window
for (int j=0; j<(FURNACE_FFT_SIZE>>1); j++) {
fft->corrBuf[j]*=1.0-((double)j/(double)(FURNACE_FFT_SIZE<<1));
fft->inBuf[j]=(double)buf->data[(unsigned short)(fft->needle-displaySize*2+((j*displaySize*2)/(FURNACE_FFT_SIZE)))]/32768.0;
if (fft->inBuf[j]>0.001 || fft->inBuf[j]<-0.001) fft->loudEnough=true;
fft->inBuf[j]*=0.55-0.45*cos(M_PI*(double)j/(double)(FURNACE_FFT_SIZE>>1));
}
// find size of period
double waveLenCandL=DBL_MAX;
double waveLenCandH=DBL_MIN;
fft->waveLen=FURNACE_FFT_SIZE-1;
fft->waveLenBottom=0;
fft->waveLenTop=0;
// only proceed if not quiet
if (fft->loudEnough) {
fftw_execute(fft->plan);
// find lowest point
for (int j=(FURNACE_FFT_SIZE>>2); j>2; j--) {
if (fft->corrBuf[j]<waveLenCandL) {
waveLenCandL=fft->corrBuf[j];
fft->waveLenBottom=j;
// auto-correlation and second FFT
for (int j=0; j<FURNACE_FFT_SIZE; j++) {
fft->outBuf[j][0]/=FURNACE_FFT_SIZE;
fft->outBuf[j][1]/=FURNACE_FFT_SIZE;
fft->outBuf[j][0]=fft->outBuf[j][0]*fft->outBuf[j][0]+fft->outBuf[j][1]*fft->outBuf[j][1];
fft->outBuf[j][1]=0;
}
}
// find highest point
for (int j=(FURNACE_FFT_SIZE>>1)-1; j>fft->waveLenBottom; j--) {
if (fft->corrBuf[j]>waveLenCandH) {
waveLenCandH=fft->corrBuf[j];
fft->waveLen=j;
}
}
fft->waveLenTop=fft->waveLen;
fft->outBuf[0][0]=0;
fft->outBuf[0][1]=0;
fft->outBuf[1][0]=0;
fft->outBuf[1][1]=0;
fftw_execute(fft->planI);
// did we find the period size?
if (fft->waveLen<(FURNACE_FFT_SIZE-32)) {
// we got pitch
chanOscPitch[ch]=pow(1.0-(fft->waveLen/(double)(FURNACE_FFT_SIZE>>1)),4.0);
// window
for (int j=0; j<(FURNACE_FFT_SIZE>>1); j++) {
fft->corrBuf[j]*=1.0-((double)j/(double)(FURNACE_FFT_SIZE<<1));
}
// find size of period
double waveLenCandL=DBL_MAX;
double waveLenCandH=DBL_MIN;
fft->waveLen=FURNACE_FFT_SIZE-1;
fft->waveLenBottom=0;
fft->waveLenTop=0;
// find lowest point
for (int j=(FURNACE_FFT_SIZE>>2); j>2; j--) {
if (fft->corrBuf[j]<waveLenCandL) {
waveLenCandL=fft->corrBuf[j];
fft->waveLenBottom=j;
}
}
fft->waveLen*=(double)displaySize*2.0/(double)FURNACE_FFT_SIZE;
// DFT of one period (x_1)
double dft[2];
dft[0]=0.0;
dft[1]=0.0;
for (int j=fft->needle-1-(displaySize>>1)-(int)fft->waveLen, k=0; k<fft->waveLen; j++, k++) {
double one=((double)buf->data[j&0xffff]/32768.0);
double two=(double)k*(-2.0*M_PI)/fft->waveLen;
dft[0]+=one*cos(two);
dft[1]+=one*sin(two);
// find highest point
for (int j=(FURNACE_FFT_SIZE>>1)-1; j>fft->waveLenBottom; j--) {
if (fft->corrBuf[j]>waveLenCandH) {
waveLenCandH=fft->corrBuf[j];
fft->waveLen=j;
}
}
fft->waveLenTop=fft->waveLen;
// calculate and lock into phase
phase=(0.5+(atan2(dft[1],dft[0])/(2.0*M_PI)));
// did we find the period size?
if (fft->waveLen<(FURNACE_FFT_SIZE-32)) {
// we got pitch
chanOscPitch[ch]=pow(1.0-(fft->waveLen/(double)(FURNACE_FFT_SIZE>>1)),4.0);
fft->waveLen*=(double)displaySize*2.0/(double)FURNACE_FFT_SIZE;
if (chanOscWaveCorr) {
fft->needle-=phase*fft->waveLen;
// DFT of one period (x_1)
double dft[2];
dft[0]=0.0;
dft[1]=0.0;
for (int j=fft->needle-1-(displaySize>>1)-(int)fft->waveLen, k=0; k<fft->waveLen; j++, k++) {
double one=((double)buf->data[j&0xffff]/32768.0);
double two=(double)k*(-2.0*M_PI)/fft->waveLen;
dft[0]+=one*cos(two);
dft[1]+=one*sin(two);
}
// calculate and lock into phase
phase=(0.5+(atan2(dft[1],dft[0])/(2.0*M_PI)));
if (chanOscWaveCorr) {
fft->needle-=phase*fft->waveLen;
}
}
}
}
fft->needle-=displaySize;
fft->needle-=displaySize;
},fft_);
}
}
}
chanOscWorkPool->wait();
// 0: none
// 1: sqrt(chans)
@ -644,7 +657,12 @@ void FurnaceGUI::drawChanOsc() {
}
ImGui::PushClipRect(inRect.Min,inRect.Max,false);
ImDrawListFlags prevFlags=dl->Flags;
//if (!settings.oscAntiAlias) {
dl->Flags&=~(ImDrawListFlags_AntiAliasedLines|ImDrawListFlags_AntiAliasedLinesUseTex);
//}
dl->AddPolyline(waveform,precision,color,ImDrawFlags_None,dpiScale);
dl->Flags=prevFlags;
if (!chanOscTextFormat.empty()) {
String text;

View file

@ -6685,6 +6685,9 @@ bool FurnaceGUI::init() {
}
#endif
cpuCores=SDL_GetCPUCount();
if (cpuCores<1) cpuCores=1;
logI("done!");
return true;
}
@ -6857,6 +6860,10 @@ bool FurnaceGUI::finish() {
backupTask.get();
}
if (chanOscWorkPool!=NULL) {
delete chanOscWorkPool;
}
return true;
}
@ -7281,6 +7288,7 @@ FurnaceGUI::FurnaceGUI():
chanOscTextColor(1.0f,1.0f,1.0f,0.75f),
chanOscGrad(64,64),
chanOscGradTex(NULL),
chanOscWorkPool(NULL),
followLog(true),
#ifdef IS_MOBILE
pianoOctaves(7),

View file

@ -21,6 +21,7 @@
#define _FUR_GUI_H
#include "../engine/engine.h"
#include "../engine/workPool.h"
#include "../engine/waveSynth.h"
#include "imgui.h"
#include "imgui_impl_sdl2.h"
@ -1346,6 +1347,7 @@ class FurnaceGUI {
int mobileEditPage;
int wheelCalmDown;
int shallDetectScale;
int cpuCores;
float mobileMenuPos, autoButtonSize, mobileEditAnim;
ImVec2 mobileEditButtonPos, mobileEditButtonSize;
const int* curSysSection;
@ -1572,6 +1574,7 @@ class FurnaceGUI {
int insIconsStyle;
int classicChipOptions;
int wasapiEx;
int chanOscThreads;
unsigned int maxUndoSteps;
String mainFontPath;
String headFontPath;
@ -1747,6 +1750,7 @@ class FurnaceGUI {
insIconsStyle(1),
classicChipOptions(0),
wasapiEx(0),
chanOscThreads(0),
maxUndoSteps(100),
mainFontPath(""),
headFontPath(""),
@ -2047,6 +2051,7 @@ class FurnaceGUI {
ImVec4 chanOscColor, chanOscTextColor;
Gradient2D chanOscGrad;
FurnaceGUITexture* chanOscGradTex;
DivWorkPool* chanOscWorkPool;
float chanOscLP0[DIV_MAX_CHANS];
float chanOscLP1[DIV_MAX_CHANS];
float chanOscVol[DIV_MAX_CHANS];
@ -2058,10 +2063,11 @@ class FurnaceGUI {
double* inBuf;
fftw_complex* outBuf;
double* corrBuf;
DivDispatchOscBuffer* relatedBuf;
size_t inBufPos;
double inBufPosFrac;
double waveLen;
int waveLenBottom, waveLenTop;
int waveLenBottom, waveLenTop, relatedCh;
unsigned short needle;
bool ready, loudEnough;
fftw_plan plan;
@ -2070,11 +2076,13 @@ class FurnaceGUI {
inBuf(NULL),
outBuf(NULL),
corrBuf(NULL),
relatedBuf(NULL),
inBufPos(0),
inBufPosFrac(0.0f),
waveLen(0.0),
waveLenBottom(0),
waveLenTop(0),
relatedCh(0),
needle(0),
ready(false),
loudEnough(false),

View file

@ -400,6 +400,27 @@ void FurnaceGUI::drawSettings() {
ImGui::SetTooltip("may cause issues with high-polling-rate mice when previewing notes.");
}
pushWarningColor(settings.chanOscThreads>cpuCores,settings.chanOscThreads>(cpuCores*2));
if (ImGui::InputInt("Per-channel oscilloscope threads",&settings.chanOscThreads)) {
if (settings.chanOscThreads<0) settings.chanOscThreads=0;
if (settings.chanOscThreads>(cpuCores*3)) settings.chanOscThreads=cpuCores*3;
if (settings.chanOscThreads>256) settings.chanOscThreads=256;
}
if (settings.chanOscThreads>=(cpuCores*3)) {
if (ImGui::IsItemHovered()) {
ImGui::SetTooltip("you're being silly, aren't you? that's enough.");
}
} else if (settings.chanOscThreads>(cpuCores*2)) {
if (ImGui::IsItemHovered()) {
ImGui::SetTooltip("what are you doing? stop!");
}
} else if (settings.chanOscThreads>cpuCores) {
if (ImGui::IsItemHovered()) {
ImGui::SetTooltip("it is a bad idea to set this number higher than your CPU core count (%d)!",cpuCores);
}
}
popWarningColor();
// SUBSECTION FILE
CONFIG_SUBSECTION("File");
@ -3262,6 +3283,7 @@ void FurnaceGUI::syncSettings() {
settings.insIconsStyle=e->getConfInt("insIconsStyle",1);
settings.classicChipOptions=e->getConfInt("classicChipOptions",0);
settings.wasapiEx=e->getConfInt("wasapiEx",0);
settings.chanOscThreads=e->getConfInt("chanOscThreads",0);
clampSetting(settings.mainFontSize,2,96);
clampSetting(settings.headFontSize,2,96);
@ -3410,6 +3432,7 @@ void FurnaceGUI::syncSettings() {
clampSetting(settings.insIconsStyle,0,2);
clampSetting(settings.classicChipOptions,0,1);
clampSetting(settings.wasapiEx,0,1);
clampSetting(settings.chanOscThreads,0,256);
if (settings.exportLoops<0.0) settings.exportLoops=0.0;
if (settings.exportFadeOut<0.0) settings.exportFadeOut=0.0;
@ -3665,6 +3688,7 @@ void FurnaceGUI::commitSettings() {
e->setConf("insIconsStyle",settings.insIconsStyle);
e->setConf("classicChipOptions",settings.classicChipOptions);
e->setConf("wasapiEx",settings.wasapiEx);
e->setConf("chanOscThreads",settings.chanOscThreads);
// colors
for (int i=0; i<GUI_COLOR_MAX; i++) {
@ -4182,6 +4206,12 @@ void FurnaceGUI::applyUISettings(bool updateFonts) {
}
}
}
// chan osc work pool
if (chanOscWorkPool!=NULL) {
delete chanOscWorkPool;
chanOscWorkPool=NULL;
}
// colors
if (updateFonts) {