diff --git a/CMakeLists.txt b/CMakeLists.txt
index 731e5e87e..b3e557e1c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,7 +21,14 @@ if (HAVE_JACK)
   list(APPEND AUDIO_SOURCES src/audio/jack.cpp)
 endif()
 
-set(ENGINE_SOURCES src/log.cpp src/engine/safeReader.cpp src/engine/engine.cpp)
+set(ENGINE_SOURCES
+src/log.cpp
+src/engine/blip_buf.c
+src/engine/safeReader.cpp
+src/engine/engine.cpp
+src/engine/playback.cpp
+src/engine/platform/abstract.cpp
+src/engine/platform/dummy.cpp)
 
 #imgui/imgui.cpp
 #imgui/imgui_demo.cpp
diff --git a/src/audio/abstract.cpp b/src/audio/abstract.cpp
index 93981801b..fe5093024 100644
--- a/src/audio/abstract.cpp
+++ b/src/audio/abstract.cpp
@@ -8,8 +8,9 @@ void TAAudio::setBufferSizeChangeCallback(void (*callback)(BufferSizeChangeEvent
   bufferSizeChanged=callback;
 }
 
-void TAAudio::setCallback(void (*callback)(float**,float**,int,int,unsigned int)) {
+void TAAudio::setCallback(void (*callback)(void*,float**,float**,int,int,unsigned int), void* user) {
   audioProcCallback=callback;
+  audioProcCallbackUser=user;
 }
 
 void* TAAudio::getContext() {
diff --git a/src/audio/jack.cpp b/src/audio/jack.cpp
index 52bdd625e..833e5ef56 100644
--- a/src/audio/jack.cpp
+++ b/src/audio/jack.cpp
@@ -33,7 +33,7 @@ void TAAudioJACK::onBufferSize(jack_nframes_t bufsize) {
 
 void TAAudioJACK::onProcess(jack_nframes_t nframes) {
   if (audioProcCallback!=NULL) {
-    audioProcCallback(inBufs,outBufs,desc.inChans,desc.outChans,desc.bufsize);
+    audioProcCallback(audioProcCallbackUser,inBufs,outBufs,desc.inChans,desc.outChans,desc.bufsize);
   }
   for (int i=0; i<desc.inChans; i++) {
     iInBufs[i]=(float*)jack_port_get_buffer(ai[i],nframes);
diff --git a/src/audio/sdl.cpp b/src/audio/sdl.cpp
index 25658fead..872432c92 100644
--- a/src/audio/sdl.cpp
+++ b/src/audio/sdl.cpp
@@ -8,7 +8,7 @@ void taSDLProcess(void* inst, unsigned char* buf, int nframes) {
 
 void TAAudioSDL::onProcess(unsigned char* buf, int nframes) {
   if (audioProcCallback!=NULL) {
-    audioProcCallback(inBufs,outBufs,desc.inChans,desc.outChans,desc.bufsize);
+    audioProcCallback(audioProcCallbackUser,inBufs,outBufs,desc.inChans,desc.outChans,desc.bufsize);
   }
   float* fbuf=(float*)buf;
   for (size_t j=0; j<desc.bufsize; j++) {
diff --git a/src/audio/taAudio.h b/src/audio/taAudio.h
index 0801932cd..8e8084383 100644
--- a/src/audio/taAudio.h
+++ b/src/audio/taAudio.h
@@ -52,14 +52,15 @@ class TAAudio {
     bool running, initialized;
     float** inBufs;
     float** outBufs;
-    void (*audioProcCallback)(float**,float**,int,int,unsigned int);
+    void (*audioProcCallback)(void*,float**,float**,int,int,unsigned int);
+    void* audioProcCallbackUser;
     void (*sampleRateChanged)(SampleRateChangeEvent);
     void (*bufferSizeChanged)(BufferSizeChangeEvent);
   public:
     void setSampleRateChangeCallback(void (*callback)(SampleRateChangeEvent));
     void setBufferSizeChangeCallback(void (*callback)(BufferSizeChangeEvent));
 
-    void setCallback(void (*callback)(float**,float**,int,int,unsigned int));
+    void setCallback(void (*callback)(void*,float**,float**,int,int,unsigned int), void* user);
 
     virtual void* getContext();
     virtual bool quit();
diff --git a/src/engine/blip_buf.c b/src/engine/blip_buf.c
new file mode 100644
index 000000000..929b8fc66
--- /dev/null
+++ b/src/engine/blip_buf.c
@@ -0,0 +1,344 @@
+/* blip_buf $vers. http://www.slack.net/~ant/ */
+
+#include "blip_buf.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+
+/* Library Copyright (C) 2003-2009 Shay Green. This library is free software;
+you can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+library is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#if defined (BLARGG_TEST) && BLARGG_TEST
+	#include "blargg_test.h"
+#endif
+
+/* Equivalent to ULONG_MAX >= 0xFFFFFFFF00000000.
+Avoids constants that don't fit in 32 bits. */
+#if ULONG_MAX/0xFFFFFFFF > 0xFFFFFFFF
+	typedef unsigned long fixed_t;
+	enum { pre_shift = 32 };
+
+#elif defined(ULLONG_MAX)
+	typedef unsigned long long fixed_t;
+	enum { pre_shift = 32 };
+
+#else
+	typedef unsigned fixed_t;
+	enum { pre_shift = 0 };
+
+#endif
+
+enum { time_bits = pre_shift + 20 };
+
+static fixed_t const time_unit = (fixed_t) 1 << time_bits;
+
+enum { bass_shift  = 9 }; /* affects high-pass filter breakpoint frequency */
+enum { end_frame_extra = 2 }; /* allows deltas slightly after frame length */
+
+enum { half_width  = 8 };
+enum { buf_extra   = half_width*2 + end_frame_extra };
+enum { phase_bits  = 5 };
+enum { phase_count = 1 << phase_bits };
+enum { delta_bits  = 15 };
+enum { delta_unit  = 1 << delta_bits };
+enum { frac_bits = time_bits - pre_shift };
+
+/* We could eliminate avail and encode whole samples in offset, but that would
+limit the total buffered samples to blip_max_frame. That could only be
+increased by decreasing time_bits, which would reduce resample ratio accuracy.
+*/
+
+/** Sample buffer that resamples to output rate and accumulates samples
+until they're read out */
+struct blip_t
+{
+	fixed_t factor;
+	fixed_t offset;
+	int avail;
+	int size;
+	int integrator;
+};
+
+typedef int buf_t;
+
+/* probably not totally portable */
+#define SAMPLES( buf ) ((buf_t*) ((buf) + 1))
+
+/* Arithmetic (sign-preserving) right shift */
+#define ARITH_SHIFT( n, shift ) \
+	((n) >> (shift))
+
+enum { max_sample = +32767 };
+enum { min_sample = -32768 };
+
+#define CLAMP( n ) \
+	{\
+		if ( (short) n != n )\
+			n = ARITH_SHIFT( n, 16 ) ^ max_sample;\
+	}
+
+static void check_assumptions( void )
+{
+	int n;
+	
+	#if INT_MAX < 0x7FFFFFFF || UINT_MAX < 0xFFFFFFFF
+		#error "int must be at least 32 bits"
+	#endif
+	
+	assert( (-3 >> 1) == -2 ); /* right shift must preserve sign */
+	
+	n = max_sample * 2;
+	CLAMP( n );
+	assert( n == max_sample );
+	
+	n = min_sample * 2;
+	CLAMP( n );
+	assert( n == min_sample );
+	
+	assert( blip_max_ratio <= time_unit );
+	assert( blip_max_frame <= (fixed_t) -1 >> time_bits );
+}
+
+blip_t* blip_new( int size )
+{
+	blip_t* m;
+	assert( size >= 0 );
+	
+	m = (blip_t*) malloc( sizeof *m + (size + buf_extra) * sizeof (buf_t) );
+	if ( m )
+	{
+		m->factor = time_unit / blip_max_ratio;
+		m->size   = size;
+		blip_clear( m );
+		check_assumptions();
+	}
+	return m;
+}
+
+void blip_delete( blip_t* m )
+{
+	if ( m != NULL )
+	{
+		/* Clear fields in case user tries to use after freeing */
+		memset( m, 0, sizeof *m );
+		free( m );
+	}
+}
+
+void blip_set_rates( blip_t* m, double clock_rate, double sample_rate )
+{
+	double factor = time_unit * sample_rate / clock_rate;
+	m->factor = (fixed_t) factor;
+	
+	/* Fails if clock_rate exceeds maximum, relative to sample_rate */
+	assert( 0 <= factor - m->factor && factor - m->factor < 1 );
+	
+	/* Avoid requiring math.h. Equivalent to
+	m->factor = (int) ceil( factor ) */
+	if ( m->factor < factor )
+		m->factor++;
+	
+	/* At this point, factor is most likely rounded up, but could still
+	have been rounded down in the floating-point calculation. */
+}
+
+void blip_clear( blip_t* m )
+{
+	/* We could set offset to 0, factor/2, or factor-1. 0 is suitable if
+	factor is rounded up. factor-1 is suitable if factor is rounded down.
+	Since we don't know rounding direction, factor/2 accommodates either,
+	with the slight loss of showing an error in half the time. Since for
+	a 64-bit factor this is years, the halving isn't a problem. */
+	
+	m->offset     = m->factor / 2;
+	m->avail      = 0;
+	m->integrator = 0;
+	memset( SAMPLES( m ), 0, (m->size + buf_extra) * sizeof (buf_t) );
+}
+
+int blip_clocks_needed( const blip_t* m, int samples )
+{
+	fixed_t needed;
+	
+	/* Fails if buffer can't hold that many more samples */
+	assert( samples >= 0 && m->avail + samples <= m->size );
+	
+	needed = (fixed_t) samples * time_unit;
+	if ( needed < m->offset )
+		return 0;
+	
+	return (needed - m->offset + m->factor - 1) / m->factor;
+}
+
+void blip_end_frame( blip_t* m, unsigned t )
+{
+	fixed_t off = t * m->factor + m->offset;
+	m->avail += off >> time_bits;
+	m->offset = off & (time_unit - 1);
+	
+	/* Fails if buffer size was exceeded */
+	assert( m->avail <= m->size );
+}
+
+int blip_samples_avail( const blip_t* m )
+{
+	return m->avail;
+}
+
+static void remove_samples( blip_t* m, int count )
+{
+	buf_t* buf = SAMPLES( m );
+	int remain = m->avail + buf_extra - count;
+	m->avail -= count;
+	
+	memmove( &buf [0], &buf [count], remain * sizeof buf [0] );
+	memset( &buf [remain], 0, count * sizeof buf [0] );
+}
+
+int blip_read_samples( blip_t* m, short out [], int count, int stereo )
+{
+	assert( count >= 0 );
+	
+	if ( count > m->avail )
+		count = m->avail;
+	
+	if ( count )
+	{
+		int const step = stereo ? 2 : 1;
+		buf_t const* in  = SAMPLES( m );
+		buf_t const* end = in + count;
+		int sum = m->integrator;
+		do
+		{
+			/* Eliminate fraction */
+			int s = ARITH_SHIFT( sum, delta_bits );
+			
+			sum += *in++;
+			
+			CLAMP( s );
+			
+			*out = s;
+			out += step;
+			
+			/* High-pass filter */
+			sum -= s << (delta_bits - bass_shift);
+		}
+		while ( in != end );
+		m->integrator = sum;
+		
+		remove_samples( m, count );
+	}
+	
+	return count;
+}
+
+/* Things that didn't help performance on x86:
+	__attribute__((aligned(128)))
+	#define short int
+	restrict
+*/
+
+/* Sinc_Generator( 0.9, 0.55, 4.5 ) */
+static short const bl_step [phase_count + 1] [half_width] =
+{
+{   43, -115,  350, -488, 1136, -914, 5861,21022},
+{   44, -118,  348, -473, 1076, -799, 5274,21001},
+{   45, -121,  344, -454, 1011, -677, 4706,20936},
+{   46, -122,  336, -431,  942, -549, 4156,20829},
+{   47, -123,  327, -404,  868, -418, 3629,20679},
+{   47, -122,  316, -375,  792, -285, 3124,20488},
+{   47, -120,  303, -344,  714, -151, 2644,20256},
+{   46, -117,  289, -310,  634,  -17, 2188,19985},
+{   46, -114,  273, -275,  553,  117, 1758,19675},
+{   44, -108,  255, -237,  471,  247, 1356,19327},
+{   43, -103,  237, -199,  390,  373,  981,18944},
+{   42,  -98,  218, -160,  310,  495,  633,18527},
+{   40,  -91,  198, -121,  231,  611,  314,18078},
+{   38,  -84,  178,  -81,  153,  722,   22,17599},
+{   36,  -76,  157,  -43,   80,  824, -241,17092},
+{   34,  -68,  135,   -3,    8,  919, -476,16558},
+{   32,  -61,  115,   34,  -60, 1006, -683,16001},
+{   29,  -52,   94,   70, -123, 1083, -862,15422},
+{   27,  -44,   73,  106, -184, 1152,-1015,14824},
+{   25,  -36,   53,  139, -239, 1211,-1142,14210},
+{   22,  -27,   34,  170, -290, 1261,-1244,13582},
+{   20,  -20,   16,  199, -335, 1301,-1322,12942},
+{   18,  -12,   -3,  226, -375, 1331,-1376,12293},
+{   15,   -4,  -19,  250, -410, 1351,-1408,11638},
+{   13,    3,  -35,  272, -439, 1361,-1419,10979},
+{   11,    9,  -49,  292, -464, 1362,-1410,10319},
+{    9,   16,  -63,  309, -483, 1354,-1383, 9660},
+{    7,   22,  -75,  322, -496, 1337,-1339, 9005},
+{    6,   26,  -85,  333, -504, 1312,-1280, 8355},
+{    4,   31,  -94,  341, -507, 1278,-1205, 7713},
+{    3,   35, -102,  347, -506, 1238,-1119, 7082},
+{    1,   40, -110,  350, -499, 1190,-1021, 6464},
+{    0,   43, -115,  350, -488, 1136, -914, 5861}
+};
+
+/* Shifting by pre_shift allows calculation using unsigned int rather than
+possibly-wider fixed_t. On 32-bit platforms, this is likely more efficient.
+And by having pre_shift 32, a 32-bit platform can easily do the shift by
+simply ignoring the low half. */
+
+void blip_add_delta( blip_t* m, unsigned time, int delta )
+{
+	unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift);
+	buf_t* out = SAMPLES( m ) + m->avail + (fixed >> frac_bits);
+	
+	int const phase_shift = frac_bits - phase_bits;
+	int phase = fixed >> phase_shift & (phase_count - 1);
+	short const* in  = bl_step [phase];
+	short const* rev = bl_step [phase_count - phase];
+	
+	int interp = fixed >> (phase_shift - delta_bits) & (delta_unit - 1);
+	int delta2 = (delta * interp) >> delta_bits;
+	delta -= delta2;
+	
+	/* Fails if buffer size was exceeded */
+	assert( out <= &SAMPLES( m ) [m->size + end_frame_extra] );
+	
+	out [0] += in[0]*delta + in[half_width+0]*delta2;
+	out [1] += in[1]*delta + in[half_width+1]*delta2;
+	out [2] += in[2]*delta + in[half_width+2]*delta2;
+	out [3] += in[3]*delta + in[half_width+3]*delta2;
+	out [4] += in[4]*delta + in[half_width+4]*delta2;
+	out [5] += in[5]*delta + in[half_width+5]*delta2;
+	out [6] += in[6]*delta + in[half_width+6]*delta2;
+	out [7] += in[7]*delta + in[half_width+7]*delta2;
+	
+	in = rev;
+	out [ 8] += in[7]*delta + in[7-half_width]*delta2;
+	out [ 9] += in[6]*delta + in[6-half_width]*delta2;
+	out [10] += in[5]*delta + in[5-half_width]*delta2;
+	out [11] += in[4]*delta + in[4-half_width]*delta2;
+	out [12] += in[3]*delta + in[3-half_width]*delta2;
+	out [13] += in[2]*delta + in[2-half_width]*delta2;
+	out [14] += in[1]*delta + in[1-half_width]*delta2;
+	out [15] += in[0]*delta + in[0-half_width]*delta2;
+}
+
+void blip_add_delta_fast( blip_t* m, unsigned time, int delta )
+{
+	unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift);
+	buf_t* out = SAMPLES( m ) + m->avail + (fixed >> frac_bits);
+	
+	int interp = fixed >> (frac_bits - delta_bits) & (delta_unit - 1);
+	int delta2 = delta * interp;
+	
+	/* Fails if buffer size was exceeded */
+	assert( out <= &SAMPLES( m ) [m->size + end_frame_extra] );
+	
+	out [7] += delta * delta_unit - delta2;
+	out [8] += delta2;
+}
diff --git a/src/engine/blip_buf.h b/src/engine/blip_buf.h
new file mode 100644
index 000000000..0853595ef
--- /dev/null
+++ b/src/engine/blip_buf.h
@@ -0,0 +1,72 @@
+/** \file
+Sample buffer that resamples from input clock rate to output sample rate */
+
+/* blip_buf $vers */
+#ifndef BLIP_BUF_H 
+#define BLIP_BUF_H
+
+#ifdef __cplusplus
+	extern "C" {
+#endif
+
+/** First parameter of most functions is blip_t*, or const blip_t* if nothing
+is changed. */
+typedef struct blip_t blip_t;
+
+/** Creates new buffer that can hold at most sample_count samples. Sets rates
+so that there are blip_max_ratio clocks per sample. Returns pointer to new
+buffer, or NULL if insufficient memory. */
+blip_t* blip_new( int sample_count );
+
+/** Sets approximate input clock rate and output sample rate. For every
+clock_rate input clocks, approximately sample_rate samples are generated. */
+void blip_set_rates( blip_t*, double clock_rate, double sample_rate );
+
+enum { /** Maximum clock_rate/sample_rate ratio. For a given sample_rate,
+clock_rate must not be greater than sample_rate*blip_max_ratio. */
+blip_max_ratio = 1 << 20 };
+
+/** Clears entire buffer. Afterwards, blip_samples_avail() == 0. */
+void blip_clear( blip_t* );
+
+/** Adds positive/negative delta into buffer at specified clock time. */
+void blip_add_delta( blip_t*, unsigned int clock_time, int delta );
+
+/** Same as blip_add_delta(), but uses faster, lower-quality synthesis. */
+void blip_add_delta_fast( blip_t*, unsigned int clock_time, int delta );
+
+/** Length of time frame, in clocks, needed to make sample_count additional
+samples available. */
+int blip_clocks_needed( const blip_t*, int sample_count );
+
+enum { /** Maximum number of samples that can be generated from one time frame. */
+blip_max_frame = 4000 };
+
+/** Makes input clocks before clock_duration available for reading as output
+samples. Also begins new time frame at clock_duration, so that clock time 0 in
+the new time frame specifies the same clock as clock_duration in the old time
+frame specified. Deltas can have been added slightly past clock_duration (up to
+however many clocks there are in two output samples). */
+void blip_end_frame( blip_t*, unsigned int clock_duration );
+
+/** Number of buffered samples available for reading. */
+int blip_samples_avail( const blip_t* );
+
+/** Reads and removes at most 'count' samples and writes them to 'out'. If
+'stereo' is true, writes output to every other element of 'out', allowing easy
+interleaving of two buffers into a stereo sample stream. Outputs 16-bit signed
+samples. Returns number of samples actually read.  */
+int blip_read_samples( blip_t*, short out [], int count, int stereo );
+
+/** Frees buffer. No effect if NULL is passed. */
+void blip_delete( blip_t* );
+
+
+/* Deprecated */
+typedef blip_t blip_buffer_t;
+
+#ifdef __cplusplus
+	}
+#endif
+
+#endif
diff --git a/src/engine/blip_buf.txt b/src/engine/blip_buf.txt
new file mode 100644
index 000000000..5bf7a3d09
--- /dev/null
+++ b/src/engine/blip_buf.txt
@@ -0,0 +1,250 @@
+blip_buf $vers
+--------------
+Author  : Shay Green <gblargg@gmail.com>
+Website : http://www.slack.net/~ant/
+License : GNU Lesser General Public License (LGPL)
+
+
+Contents
+--------
+* Overview
+* Buffer creation
+* Waveform generation
+* Time frames
+* Complex waveforms
+* Sample buffering
+* Thanks
+
+
+Overview
+--------
+This library resamples audio waveforms from input clock rate to output
+sample rate. Usage follows this general pattern:
+
+* Create buffer with blip_new().
+* Set clock rate and sample rate with blip_set_rates().
+* Waveform generation loop:
+	- Generate several clocks of waveform with blip_add_delta().
+	- End time frame with blip_end_frame().
+	- Read samples from buffer with blip_read_samples().
+* Free buffer with blip_delete().
+
+
+Buffer creation
+---------------
+Before synthesis, a buffer must be created with blip_new(). Its size is
+the maximum number of unread samples it can hold. For most uses, this
+can be 1/10 the sample rate or less, since samples will usually be read
+out immediately after being generated.
+
+After the buffer is created, the input clock rate and output sample rate
+must be set with blip_set_rates(). This determines how many input clocks
+there are per second, and how many output samples are generated per
+second.
+
+If the compiler supports a 64-bit integer type, then the input-output
+ratio is stored very accurately. If the compiler only supports a 32-bit
+integer type, then the ratio is stored with only 20 fraction bits, so
+some ratios cannot be represented exactly (for example, sample
+rate=48000 and clock rate=48001). The ratio is internally rounded up, so
+there will never be fewer than 'sample rate' samples per second. Having
+too many per second is generally better than having too few.
+
+
+Waveform generation
+-------------------
+Waveforms are generated at the input clock rate. Consider a simple
+square wave with 8 clocks per cycle (4 clocks high, 4 clocks low):
+
+                   |<-- 8 clocks ->|
+        +5|        ._._._._        ._._._._        ._._._._        ._._
+          |        |       |       |       |       |       |       |
+    Amp  0|._._._._        |       |       |       |       |       |
+          |                |       |       |       |       |       |
+        -5|                ._._._._        ._._._._        ._._._._ 
+           * . . . * . . . * . . . * . . . * . . . * . . . * . . . * .
+    Time   0       4       8      12      16      20      24      28
+
+The wave changes amplitude at time points 0, 4, 8, 12, 16, etc.
+
+The following generates the amplitude at every clock of the above waveform
+at the input clock rate:
+
+	int wave [30] = { 0 };
+	
+	for ( int i = 4; i < 30; ++i )
+	{
+		if ( i % 8 < 4 )
+			wave [i] = -5;
+		else
+			wave [i] = +5;
+	}
+
+Without this library, the wave array would then need to be resampled
+from the input clock rate to the output sample rate. This library does
+this resampling internally, so it won't be discussed further; waveform
+generation code can focus entirely on the input clocks.
+
+Rather than specify the amplitude at every clock, this library merely
+needs to know the points where the amplitude CHANGES, referred to as a
+delta. The time of a delta is specified with a clock count. The deltas
+for this square wave are shown below the time points they occur at:
+
+        +5|        ._._._._        ._._._._        ._._._._        ._._
+          |        |       |       |       |       |       |       |
+    Amp  0|._._._._        |       |       |       |       |       |
+          |                |       |       |       |       |       |
+        -5|                ._._._._        ._._._._        ._._._._ 
+           * . . . * . . . * . . . * . . . * . . . * . . . * . . . * .
+    Time   0       4       8      12      16      20      24      28
+    Delta         +5     -10     +10     -10     +10     -10     +10
+
+The following calls generate the above waveform:
+
+	blip_add_delta( blip,  4,  +5 );
+	blip_add_delta( blip,  8, -10 );
+	blip_add_delta( blip, 12, +10 );
+	blip_add_delta( blip, 16, -10 );
+	blip_add_delta( blip, 20, +10 );
+	blip_add_delta( blip, 24, -10 );
+	blip_add_delta( blip, 28, +10 );
+
+In the examples above, the amplitudes are small for clarity. The 16-bit
+sample range is -32768 to +32767, so actual waveform amplitudes would
+need to be in the thousands to be audible (for example, -5000 to +5000).
+
+This library allows waveform generation code to pay NO attention to the
+output sample rate. It can focus ENTIRELY on the essence of the
+waveform: the points where its amplitude changes. Since these points can
+be efficiently generated in a loop, synthesis is efficient. Sound chip
+emulation code can be structured to allow full accuracy down to a single
+clock, with the emulated CPU being able to simply tell the sound chip to
+"emulate from wherever you left off, up to clock time T within the
+current time frame".
+
+
+Time frames
+-----------
+Since time keeps increasing, if left unchecked, at some point it would
+overflow the range of an integer. This library's solution to the problem
+is to break waveform generation into time frames of moderate length.
+Clock counts within a time frame are thus relative to the beginning of
+the frame, where 0 is the beginning of the frame. When a time frame of
+length T is ended, what was at time T in the old time frame is now at
+time 0 in the new time frame. Breaking the above waveform into time
+frames of 10 clocks each looks like this:
+
+        +5|        ._._._._        ._._._._        ._._._._        ._._
+          |        |       |       |       |       |       |       |
+    Amp  0|._._._._        |       |       |       |       |       |
+          |                |       |       |       |       |       |
+        -5|                ._._._._        ._._._._        ._._._._ 
+           * . . . * . . . * . . . * . . . * . . . * . . . * . . . * .
+    Time  |0       4       8  |    2       6      |0       4       8  |
+          | first time frame  | second time frame | third time frame  |
+          |<--- 10 clocks --->|<--- 10 clocks --->|<--- 10 clocks --->|
+
+The following calls generate the above waveform. After they execute, the
+first 30 clocks of the waveform will have been resampled and be
+available as output samples for reading with blip_read_samples().
+
+	blip_add_delta( blip,  4,  +5 );
+	blip_add_delta( blip,  8, -10 );
+	blip_end_frame( blip, 10 );
+	
+	blip_add_delta( blip,  2, +10 );
+	blip_add_delta( blip,  6, -10 );
+	blip_end_frame( blip, 10 );
+	
+	blip_add_delta( blip,  0, +10 );
+	blip_add_delta( blip,  4, -10 );
+	blip_add_delta( blip,  8, +10 );
+	blip_end_frame( blip, 10 );
+	...
+
+Time frames can be any convenient length, and the length can vary from one
+frame to the next. Once a time frame is ended, the resulting output
+samples become available for reading immediately, and no more deltas can
+be added to it.
+
+There is a limit of about 4000 output samples per time frame. The number
+of clocks depends on the clock rate. At common sample rates, this allows
+time frames of at least 1/15 second, plenty for most uses. This limit
+allows increased resampling ratio accuracy.
+
+In an emulator, it is usually convenient to have audio time frames
+correspond to video frames, where the CPU's clock counter is reset at
+the beginning of each video frame and thus can be used directly as the
+relative clock counts for audio time frames.
+
+
+Complex waveforms
+-----------------
+Any sort of waveform can be generated, not just a square wave. For
+example, a saw-like wave:
+
+        +5|        ._._._._                ._._._._                ._._
+          |        |       |               |       |               |
+    Amp  0|._._._._        |       ._._._._        |       ._._._._
+          |                |       |               |       |
+        -5|                ._._._._                ._._._._
+           * . . . * . . . * . . . * . . . * . . . * . . . * . . . * .
+    Time   0       4       8      12      16      20      24      28
+    Delta         +5     -10      +5      +5     -10      +5      +5
+
+Code to generate above waveform:
+
+	blip_add_delta( blip,  4,  +5 );
+	blip_add_delta( blip,  8, -10 );
+	blip_add_delta( blip, 12,  +5 );
+	blip_add_delta( blip, 16,  +5 );
+	blip_add_delta( blip, 20, -10 );
+	blip_add_delta( blip, 24,  +5 );
+	blip_add_delta( blip, 28,  +5 );
+
+Similarly, multiple waveforms can be added within a time frame without
+problem. It doesn't matter what order they're added, because all the
+library needs are the deltas. The synthesis code doesn't need to know
+all the waveforms at once either; it can calculate and add the deltas
+for each waveform individually. Deltas don't need to be added in
+chronological order either.
+
+
+Sample buffering
+----------------
+Sample buffering is very flexible. Once a time frame is ended, the
+resampled waveforms become output samples that are immediately made
+available for reading with blip_read_samples(). They don't have to be
+read immediately; they can be allowed to accumulate in the buffer, with
+each time frame appending more samples to the buffer. When reading, some
+or all of the samples can be read out, with the remaining unread
+samples staying in the buffer for later. Usually a program will
+immediately read all available samples after ending a time frame and
+play them immediately. In some systems, a program needs samples in
+fixed-length blocks; in that case, it would keep generating time frames
+until some number of samples are available, then read only that many,
+even if slightly more were available in the buffer.
+
+In some systems, one wants to run waveform generation for exactly the
+number of clocks necessary to generate some desired number of output
+samples, and no more. In that case, use blip_clocks_needed( blip, N ) to
+find out how many clocks are needed to generate N additional samples.
+Ending a time frame with this value will result in exactly N more
+samples becoming available for reading.
+
+
+Thanks
+------
+Thanks to Jsr (FamiTracker author), the Mednafen team (multi-system
+emulator), ShizZie (Nhes GMB author), Marcel van Tongeren, Luke Molnar
+(UberNES author), Fredrick Meunier (Fuse contributor) for using and
+giving feedback for another similar library. Thanks to Disch for his
+interest and discussions about the synthesis algorithm itself, and for
+writing his own implementation of it (Schpune) rather than just using
+mine. Thanks to Xodnizel for Festalon, whose sound quality got me
+interested in video game sound emulation in the first place, and where I
+first came up with the algorithm while optimizing its brute-force
+filter.
+
+-- 
+Shay Green <gblargg@gmail.com>
diff --git a/src/engine/dispatch.h b/src/engine/dispatch.h
index 507d7fc7e..bde0d0210 100644
--- a/src/engine/dispatch.h
+++ b/src/engine/dispatch.h
@@ -1,3 +1,6 @@
+#ifndef _DISPATCH_H
+#define _DISPATCH_H
+
 enum DivDispatchCmds {
   DIV_CMD_NOTE_ON=0,
   DIV_CMD_NOTE_OFF,
@@ -17,15 +20,27 @@ struct DivDelayedCommand {
   DivCommand cmd;
 };
 
+class DivEngine;
+
 class DivDispatch {
+  protected:
+    DivEngine* parent;
   public:
-    virtual void acquire(float& l, float& r);
+    /**
+     * the rate the samples are provided.
+     * the engine shall resample to the output rate.
+     */
+    int rate;
+    virtual void acquire(short& l, short& r);
     virtual int dispatch(DivCommand c);
 
     /**
      * initialize this DivDispatch.
+     * @param parent the parent DivEngine.
      * @param channels the number of channels to acquire.
+     * @param sugRate the suggested rate. this may change, so don't rely on it.
      * @return the number of channels allocated.
      */
-    virtual int init(int channels);
+    virtual int init(DivEngine* parent, int channels, int sugRate);
 };
+#endif
\ No newline at end of file
diff --git a/src/engine/engine.cpp b/src/engine/engine.cpp
index 65dfece04..df301b0e9 100644
--- a/src/engine/engine.cpp
+++ b/src/engine/engine.cpp
@@ -2,16 +2,11 @@
 #include "safeReader.h"
 #include "../ta-log.h"
 #include "../audio/sdl.h"
+#include "platform/dummy.h"
 #include <zlib.h>
 
-void process(float** in, float** out, int inChans, int outChans, unsigned int size) {
-  static int count;
-  for (unsigned int i=0; i<size; i++) {
-    count++;
-    for (int j=0; j<outChans; j++) {
-      out[j][i]=((count%160)>40)?0.5:0.0;
-    }
-  }
+void process(void* u, float** in, float** out, int inChans, int outChans, unsigned int size) {
+  ((DivEngine*)u)->nextBuf(in,out,inChans,outChans,size);
 }
 
 #define DIV_READ_SIZE 131072
@@ -596,6 +591,7 @@ bool DivEngine::load(void* f, size_t slen) {
     }
 
     song=ds;
+    chans=getChannelCount(song.system);
   } catch (EndOfFileException e) {
     logE("premature end of file!\n");
     return false;
@@ -617,7 +613,7 @@ bool DivEngine::init() {
   want.outFormat=TA_AUDIO_FORMAT_F32;
   want.name="DivAudio";
 
-  output->setCallback(process);
+  output->setCallback(process,this);
 
   logI("initializing audio.\n");
   if (!output->init(want,got)) {
@@ -625,8 +621,29 @@ bool DivEngine::init() {
     return false;
   }
 
+  bb[0]=blip_new(32768);
+  if (bb[0]==NULL) {
+    logE("not enough memory!\n");
+    return false;
+  }
+
+  bb[1]=blip_new(32768);
+  if (bb[1]==NULL) {
+    logE("not enough memory!\n");
+    return false;
+  }
+  
+  bbOut[0]=new short[got.bufsize];
+  bbOut[1]=new short[got.bufsize];
+
+  dispatch=new DivPlatformDummy;
+  dispatch->init(this,getChannelCount(song.system),got.rate);
+
+  blip_set_rates(bb[0],dispatch->rate,got.rate);
+  blip_set_rates(bb[1],dispatch->rate,got.rate);
+
   if (!output->setRun(true)) {
-    printf("error while activating!\n");
+    logE("error while activating!\n");
     return false;
   }
   return true;
diff --git a/src/engine/engine.h b/src/engine/engine.h
index 23176d343..0472d26a4 100644
--- a/src/engine/engine.h
+++ b/src/engine/engine.h
@@ -1,6 +1,9 @@
+#ifndef _ENGINE_H
+#define _ENGINE_H
 #include "song.h"
 #include "dispatch.h"
 #include "../audio/taAudio.h"
+#include "blip_buf.h"
 
 struct DivChannelState {
   std::vector<DivDelayedCommand> delayed;
@@ -18,10 +21,19 @@ class DivEngine {
   int chans;
   bool playing;
   bool speedAB;
-  int ticks, curRow, curOrder;
+  int ticks, cycles, curRow, curOrder;
   std::vector<DivChannelState> chan;
 
+  blip_buffer_t* bb[2];
+  short temp[2], prevSample[2];
+  short* bbOut[2];
+
+  void nextOrder();
+  void nextRow();
+  void nextTick();
+
   public:
+    void nextBuf(float** in, float** out, int inChans, int outChans, unsigned int size);
     // load a .dmf.
     bool load(void* f, size_t length);
     // save as .dmf.
@@ -31,5 +43,17 @@ class DivEngine {
     void play();
 
     // initialize the engine.
-    bool init();    
+    bool init();
+
+    DivEngine(): // starts stopped; curRow is -1 so the first nextRow() lands on row 0
+      chans(0),
+      playing(false),
+      speedAB(false),
+      ticks(0), cycles(0),
+      curRow(-1), curOrder(0),
+      bb{nullptr,nullptr}, // fix: no longer indeterminate before init() allocates them
+      temp{0,0},
+      prevSample{0,0},
+      bbOut{nullptr,nullptr} {} // fix: same for the resampler output buffers
 };
+#endif
\ No newline at end of file
diff --git a/src/engine/platform/abstract.cpp b/src/engine/platform/abstract.cpp
new file mode 100644
index 000000000..58fb407d5
--- /dev/null
+++ b/src/engine/platform/abstract.cpp
@@ -0,0 +1,14 @@
+#include "../dispatch.h"
+
+// Base implementation: writes 0 to both output samples.
+void DivDispatch::acquire(short& l, short& r) {
+  l=r=0;
+}
+
+int DivDispatch::dispatch(DivCommand c) { // base implementation: the command is not inspected
+  return 1; // always 1
+}
+
+int DivDispatch::init(DivEngine* p, int channels, int sugRate) { // base implementation: all arguments are ignored
+  return 0; // always 0
+}
\ No newline at end of file
diff --git a/src/engine/platform/dummy.cpp b/src/engine/platform/dummy.cpp
index e69de29bb..ea8327ca5 100644
--- a/src/engine/platform/dummy.cpp
+++ b/src/engine/platform/dummy.cpp
@@ -0,0 +1,16 @@
+#include "dummy.h"
+
+// The dummy platform produces no sound: both output samples are set to 0.
+void DivPlatformDummy::acquire(short& l, short& r) {
+  l=r=0;
+}
+
+int DivPlatformDummy::dispatch(DivCommand c) { // the dummy platform does not inspect the command
+  return 1; // always 1
+}
+
+int DivPlatformDummy::init(DivEngine* p, int channels, int sugRate) { // records the owning engine and the rate to run at
+  parent=p;
+  rate=sugRate; // the suggested rate is adopted unchanged
+  return channels; // hands the requested channel count straight back
+}
\ No newline at end of file
diff --git a/src/engine/platform/dummy.h b/src/engine/platform/dummy.h
index e90013bdb..e3ee07d6b 100644
--- a/src/engine/platform/dummy.h
+++ b/src/engine/platform/dummy.h
@@ -4,7 +4,7 @@
 // used when a DivDispatch for a system is not found.
 class DivPlatformDummy: public DivDispatch {
   public:
-    void acquire(float& l, float& r);
+    void acquire(short& l, short& r); // sets both output samples to 0 (defined in dummy.cpp)
     int dispatch(DivCommand c);
-    int init(int channels);
+    int init(DivEngine* parent, int channels, int sugRate); // stores parent and sugRate, returns channels (defined in dummy.cpp)
 };
\ No newline at end of file
diff --git a/src/engine/playback.cpp b/src/engine/playback.cpp
new file mode 100644
index 000000000..eccf6d9cd
--- /dev/null
+++ b/src/engine/playback.cpp
@@ -0,0 +1,118 @@
+#include "engine.h"
+
+// Moves to the following order (wrapping to 0 past the last one) and rewinds to row 0.
+void DivEngine::nextOrder() {
+  curOrder++;
+  if (curOrder>=song.ordersLen) curOrder=0;
+  curRow=0;
+}
+
+// Two-character note names, indexed by semitone within the octave (0 is C).
+const char* notes[12]={
+  "C-", "C#", "D-", "D#", "E-", "F-", "F#", "G-", "G#", "A-", "A#", "B-"
+};
+
+// Returns a 3-character label: "OFF" for note 100, "---" for octave 0, else name plus octave.
+// The formatted case lives in a static buffer that every later call overwrites.
+const char* formatNote(unsigned char note, unsigned char octave) {
+  static char ret[4];
+  if (note==100) return "OFF";
+  if (octave==0) return "---";
+  snprintf(ret,sizeof(ret),"%s%d",notes[note%12],octave+note/12);
+  return ret;
+}
+
+void DivEngine::nextRow() { // steps to the next row (rolling into the next order at pattern end) and prints that row to stdout
+  static char pb[4096]; // scratch: one formatted order cell
+  static char pb1[4096]; // accumulates the order cells of all channels
+  static char pb2[4096]; // scratch: one formatted pattern cell
+  static char pb3[4096]; // accumulates the pattern cells of all channels
+  if (++curRow>=song.patLen) {
+    nextOrder(); // also resets curRow to 0
+  }
+  strcpy(pb1,""); // the buffers are static, so clear what the previous call left behind
+  strcpy(pb3,"");
+  for (int i=0; i<chans; i++) {
+    snprintf(pb,4095," %.2x",song.orders.ord[i][curOrder]);
+    strcat(pb1,pb);
+    
+    DivPattern* pat=song.pat[i]->data[curOrder]; // NOTE(review): indexed by order position, not by the ord[][] value printed above - confirm against the loader
+    snprintf(pb2,4095,"\x1b[37m %s",
+             formatNote(pat->data[curRow][0],pat->data[curRow][1])); // columns 0 and 1 are passed as note and octave
+    strcat(pb3,pb2);
+    if (pat->data[curRow][3]==255) { // 255 is shown as an empty "--" cell; column 3 is printed before column 2
+      strcat(pb3,"\x1b[m--");
+    } else {
+      snprintf(pb2,4095,"\x1b[1;32m%.2x",pat->data[curRow][3]);
+      strcat(pb3,pb2);
+    }
+    if (pat->data[curRow][2]==255) {
+      strcat(pb3,"\x1b[m--");
+    } else {
+      snprintf(pb2,4095,"\x1b[0;36m%.2x",pat->data[curRow][2]);
+      strcat(pb3,pb2);
+    }
+    for (int j=0; j<song.pat[i]->effectRows; j++) { // each j covers a pair of columns: 4+2j and 5+2j
+      if (pat->data[curRow][4+(j<<1)]==255) {
+        strcat(pb3,"\x1b[m--");
+      } else {
+        snprintf(pb2,4095,"\x1b[1;31m%.2x",pat->data[curRow][4+(j<<1)]);
+        strcat(pb3,pb2);
+      }
+      if (pat->data[curRow][5+(j<<1)]==255) {
+        strcat(pb3,"\x1b[m--");
+      } else {
+        snprintf(pb2,4095,"\x1b[1;37m%.2x",pat->data[curRow][5+(j<<1)]);
+        strcat(pb3,pb2);
+      }
+    }
+  }
+  printf("| %.2x:%s | \x1b[1;33m%3d%s\x1b[m\n",curOrder,pb1,curRow,pb3); // one line: order, order cells, row number, pattern cells
+}
+
+// Runs one engine tick: recomputes the tick length in dispatch clocks, then advances
+// to the next row once the per-row tick counter runs out.
+void DivEngine::nextTick() {
+  // Guard: a custom tempo of zero or less would divide by zero, so that case falls
+  // back to the frame-mode rate below.
+  // NOTE(review): pal selects 60 and non-pal selects 50 - confirm the flag's polarity.
+  if (song.customTempo && song.hz>0) {
+    cycles=dispatch->rate/song.hz;
+  } else if (song.pal) {
+    cycles=dispatch->rate/60;
+  } else {
+    cycles=dispatch->rate/50;
+  }
+  if (--ticks<=0) {
+    // speed1 and speed2 take turns from row to row; speedAB picks the one for this row
+    ticks=(speedAB?song.speed2:song.speed1)*(song.timeBase+1);
+    speedAB=!speedAB;
+    nextRow();
+  }
+}
+
+// Produces `size` output frames by clocking the dispatch through blip_buf; `in` and `inChans` are not used.
+void DivEngine::nextBuf(float** in, float** out, int inChans, int outChans, unsigned int size) {
+  // how many dispatch clocks must be fed in for the resampler to yield `size` frames
+  size_t runtotal=blip_clocks_needed(bb[0],size);
+  for (size_t i=0; i<runtotal; i++) {
+    if (--cycles<=0) {
+      nextTick();
+    }
+    dispatch->acquire(temp[0],temp[1]);
+    // blip_buf is fed the change from the previous sample, not the sample itself
+    for (int c=0; c<2; c++) {
+      blip_add_delta(bb[c],i,temp[c]-prevSample[c]);
+      prevSample[c]=temp[c];
+    }
+  }
+  // both blip buffers are always ended and read so they stay in step with each other
+  for (int c=0; c<2; c++) {
+    blip_end_frame(bb[c],runtotal);
+    blip_read_samples(bb[c],bbOut[c],size,0);
+    if (c>=outChans) continue; // fix: never write an output channel the backend did not provide
+    for (size_t i=0; i<size; i++) {
+      out[c][i]=(float)bbOut[c][i]/32768.0;
+    }
+  }
+}
\ No newline at end of file