From d4c48a06a1d34ffe04996c0a3d0c7004835ba576 Mon Sep 17 00:00:00 2001
From: tildearrow <me@tildearrow.org>
Date: Mon, 8 Apr 2024 03:40:16 -0500
Subject: [PATCH] asfdafds

---
 extern/imgui_software_renderer/imgui_sw.cpp | 1008 +++++++++----------
 1 file changed, 502 insertions(+), 506 deletions(-)
diff --git a/extern/imgui_software_renderer/imgui_sw.cpp b/extern/imgui_software_renderer/imgui_sw.cpp
index 7721fffe1..553904336 100644
--- a/extern/imgui_software_renderer/imgui_sw.cpp
+++ b/extern/imgui_software_renderer/imgui_sw.cpp
@@ -32,581 +32,577 @@ static ImGui_ImplSW_Data* ImGui_ImplSW_GetBackendData()
     return ImGui::GetCurrentContext() ? (ImGui_ImplSW_Data*)ImGui::GetIO().BackendRendererUserData : nullptr;
 }
 
-// TODO: de-namespace and static-ize
-namespace {
-  struct PaintTarget
-  {
-    uint32_t *pixels;
-    int width;
-    int height;
-    ImVec2 scale;// Multiply ImGui (point) coordinates with this to get pixel coordinates.
-    ImVec2 DisplayPos;
-  };
+struct PaintTarget
+{
+  uint32_t *pixels;
+  int width;
+  int height;
+  ImVec2 DisplayPos;
+};
 
-  // ----------------------------------------------------------------------------
+// ----------------------------------------------------------------------------
 
 #pragma pack(push, 1)
-  union ColorInt
-  {
-    struct {
-      uint8_t r, g, b, a;
-    };
-    uint32_t u32;
-    ColorInt():
-      u32(0) {}
-
-
-    ColorInt &operator*=(const ColorInt &other)
-    {
-      r = r * other.r / 255;
-      g = g * other.g / 255;
-      b = b * other.b / 255;
-      a = a * other.a / 255;
-      return *this;
-    }
+union ColorInt
+{
+  struct {
+    uint8_t r, g, b, a;
   };
+  uint32_t u32;
+  ColorInt():
+    u32(0) {}
+
+  ColorInt(uint32_t c):
+    u32(c) {}
+
+
+  ColorInt &operator*=(const ColorInt &other)
+  {
+    r = (r * other.r + 255) >> 8;
+    g = (g * other.g + 255) >> 8;
+    b = (b * other.b + 255) >> 8;
+    a = (a * other.a + 255) >> 8;
+    return *this;
+  }
+};
 #pragma pack(pop)
 
-  uint32_t blend(const ColorInt &target, const ColorInt &source)
-  {
-    if (source.a >= 255) return *reinterpret_cast<const uint32_t *>(&source);
-    return (target.a << 24u) | (((source.b * source.a + target.b * (255 - source.a)) / 255) << 16u)
-           | (((source.g * source.a + target.g * (255 - source.a)) / 255) << 8u)
-           | ((source.r * source.a + target.r * (255 - source.a)) / 255);
+uint32_t blend(const ColorInt &target, const ColorInt &source)
+{
+  if (source.a >= 255) return source.u32;
+  return (target.a << 24u) | (((source.b * source.a + target.b * (255 - source.a)) / 255) << 16u)
+          | (((source.g * source.a + target.g * (255 - source.a)) / 255) << 8u)
+          | ((source.r * source.a + target.r * (255 - source.a)) / 255);
+}
+
+// ----------------------------------------------------------------------------
+// Used for interpolating vertex attributes (color and texture coordinates) in a triangle.
+
+struct Barycentric
+{
+  float w0, w1, w2;
+};
+
+Barycentric operator*(const float f, const Barycentric &va) { return { f * va.w0, f * va.w1, f * va.w2 }; }
+
+void operator+=(Barycentric &a, const Barycentric &b)
+{
+  a.w0 += b.w0;
+  a.w1 += b.w1;
+  a.w2 += b.w2;
+}
+
+Barycentric operator+(const Barycentric &a, const Barycentric &b)
+{
+  return Barycentric{ a.w0 + b.w0, a.w1 + b.w1, a.w2 + b.w2 };
+}
+
+// ----------------------------------------------------------------------------
+// Useful operators on ImGui vectors:
+
+ImVec2 operator*(const float f, const ImVec2 &v) { return ImVec2{ f * v.x, f * v.y }; }
+
+bool operator!=(const ImVec2 &a, const ImVec2 &b) { return a.x != b.x || a.y != b.y; }
+
+ImVec4 operator*(const float f, const ImVec4 &v) { return ImVec4{ f * v.x, f * v.y, f * v.z, f * v.w }; }
+
+// ----------------------------------------------------------------------------
+// Copies of functions in ImGui, inlined for speed:
+
+inline ImVec4 color_convert_u32_to_float4(ImU32 in)
+{
+  const float s = 1.0f / 255.0f;
+  return ImVec4(((in >> IM_COL32_R_SHIFT) & 0xFF) * s,
+    ((in >> IM_COL32_G_SHIFT) & 0xFF) * s,
+    ((in >> IM_COL32_B_SHIFT) & 0xFF) * s,
+    ((in >> IM_COL32_A_SHIFT) & 0xFF) * s);
+}
+
+inline ImU32 color_convert_float4_to_u32(const ImVec4 &in)
+{
+  ImU32 out;
+  out = uint32_t(in.x * 255.0f + 0.5f) << IM_COL32_R_SHIFT;
+  out |= uint32_t(in.y * 255.0f + 0.5f) << IM_COL32_G_SHIFT;
+  out |= uint32_t(in.z * 255.0f + 0.5f) << IM_COL32_B_SHIFT;
+  out |= uint32_t(in.w * 255.0f + 0.5f) << IM_COL32_A_SHIFT;
+  return out;
+}
+
+// ----------------------------------------------------------------------------
+// For fast and subpixel-perfect triangle rendering we used fixed point arithmetic.
+// To keep the code simple we use 64 bits to avoid overflows.
+// TODO: make it 32-bit or else
+
+using Int = int64_t;
+const Int kFixedBias = 256;
+
+struct Point
+{
+  Int x, y;
+};
+
+Int orient2d(const Point &a, const Point &b, const Point &c)
+{
+  return (b.x - a.x) * (c.y - a.y) - (b.y - a.y) * (c.x - a.x);
+}
+
+Int as_int(float v) { return static_cast<Int>(floor(v * kFixedBias)); }
+
+Point as_point(ImVec2 v) { return Point{ as_int(v.x), as_int(v.y) }; }
+
+// ----------------------------------------------------------------------------
+
+inline float min3(float a, float b, float c)
+{
+  if (a < b && a < c) { return a; }
+  return b < c ? b : c;
+}
+
+inline float max3(float a, float b, float c)
+{
+  if (a > b && a > c) { return a; }
+  return b > c ? b : c;
+}
+
+inline float barycentric(const ImVec2 &a, const ImVec2 &b, const ImVec2 &point)
+{
+  return (b.x - a.x) * (point.y - a.y) - (b.y - a.y) * (point.x - a.x);
+}
+
+inline uint8_t sample_font_texture(const SWTexture &texture, int x, int y)
+{
+  return reinterpret_cast<const uint8_t *>(texture.pixels)[x + y * texture.width];
+}
+
+inline uint32_t sample_texture(const SWTexture &texture, int x, int y) { return texture.pixels[x + y * texture.width]; }
+
+static void paint_uniform_rectangle(const PaintTarget &target,
+  const ImVec2 &min_f,
+  const ImVec2 &max_f,
+  const ColorInt &color)
+{
+  // Integer bounding box [min, max):
+  int min_x_i = static_cast<int>(min_f.x + 0.5f);
+  int min_y_i = static_cast<int>(min_f.y + 0.5f);
+  int max_x_i = static_cast<int>(max_f.x + 0.5f);
+  int max_y_i = static_cast<int>(max_f.y + 0.5f);
+
+  // Clamp to render target:
+  min_x_i = std::max(min_x_i, 0);
+  min_y_i = std::max(min_y_i, 0);
+  max_x_i = std::min(max_x_i, target.width);
+  max_y_i = std::min(max_y_i, target.height);
+
+  // We often blend the same colors over and over again, so optimize for this (saves 25% total cpu):
+  uint32_t last_target_pixel = target.pixels[min_y_i * target.width + min_x_i];
+  const auto *lastColorRef = reinterpret_cast<const ColorInt *>(&last_target_pixel);
+  uint32_t last_output = blend(*lastColorRef, color);
+
+  for (int y = min_y_i; y < max_y_i; ++y) {
+    for (int x = min_x_i; x < max_x_i; ++x) {
+      uint32_t &target_pixel = target.pixels[y * target.width + x];
+      if (target_pixel == last_target_pixel) {
+        target_pixel = last_output;
+        continue;
+      }
+      last_target_pixel = target_pixel;
+      const auto *colorRef = reinterpret_cast<const ColorInt *>(&target_pixel);
+      target_pixel = blend(*colorRef, color);
+      last_output = target_pixel;
+    }
   }
+}
 
-  // ----------------------------------------------------------------------------
-  // Used for interpolating vertex attributes (color and texture coordinates) in a triangle.
+static void paint_uniform_textured_rectangle(const PaintTarget &target,
+  const SWTexture &texture,
+  const ImVec4 &clip_rect,
+  const ImDrawVert &min_v,
+  const ImDrawVert &max_v)
+{
+  const ImVec2 min_p = ImVec2(min_v.pos.x, min_v.pos.y);
+  const ImVec2 max_p = ImVec2(max_v.pos.x, max_v.pos.y);
 
-  struct Barycentric
-  {
-    float w0, w1, w2;
+  float distanceX = max_p.x - min_p.x;
+  float distanceY = max_p.y - min_p.y;
+  if (distanceX == 0 || distanceY == 0) { return; }
+
+  // Find bounding box:
+  float min_x_f = min_p.x;
+  float min_y_f = min_p.y;
+  float max_x_f = max_p.x;
+  float max_y_f = max_p.y;
+
+  // Clip against clip_rect:
+  min_x_f = std::max(min_x_f, clip_rect.x - target.DisplayPos.x);
+  min_y_f = std::max(min_y_f, clip_rect.y - target.DisplayPos.y);
+  max_x_f = std::min(max_x_f, clip_rect.z - 0.5f - target.DisplayPos.x);
+  max_y_f = std::min(max_y_f, clip_rect.w - 0.5f - target.DisplayPos.y);
+
+  // Integer bounding box [min, max):
+  int min_x_i = static_cast<int>(min_x_f);
+  int min_y_i = static_cast<int>(min_y_f);
+  int max_x_i = static_cast<int>(max_x_f + 1.0f);
+  int max_y_i = static_cast<int>(max_y_f + 1.0f);
+
+  // Clip against render target:
+  min_x_i = std::max(min_x_i, 0);
+  min_y_i = std::max(min_y_i, 0);
+  max_x_i = std::min(max_x_i, target.width);
+  max_y_i = std::min(max_y_i, target.height);
+
+  const auto topleft = ImVec2(min_x_i + 0.5f, min_y_i + 0.5f);
+  const ImVec2 delta_uv_per_pixel = {
+    (max_v.uv.x - min_v.uv.x) / distanceX,
+    (max_v.uv.y - min_v.uv.y) / distanceY,
+  };
+  const ImVec2 uv_topleft = {
+    min_v.uv.x + (topleft.x - min_v.pos.x) * delta_uv_per_pixel.x,
+    min_v.uv.y + (topleft.y - min_v.pos.y) * delta_uv_per_pixel.y,
   };
 
-  Barycentric operator*(const float f, const Barycentric &va) { return { f * va.w0, f * va.w1, f * va.w2 }; }
+  int startX = uv_topleft.x * (texture.width - 1.0f) + 0.5f;
+  int startY = uv_topleft.y * (texture.height - 1.0f) + 0.5f;
 
-  void operator+=(Barycentric &a, const Barycentric &b)
-  {
-    a.w0 += b.w0;
-    a.w1 += b.w1;
-    a.w2 += b.w2;
+  int currentX = startX;
+  int currentY = startY;
+
+  float deltaX = delta_uv_per_pixel.x * texture.width;
+  float deltaY = delta_uv_per_pixel.y * texture.height;
+
+  for (int y = min_y_i; y < max_y_i; ++y) {
+    currentX = startX;
+    for (int x = min_x_i; x < max_x_i; ++x) {
+      uint32_t& target_pixel = target.pixels[y * target.width + x];
+      const ColorInt targetColorRef = ColorInt(target_pixel);
+      const ColorInt colorRef = ColorInt(min_v.col);
+
+      if (texture.isAlpha) {
+        uint8_t texel = sample_font_texture(texture, currentX, currentY);
+        if (deltaX != 0 && currentX < texture.width - 1) { currentX += 1; }
+
+        // The font texture is all black or all white, so optimize for this:
+        if (texel == 0) { continue; }
+        if (texel == 255) {
+          target_pixel = blend(targetColorRef, colorRef);
+          continue;
+        }
+
+      } else {
+        uint32_t texColor = sample_texture(texture, currentX, currentY);
+        auto src_color = ColorInt(texColor);
+
+        if (deltaX != 0 && currentX < texture.width - 1) { currentX += 1; }
+
+        src_color *= colorRef;
+        target_pixel = blend(targetColorRef, src_color);
+      }
+    }
+    if (deltaY != 0 && currentY < texture.height - 1) { currentY += 1; }
   }
+}
 
-  Barycentric operator+(const Barycentric &a, const Barycentric &b)
-  {
-    return Barycentric{ a.w0 + b.w0, a.w1 + b.w1, a.w2 + b.w2 };
-  }
+// When two triangles share an edge, we want to draw the pixels on that edge exactly once.
+// The edge will be the same, but the direction will be the opposite
+// (assuming the two triangles have the same winding order).
+// Which edge wins? This functions decides.
+static bool is_dominant_edge(ImVec2 edge)
+{
+  // return edge.x < 0 || (edge.x == 0 && edge.y > 0);
+  return edge.y > 0 || (edge.y == 0 && edge.x < 0);
+}
 
-  // ----------------------------------------------------------------------------
-  // Useful operators on ImGui vectors:
+// Handles triangles in any winding order (CW/CCW)
+static void paint_triangle(const PaintTarget &target,
+  const SWTexture *texture,
+  const ImVec4 &clip_rect,
+  const ImDrawVert &v0,
+  const ImDrawVert &v1,
+  const ImDrawVert &v2)
+{
+  const ImVec2 p0 = ImVec2(v0.pos.x, v0.pos.y);
+  const ImVec2 p1 = ImVec2(v1.pos.x, v1.pos.y);
+  const ImVec2 p2 = ImVec2(v2.pos.x, v2.pos.y);
 
-  ImVec2 operator*(const float f, const ImVec2 &v) { return ImVec2{ f * v.x, f * v.y }; }
+  const auto rect_area = barycentric(p0, p1, p2);// Can be positive or negative depending on winding order
+  if (rect_area == 0.0f) { return; }
+  // if (rect_area < 0.0f) { return paint_triangle(target, texture, clip_rect, v0, v2, v1); }
 
-  bool operator!=(const ImVec2 &a, const ImVec2 &b) { return a.x != b.x || a.y != b.y; }
+  // Find bounding box:
+  float min_x_f = min3(p0.x, p1.x, p2.x);
+  float min_y_f = min3(p0.y, p1.y, p2.y);
+  float max_x_f = max3(p0.x, p1.x, p2.x);
+  float max_y_f = max3(p0.y, p1.y, p2.y);
 
-  ImVec4 operator*(const float f, const ImVec4 &v) { return ImVec4{ f * v.x, f * v.y, f * v.z, f * v.w }; }
+  // Clip against clip_rect:
+  min_x_f = std::max(min_x_f, clip_rect.x - target.DisplayPos.x);
+  min_y_f = std::max(min_y_f, clip_rect.y - target.DisplayPos.y);
+  max_x_f = std::min(max_x_f, clip_rect.z - 0.5f - target.DisplayPos.x);
+  max_y_f = std::min(max_y_f, clip_rect.w - 0.5f - target.DisplayPos.y);
 
-  // ----------------------------------------------------------------------------
-  // Copies of functions in ImGui, inlined for speed:
+  // Integer bounding box [min, max):
+  int min_x_i = static_cast<int>(min_x_f);
+  int min_y_i = static_cast<int>(min_y_f);
+  int max_x_i = static_cast<int>(max_x_f + 1.0f);
+  int max_y_i = static_cast<int>(max_y_f + 1.0f);
 
-  ImVec4 color_convert_u32_to_float4(ImU32 in)
-  {
-    const float s = 1.0f / 255.0f;
-    return ImVec4(((in >> IM_COL32_R_SHIFT) & 0xFF) * s,
-      ((in >> IM_COL32_G_SHIFT) & 0xFF) * s,
-      ((in >> IM_COL32_B_SHIFT) & 0xFF) * s,
-      ((in >> IM_COL32_A_SHIFT) & 0xFF) * s);
-  }
+  // Clip against render target:
+  min_x_i = std::max(min_x_i, 0);
+  min_y_i = std::max(min_y_i, 0);
+  max_x_i = std::min(max_x_i, target.width);
+  max_y_i = std::min(max_y_i, target.height);
 
-  ImU32 color_convert_float4_to_u32(const ImVec4 &in)
-  {
-    ImU32 out;
-    out = uint32_t(in.x * 255.0f + 0.5f) << IM_COL32_R_SHIFT;
-    out |= uint32_t(in.y * 255.0f + 0.5f) << IM_COL32_G_SHIFT;
-    out |= uint32_t(in.z * 255.0f + 0.5f) << IM_COL32_B_SHIFT;
-    out |= uint32_t(in.w * 255.0f + 0.5f) << IM_COL32_A_SHIFT;
-    return out;
-  }
+  // ------------------------------------------------------------------------
+  // Set up interpolation of barycentric coordinates:
 
-  // ----------------------------------------------------------------------------
-  // For fast and subpixel-perfect triangle rendering we used fixed point arithmetic.
-  // To keep the code simple we use 64 bits to avoid overflows.
-  // TODO: make it 32-bit or else
+  const auto topleft = ImVec2(min_x_i + 0.5f, min_y_i + 0.5f);
+  const auto dx = ImVec2(1, 0);
+  const auto dy = ImVec2(0, 1);
 
-  using Int = int64_t;
-  const Int kFixedBias = 256;
+  const auto w0_topleft = barycentric(p1, p2, topleft);
+  const auto w1_topleft = barycentric(p2, p0, topleft);
+  const auto w2_topleft = barycentric(p0, p1, topleft);
 
-  struct Point
-  {
-    Int x, y;
-  };
+  const auto w0_dx = barycentric(p1, p2, topleft + dx) - w0_topleft;
+  const auto w1_dx = barycentric(p2, p0, topleft + dx) - w1_topleft;
+  const auto w2_dx = barycentric(p0, p1, topleft + dx) - w2_topleft;
 
-  Int orient2d(const Point &a, const Point &b, const Point &c)
-  {
-    return (b.x - a.x) * (c.y - a.y) - (b.y - a.y) * (c.x - a.x);
-  }
+  const auto w0_dy = barycentric(p1, p2, topleft + dy) - w0_topleft;
+  const auto w1_dy = barycentric(p2, p0, topleft + dy) - w1_topleft;
+  const auto w2_dy = barycentric(p0, p1, topleft + dy) - w2_topleft;
 
-  Int as_int(float v) { return static_cast<Int>(floor(v * kFixedBias)); }
+  const Barycentric bary_0{ 1, 0, 0 };
+  const Barycentric bary_1{ 0, 1, 0 };
+  const Barycentric bary_2{ 0, 0, 1 };
 
-  Point as_point(ImVec2 v) { return Point{ as_int(v.x), as_int(v.y) }; }
+  const auto inv_area = 1 / rect_area;
+  const Barycentric bary_topleft = inv_area * (w0_topleft * bary_0 + w1_topleft * bary_1 + w2_topleft * bary_2);
+  const Barycentric bary_dx = inv_area * (w0_dx * bary_0 + w1_dx * bary_1 + w2_dx * bary_2);
+  const Barycentric bary_dy = inv_area * (w0_dy * bary_0 + w1_dy * bary_1 + w2_dy * bary_2);
 
-  // ----------------------------------------------------------------------------
+  Barycentric bary_current_row = bary_topleft;
 
-  float min3(float a, float b, float c)
-  {
-    if (a < b && a < c) { return a; }
-    return b < c ? b : c;
-  }
+  // ------------------------------------------------------------------------
+  // For pixel-perfect inside/outside testing:
 
-  float max3(float a, float b, float c)
-  {
-    if (a > b && a > c) { return a; }
-    return b > c ? b : c;
-  }
+  const int sign = rect_area > 0 ? 1 : -1;// winding order?
 
-  float barycentric(const ImVec2 &a, const ImVec2 &b, const ImVec2 &point)
-  {
-    return (b.x - a.x) * (point.y - a.y) - (b.y - a.y) * (point.x - a.x);
-  }
+  const int bias0i = is_dominant_edge(p2 - p1) ? 0 : -1;
+  const int bias1i = is_dominant_edge(p0 - p2) ? 0 : -1;
+  const int bias2i = is_dominant_edge(p1 - p0) ? 0 : -1;
 
-  inline uint8_t sample_font_texture(const SWTexture &texture, int x, int y)
-  {
-    return reinterpret_cast<const uint8_t *>(texture.pixels)[x + y * texture.width];
-  }
+  const auto p0i = as_point(p0);
+  const auto p1i = as_point(p1);
+  const auto p2i = as_point(p2);
 
-  inline uint32_t sample_texture(const SWTexture &texture, int x, int y) { return texture.pixels[x + y * texture.width]; }
+  // ------------------------------------------------------------------------
 
-  void paint_uniform_rectangle(const PaintTarget &target,
-    const ImVec2 &min_f,
-    const ImVec2 &max_f,
-    const ColorInt &color)
-  {
-    // Integer bounding box [min, max):
-    int min_x_i = static_cast<int>(target.scale.x * min_f.x + 0.5f);
-    int min_y_i = static_cast<int>(target.scale.y * min_f.y + 0.5f);
-    int max_x_i = static_cast<int>(target.scale.x * max_f.x + 0.5f);
-    int max_y_i = static_cast<int>(target.scale.y * max_f.y + 0.5f);
+  const bool has_uniform_color = (v0.col == v1.col && v0.col == v2.col);
 
-    // Clamp to render target:
-    min_x_i = std::max(min_x_i, 0);
-    min_y_i = std::max(min_y_i, 0);
-    max_x_i = std::min(max_x_i, target.width);
-    max_y_i = std::min(max_y_i, target.height);
+  const ImVec4 c0 = color_convert_u32_to_float4(v0.col);
+  const ImVec4 c1 = color_convert_u32_to_float4(v1.col);
+  const ImVec4 c2 = color_convert_u32_to_float4(v2.col);
 
-    // We often blend the same colors over and over again, so optimize for this (saves 25% total cpu):
-    uint32_t last_target_pixel = target.pixels[min_y_i * target.width + min_x_i];
-    const auto *lastColorRef = reinterpret_cast<const ColorInt *>(&last_target_pixel);
-    uint32_t last_output = blend(*lastColorRef, color);
+  // We often blend the same colors over and over again, so optimize for this (saves 10% total cpu):
+  uint32_t last_target_pixel = 0;
+  const auto *lastColorRef = reinterpret_cast<const ColorInt *>(&last_target_pixel);
+  const auto *colorRef = reinterpret_cast<const ColorInt *>(&v0.col);
+  uint32_t last_output = blend(*lastColorRef, *colorRef);
 
-    for (int y = min_y_i; y < max_y_i; ++y) {
-      for (int x = min_x_i; x < max_x_i; ++x) {
-        uint32_t &target_pixel = target.pixels[y * target.width + x];
+  for (int y = min_y_i; y < max_y_i; ++y) {
+    auto bary = bary_current_row;
+
+    bool has_been_inside_this_row = false;
+
+    for (int x = min_x_i; x < max_x_i; ++x) {
+      const auto w0 = bary.w0;
+      const auto w1 = bary.w1;
+      const auto w2 = bary.w2;
+      bary += bary_dx;
+
+      {
+        // Inside/outside test:
+        const auto p = Point{ kFixedBias * x + kFixedBias / 2, kFixedBias * y + kFixedBias / 2 };
+        const auto w0i = sign * orient2d(p1i, p2i, p) + bias0i;
+        const auto w1i = sign * orient2d(p2i, p0i, p) + bias1i;
+        const auto w2i = sign * orient2d(p0i, p1i, p) + bias2i;
+        if (w0i < 0 || w1i < 0 || w2i < 0) {
+          if (has_been_inside_this_row) {
+            break;// Gives a nice 10% speedup
+          } else {
+            continue;
+          }
+        }
+      }
+      has_been_inside_this_row = true;
+
+      uint32_t &target_pixel = target.pixels[y * target.width + x];
+
+      if (has_uniform_color && !texture) {
         if (target_pixel == last_target_pixel) {
           target_pixel = last_output;
           continue;
         }
         last_target_pixel = target_pixel;
-        const auto *colorRef = reinterpret_cast<const ColorInt *>(&target_pixel);
-        target_pixel = blend(*colorRef, color);
+        target_pixel = blend(*lastColorRef, *colorRef);
         last_output = target_pixel;
+        continue;
       }
-    }
-  }
 
-  void paint_uniform_textured_rectangle(const PaintTarget &target,
-    const SWTexture &texture,
-    const ImVec4 &clip_rect,
-    const ImDrawVert &min_v,
-    const ImDrawVert &max_v)
-  {
-    const ImVec2 min_p = ImVec2(target.scale.x * min_v.pos.x, target.scale.y * min_v.pos.y);
-    const ImVec2 max_p = ImVec2(target.scale.x * max_v.pos.x, target.scale.y * max_v.pos.y);
+      ImVec4 src_color;
 
-    float distanceX = max_p.x - min_p.x;
-    float distanceY = max_p.y - min_p.y;
-    if (distanceX == 0 || distanceY == 0) { return; }
-
-    // Find bounding box:
-    float min_x_f = min_p.x;
-    float min_y_f = min_p.y;
-    float max_x_f = max_p.x;
-    float max_y_f = max_p.y;
-
-    // Clip against clip_rect:
-    min_x_f = std::max(min_x_f, target.scale.x * clip_rect.x - target.DisplayPos.x);
-    min_y_f = std::max(min_y_f, target.scale.y * clip_rect.y - target.DisplayPos.y);
-    max_x_f = std::min(max_x_f, target.scale.x * clip_rect.z - 0.5f - target.DisplayPos.x);
-    max_y_f = std::min(max_y_f, target.scale.y * clip_rect.w - 0.5f - target.DisplayPos.y);
-
-    // Integer bounding box [min, max):
-    int min_x_i = static_cast<int>(min_x_f);
-    int min_y_i = static_cast<int>(min_y_f);
-    int max_x_i = static_cast<int>(max_x_f + 1.0f);
-    int max_y_i = static_cast<int>(max_y_f + 1.0f);
-
-    // Clip against render target:
-    min_x_i = std::max(min_x_i, 0);
-    min_y_i = std::max(min_y_i, 0);
-    max_x_i = std::min(max_x_i, target.width);
-    max_y_i = std::min(max_y_i, target.height);
-
-    const auto topleft = ImVec2(min_x_i + 0.5f * target.scale.x, min_y_i + 0.5f * target.scale.y);
-    const ImVec2 delta_uv_per_pixel = {
-      (max_v.uv.x - min_v.uv.x) / distanceX,
-      (max_v.uv.y - min_v.uv.y) / distanceY,
-    };
-    const ImVec2 uv_topleft = {
-      min_v.uv.x + (topleft.x - min_v.pos.x) * delta_uv_per_pixel.x,
-      min_v.uv.y + (topleft.y - min_v.pos.y) * delta_uv_per_pixel.y,
-    };
-
-    int startX = uv_topleft.x * (texture.width - 1.0f) + 0.5f;
-    int startY = uv_topleft.y * (texture.height - 1.0f) + 0.5f;
-
-    int currentX = startX;
-    int currentY = startY;
-
-    float deltaX = delta_uv_per_pixel.x * texture.width;
-    float deltaY = delta_uv_per_pixel.y * texture.height;
-
-    for (int y = min_y_i; y < max_y_i; ++y) {
-      currentX = startX;
-      for (int x = min_x_i; x < max_x_i; ++x) {
-        uint32_t &target_pixel = target.pixels[y * target.width + x];
-        const auto *targetColorRef = reinterpret_cast<const ColorInt *>(&target_pixel);
-        const auto *colorRef = reinterpret_cast<const ColorInt *>(&min_v.col);
-
-        if (texture.isAlpha) {
-          uint8_t texel = sample_font_texture(texture, currentX, currentY);
-          if (deltaX != 0 && currentX < texture.width - 1) { currentX += 1; }
-
-          // The font texture is all black or all white, so optimize for this:
-          if (texel == 0) { continue; }
-          if (texel == 255) {
-            target_pixel = blend(*targetColorRef, *colorRef);
-            continue;
-          }
-
-        } else {
-          auto texColor = sample_texture(texture, currentX, currentY);
-          auto src_color = reinterpret_cast<ColorInt *>(&texColor);
-
-          if (deltaX != 0 && currentX < texture.width - 1) { currentX += 1; }
-
-          *src_color *= *colorRef;
-          target_pixel = blend(*targetColorRef, *src_color);
-        }
+      if (has_uniform_color) {
+        src_color = c0;
+      } else {
+        src_color = w0 * c0 + w1 * c1 + w2 * c2;
       }
-      if (deltaY != 0 && currentY < texture.height - 1) { currentY += 1; }
+
+      if (texture) {
+        if (!texture->isAlpha) { printf("warning: different texture\n"); }
+
+        const ImVec2 uv = w0 * v0.uv + w1 * v1.uv + w2 * v2.uv;
+        int x = uv.x * (texture->width - 1.0f) + 0.5f;
+        int y = uv.y * (texture->height - 1.0f) + 0.5f;
+        src_color.w *= sample_font_texture(*texture, x, y) / 255.0f;
+      }
+
+      if (src_color.w <= 0.0f) { continue; }// Transparent.
+      if (src_color.w >= 1.0f) {
+        // Opaque, no blending needed:
+        target_pixel = color_convert_float4_to_u32(src_color);
+        continue;
+      }
+
+      ImVec4 target_color = color_convert_u32_to_float4(target_pixel);
+      const auto blended_color = src_color.w * src_color + (1.0f - src_color.w) * target_color;
+      target_pixel = color_convert_float4_to_u32(blended_color);
     }
+
+    bary_current_row += bary_dy;
   }
+}
 
-  // When two triangles share an edge, we want to draw the pixels on that edge exactly once.
-  // The edge will be the same, but the direction will be the opposite
-  // (assuming the two triangles have the same winding order).
-  // Which edge wins? This functions decides.
-  bool is_dominant_edge(ImVec2 edge)
-  {
-    // return edge.x < 0 || (edge.x == 0 && edge.y > 0);
-    return edge.y > 0 || (edge.y == 0 && edge.x < 0);
-  }
+static void paint_draw_cmd(const PaintTarget &target,
+  const ImDrawVert *vertices,
+  const ImDrawIdx *idx_buffer,
+  const ImDrawCmd &pcmd,
+  const SwOptions &options)
+{
+  const auto texture = reinterpret_cast<const SWTexture *>(pcmd.TextureId);
+  IM_ASSERT(texture);
 
-  // Handles triangles in any winding order (CW/CCW)
-  void paint_triangle(const PaintTarget &target,
-    const SWTexture *texture,
-    const ImVec4 &clip_rect,
-    const ImDrawVert &v0,
-    const ImDrawVert &v1,
-    const ImDrawVert &v2)
-  {
-    const ImVec2 p0 = ImVec2(target.scale.x * v0.pos.x, target.scale.y * v0.pos.y);
-    const ImVec2 p1 = ImVec2(target.scale.x * v1.pos.x, target.scale.y * v1.pos.y);
-    const ImVec2 p2 = ImVec2(target.scale.x * v2.pos.x, target.scale.y * v2.pos.y);
+  // ImGui uses the first pixel for "white".
+  const ImVec2 white_uv = ImVec2(0.5f / texture->width, 0.5f / texture->height);
 
-    const auto rect_area = barycentric(p0, p1, p2);// Can be positive or negative depending on winding order
-    if (rect_area == 0.0f) { return; }
-    // if (rect_area < 0.0f) { return paint_triangle(target, texture, clip_rect, v0, v2, v1); }
+  for (unsigned int i = 0; i + 3 <= pcmd.ElemCount;) {
+    ImDrawVert v0 = vertices[idx_buffer[i + 0]];
+    v0.pos.x -= target.DisplayPos.x;
+    v0.pos.y -= target.DisplayPos.y;
+    ImDrawVert v1 = vertices[idx_buffer[i + 1]];
+    v1.pos.x -= target.DisplayPos.x;
+    v1.pos.y -= target.DisplayPos.y;
+    ImDrawVert v2 = vertices[idx_buffer[i + 2]];
+    v2.pos.x -= target.DisplayPos.x;
+    v2.pos.y -= target.DisplayPos.y;
 
-    // Find bounding box:
-    float min_x_f = min3(p0.x, p1.x, p2.x);
-    float min_y_f = min3(p0.y, p1.y, p2.y);
-    float max_x_f = max3(p0.x, p1.x, p2.x);
-    float max_y_f = max3(p0.y, p1.y, p2.y);
+    // Text is common, and is made of textured rectangles. So let's optimize for it.
+    // This assumes the ImGui way to layout text does not change.
+    if (options.optimize_text && i + 6 <= pcmd.ElemCount && idx_buffer[i + 3] == idx_buffer[i + 0]
+        && idx_buffer[i + 4] == idx_buffer[i + 2]) {
+      ImDrawVert v3 = vertices[idx_buffer[i + 5]];
+      v3.pos.x -= target.DisplayPos.x;
+      v3.pos.y -= target.DisplayPos.y;
 
-    // Clip against clip_rect:
-    min_x_f = std::max(min_x_f, target.scale.x * clip_rect.x - target.DisplayPos.x);
-    min_y_f = std::max(min_y_f, target.scale.y * clip_rect.y - target.DisplayPos.y);
-    max_x_f = std::min(max_x_f, target.scale.x * clip_rect.z - 0.5f - target.DisplayPos.x);
-    max_y_f = std::min(max_y_f, target.scale.y * clip_rect.w - 0.5f - target.DisplayPos.y);
+      if (v0.pos.x == v3.pos.x && v1.pos.x == v2.pos.x && v0.pos.y == v1.pos.y && v2.pos.y == v3.pos.y
+          && v0.uv.x == v3.uv.x && v1.uv.x == v2.uv.x && v0.uv.y == v1.uv.y && v2.uv.y == v3.uv.y) {
+        const bool has_uniform_color = v0.col == v1.col && v0.col == v2.col && v0.col == v3.col;
 
-    // Integer bounding box [min, max):
-    int min_x_i = static_cast<int>(min_x_f);
-    int min_y_i = static_cast<int>(min_y_f);
-    int max_x_i = static_cast<int>(max_x_f + 1.0f);
-    int max_y_i = static_cast<int>(max_y_f + 1.0f);
+        const bool has_texture = v0.uv != white_uv || v1.uv != white_uv || v2.uv != white_uv || v3.uv != white_uv;
 
-    // Clip against render target:
-    min_x_i = std::max(min_x_i, 0);
-    min_y_i = std::max(min_y_i, 0);
-    max_x_i = std::min(max_x_i, target.width);
-    max_y_i = std::min(max_y_i, target.height);
-
-    // ------------------------------------------------------------------------
-    // Set up interpolation of barycentric coordinates:
-
-    const auto topleft = ImVec2(min_x_i + 0.5f * target.scale.x, min_y_i + 0.5f * target.scale.y);
-    const auto dx = ImVec2(1, 0);
-    const auto dy = ImVec2(0, 1);
-
-    const auto w0_topleft = barycentric(p1, p2, topleft);
-    const auto w1_topleft = barycentric(p2, p0, topleft);
-    const auto w2_topleft = barycentric(p0, p1, topleft);
-
-    const auto w0_dx = barycentric(p1, p2, topleft + dx) - w0_topleft;
-    const auto w1_dx = barycentric(p2, p0, topleft + dx) - w1_topleft;
-    const auto w2_dx = barycentric(p0, p1, topleft + dx) - w2_topleft;
-
-    const auto w0_dy = barycentric(p1, p2, topleft + dy) - w0_topleft;
-    const auto w1_dy = barycentric(p2, p0, topleft + dy) - w1_topleft;
-    const auto w2_dy = barycentric(p0, p1, topleft + dy) - w2_topleft;
-
-    const Barycentric bary_0{ 1, 0, 0 };
-    const Barycentric bary_1{ 0, 1, 0 };
-    const Barycentric bary_2{ 0, 0, 1 };
-
-    const auto inv_area = 1 / rect_area;
-    const Barycentric bary_topleft = inv_area * (w0_topleft * bary_0 + w1_topleft * bary_1 + w2_topleft * bary_2);
-    const Barycentric bary_dx = inv_area * (w0_dx * bary_0 + w1_dx * bary_1 + w2_dx * bary_2);
-    const Barycentric bary_dy = inv_area * (w0_dy * bary_0 + w1_dy * bary_1 + w2_dy * bary_2);
-
-    Barycentric bary_current_row = bary_topleft;
-
-    // ------------------------------------------------------------------------
-    // For pixel-perfect inside/outside testing:
-
-    const int sign = rect_area > 0 ? 1 : -1;// winding order?
-
-    const int bias0i = is_dominant_edge(p2 - p1) ? 0 : -1;
-    const int bias1i = is_dominant_edge(p0 - p2) ? 0 : -1;
-    const int bias2i = is_dominant_edge(p1 - p0) ? 0 : -1;
-
-    const auto p0i = as_point(p0);
-    const auto p1i = as_point(p1);
-    const auto p2i = as_point(p2);
-
-    // ------------------------------------------------------------------------
-
-    const bool has_uniform_color = (v0.col == v1.col && v0.col == v2.col);
-
-    const ImVec4 c0 = color_convert_u32_to_float4(v0.col);
-    const ImVec4 c1 = color_convert_u32_to_float4(v1.col);
-    const ImVec4 c2 = color_convert_u32_to_float4(v2.col);
-
-    // We often blend the same colors over and over again, so optimize for this (saves 10% total cpu):
-    uint32_t last_target_pixel = 0;
-    const auto *lastColorRef = reinterpret_cast<const ColorInt *>(&last_target_pixel);
-    const auto *colorRef = reinterpret_cast<const ColorInt *>(&v0.col);
-    uint32_t last_output = blend(*lastColorRef, *colorRef);
-
-    for (int y = min_y_i; y < max_y_i; ++y) {
-      auto bary = bary_current_row;
-
-      bool has_been_inside_this_row = false;
-
-      for (int x = min_x_i; x < max_x_i; ++x) {
-        const auto w0 = bary.w0;
-        const auto w1 = bary.w1;
-        const auto w2 = bary.w2;
-        bary += bary_dx;
-
-        {
-          // Inside/outside test:
-          const auto p = Point{ kFixedBias * x + kFixedBias / 2, kFixedBias * y + kFixedBias / 2 };
-          const auto w0i = sign * orient2d(p1i, p2i, p) + bias0i;
-          const auto w1i = sign * orient2d(p2i, p0i, p) + bias1i;
-          const auto w2i = sign * orient2d(p0i, p1i, p) + bias2i;
-          if (w0i < 0 || w1i < 0 || w2i < 0) {
-            if (has_been_inside_this_row) {
-              break;// Gives a nice 10% speedup
-            } else {
-              continue;
-            }
-          }
-        }
-        has_been_inside_this_row = true;
-
-        uint32_t &target_pixel = target.pixels[y * target.width + x];
-
-        if (has_uniform_color && !texture) {
-          if (target_pixel == last_target_pixel) {
-            target_pixel = last_output;
-            continue;
-          }
-          last_target_pixel = target_pixel;
-          target_pixel = blend(*lastColorRef, *colorRef);
-          last_output = target_pixel;
+        if (has_uniform_color && has_texture) {
+          paint_uniform_textured_rectangle(target, *texture, pcmd.ClipRect, v0, v2);
+          i += 6;
           continue;
         }
+      }
+    }
 
-        ImVec4 src_color;
+    // A lot of the big stuff are uniformly colored rectangles,
+    // so we can save a lot of CPU by detecting them:
+    if (options.optimize_rectangles && i + 6 <= pcmd.ElemCount) {
+      ImDrawVert v3 = vertices[idx_buffer[i + 3]];
+      v3.pos.x -= target.DisplayPos.x;
+      v3.pos.y -= target.DisplayPos.y;
+      ImDrawVert v4 = vertices[idx_buffer[i + 4]];
+      v4.pos.x -= target.DisplayPos.x;
+      v4.pos.y -= target.DisplayPos.y;
+      ImDrawVert v5 = vertices[idx_buffer[i + 5]];
+      v5.pos.x -= target.DisplayPos.x;
+      v5.pos.y -= target.DisplayPos.y;
+
+      ImVec2 min, max;
+      min.x = min3(v0.pos.x - target.DisplayPos.x, v1.pos.x - target.DisplayPos.x, v2.pos.x - target.DisplayPos.x);
+      min.y = min3(v0.pos.y - target.DisplayPos.y, v1.pos.y - target.DisplayPos.y, v2.pos.y - target.DisplayPos.y);
+      max.x = max3(v0.pos.x - target.DisplayPos.x, v1.pos.x - target.DisplayPos.x, v2.pos.x - target.DisplayPos.x);
+      max.y = max3(v0.pos.y - target.DisplayPos.y, v1.pos.y - target.DisplayPos.y, v2.pos.y - target.DisplayPos.y);
+
+      // Not the prettiest way to do this, but it catches all cases
+      // of a rectangle split into two triangles.
+      // TODO: Stop it from also assuming duplicate triangles is one rectangle.
+      if ((v0.pos.x == min.x || v0.pos.x == max.x) && (v0.pos.y == min.y || v0.pos.y == max.y)
+          && (v1.pos.x == min.x || v1.pos.x == max.x) && (v1.pos.y == min.y || v1.pos.y == max.y)
+          && (v2.pos.x == min.x || v2.pos.x == max.x) && (v2.pos.y == min.y || v2.pos.y == max.y)
+          && (v3.pos.x == min.x || v3.pos.x == max.x) && (v3.pos.y == min.y || v3.pos.y == max.y)
+          && (v4.pos.x == min.x || v4.pos.x == max.x) && (v4.pos.y == min.y || v4.pos.y == max.y)
+          && (v5.pos.x == min.x || v5.pos.x == max.x) && (v5.pos.y == min.y || v5.pos.y == max.y)) {
+        const bool has_uniform_color =
+          v0.col == v1.col && v0.col == v2.col && v0.col == v3.col && v0.col == v4.col && v0.col == v5.col;
+
+        min.x = std::max(min.x, pcmd.ClipRect.x - target.DisplayPos.x);
+        min.y = std::max(min.y, pcmd.ClipRect.y - target.DisplayPos.y);
+        max.x = std::min(max.x, pcmd.ClipRect.z - 0.5f - target.DisplayPos.x);
+        max.y = std::min(max.y, pcmd.ClipRect.w - 0.5f - target.DisplayPos.y);
+
+        if (max.x < min.x || max.y < min.y) {
+          i += 6;
+          continue;
+        }// Completely clipped
 
         if (has_uniform_color) {
-          src_color = c0;
-        } else {
-          src_color = w0 * c0 + w1 * c1 + w2 * c2;
-        }
-
-        if (texture) {
-          if (!texture->isAlpha) { printf("warning: different texture\n"); }
-
-          const ImVec2 uv = w0 * v0.uv + w1 * v1.uv + w2 * v2.uv;
-          int x = uv.x * (texture->width - 1.0f) + 0.5f;
-          int y = uv.y * (texture->height - 1.0f) + 0.5f;
-          src_color.w *= sample_font_texture(*texture, x, y) / 255.0f;
-        }
-
-        if (src_color.w <= 0.0f) { continue; }// Transparent.
-        if (src_color.w >= 1.0f) {
-          // Opaque, no blending needed:
-          target_pixel = color_convert_float4_to_u32(src_color);
+          const auto *colorRef = reinterpret_cast<const ColorInt *>(&v0.col);
+          paint_uniform_rectangle(target, min, max, *colorRef);
+          i += 6;
           continue;
         }
-
-        ImVec4 target_color = color_convert_u32_to_float4(target_pixel);
-        const auto blended_color = src_color.w * src_color + (1.0f - src_color.w) * target_color;
-        target_pixel = color_convert_float4_to_u32(blended_color);
       }
-
-      bary_current_row += bary_dy;
     }
+
+    const bool has_texture = (v0.uv != white_uv || v1.uv != white_uv || v2.uv != white_uv);
+    paint_triangle(target, has_texture ? texture : nullptr, pcmd.ClipRect, v0, v1, v2);
+    i += 3;
   }
+}
 
-  void paint_draw_cmd(const PaintTarget &target,
-    const ImDrawVert *vertices,
-    const ImDrawIdx *idx_buffer,
-    const ImDrawCmd &pcmd,
-    const SwOptions &options)
-  {
-    const auto texture = reinterpret_cast<const SWTexture *>(pcmd.TextureId);
-    IM_ASSERT(texture);
-
-    // ImGui uses the first pixel for "white".
-    const ImVec2 white_uv = ImVec2(0.5f / texture->width, 0.5f / texture->height);
-
-    for (unsigned int i = 0; i + 3 <= pcmd.ElemCount;) {
-      ImDrawVert v0 = vertices[idx_buffer[i + 0]];
-      v0.pos.x -= target.DisplayPos.x;
-      v0.pos.y -= target.DisplayPos.y;
-      ImDrawVert v1 = vertices[idx_buffer[i + 1]];
-      v1.pos.x -= target.DisplayPos.x;
-      v1.pos.y -= target.DisplayPos.y;
-      ImDrawVert v2 = vertices[idx_buffer[i + 2]];
-      v2.pos.x -= target.DisplayPos.x;
-      v2.pos.y -= target.DisplayPos.y;
-
-      // Text is common, and is made of textured rectangles. So let's optimize for it.
-      // This assumes the ImGui way to layout text does not change.
-      if (options.optimize_text && i + 6 <= pcmd.ElemCount && idx_buffer[i + 3] == idx_buffer[i + 0]
-          && idx_buffer[i + 4] == idx_buffer[i + 2]) {
-        ImDrawVert v3 = vertices[idx_buffer[i + 5]];
-        v3.pos.x -= target.DisplayPos.x;
-        v3.pos.y -= target.DisplayPos.y;
-
-        if (v0.pos.x == v3.pos.x && v1.pos.x == v2.pos.x && v0.pos.y == v1.pos.y && v2.pos.y == v3.pos.y
-            && v0.uv.x == v3.uv.x && v1.uv.x == v2.uv.x && v0.uv.y == v1.uv.y && v2.uv.y == v3.uv.y) {
-          const bool has_uniform_color = v0.col == v1.col && v0.col == v2.col && v0.col == v3.col;
-
-          const bool has_texture = v0.uv != white_uv || v1.uv != white_uv || v2.uv != white_uv || v3.uv != white_uv;
-
-          if (has_uniform_color && has_texture) {
-            paint_uniform_textured_rectangle(target, *texture, pcmd.ClipRect, v0, v2);
-            i += 6;
-            continue;
-          }
-        }
-      }
-
-      // A lot of the big stuff are uniformly colored rectangles,
-      // so we can save a lot of CPU by detecting them:
-      if (options.optimize_rectangles && i + 6 <= pcmd.ElemCount) {
-        ImDrawVert v3 = vertices[idx_buffer[i + 3]];
-        v3.pos.x -= target.DisplayPos.x;
-        v3.pos.y -= target.DisplayPos.y;
-        ImDrawVert v4 = vertices[idx_buffer[i + 4]];
-        v4.pos.x -= target.DisplayPos.x;
-        v4.pos.y -= target.DisplayPos.y;
-        ImDrawVert v5 = vertices[idx_buffer[i + 5]];
-        v5.pos.x -= target.DisplayPos.x;
-        v5.pos.y -= target.DisplayPos.y;
-
-        ImVec2 min, max;
-        min.x = min3(v0.pos.x - target.DisplayPos.x, v1.pos.x - target.DisplayPos.x, v2.pos.x - target.DisplayPos.x);
-        min.y = min3(v0.pos.y - target.DisplayPos.y, v1.pos.y - target.DisplayPos.y, v2.pos.y - target.DisplayPos.y);
-        max.x = max3(v0.pos.x - target.DisplayPos.x, v1.pos.x - target.DisplayPos.x, v2.pos.x - target.DisplayPos.x);
-        max.y = max3(v0.pos.y - target.DisplayPos.y, v1.pos.y - target.DisplayPos.y, v2.pos.y - target.DisplayPos.y);
-
-        // Not the prettiest way to do this, but it catches all cases
-        // of a rectangle split into two triangles.
-        // TODO: Stop it from also assuming duplicate triangles is one rectangle.
-        if ((v0.pos.x == min.x || v0.pos.x == max.x) && (v0.pos.y == min.y || v0.pos.y == max.y)
-            && (v1.pos.x == min.x || v1.pos.x == max.x) && (v1.pos.y == min.y || v1.pos.y == max.y)
-            && (v2.pos.x == min.x || v2.pos.x == max.x) && (v2.pos.y == min.y || v2.pos.y == max.y)
-            && (v3.pos.x == min.x || v3.pos.x == max.x) && (v3.pos.y == min.y || v3.pos.y == max.y)
-            && (v4.pos.x == min.x || v4.pos.x == max.x) && (v4.pos.y == min.y || v4.pos.y == max.y)
-            && (v5.pos.x == min.x || v5.pos.x == max.x) && (v5.pos.y == min.y || v5.pos.y == max.y)) {
-          const bool has_uniform_color =
-            v0.col == v1.col && v0.col == v2.col && v0.col == v3.col && v0.col == v4.col && v0.col == v5.col;
-
-          min.x = std::max(min.x, pcmd.ClipRect.x - target.DisplayPos.x);
-          min.y = std::max(min.y, pcmd.ClipRect.y - target.DisplayPos.y);
-          max.x = std::min(max.x, pcmd.ClipRect.z - 0.5f - target.DisplayPos.x);
-          max.y = std::min(max.y, pcmd.ClipRect.w - 0.5f - target.DisplayPos.y);
-
-          if (max.x < min.x || max.y < min.y) {
-            i += 6;
-            continue;
-          }// Completely clipped
-
-          if (has_uniform_color) {
-            const auto *colorRef = reinterpret_cast<const ColorInt *>(&v0.col);
-            paint_uniform_rectangle(target, min, max, *colorRef);
-            i += 6;
-            continue;
-          }
-        }
-      }
-
-      const bool has_texture = (v0.uv != white_uv || v1.uv != white_uv || v2.uv != white_uv);
-      paint_triangle(target, has_texture ? texture : nullptr, pcmd.ClipRect, v0, v1, v2);
-      i += 3;
-    }
-  }
-
-  void paint_draw_list(const PaintTarget &target, const ImDrawList *cmd_list, const SwOptions &options)
-  {
-    const ImDrawIdx *idx_buffer = &cmd_list->IdxBuffer[0];
-    const ImDrawVert *vertices = cmd_list->VtxBuffer.Data;
-
-    for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.size(); cmd_i++) {
-      const ImDrawCmd &pcmd = cmd_list->CmdBuffer[cmd_i];
-      if (pcmd.UserCallback) {
-        pcmd.UserCallback(cmd_list, &pcmd);
-      } else {
-        paint_draw_cmd(target, vertices, idx_buffer, pcmd, options);
-      }
-      idx_buffer += pcmd.ElemCount;
-    }
-  }
-
-}// namespace
-
-void paint_imgui(uint32_t *pixels, ImDrawData *drawData, const SwOptions &options = {})
+static void paint_draw_list(const PaintTarget &target, const ImDrawList *cmd_list, const SwOptions &options)
+{
+  const ImDrawIdx *idx_buffer = &cmd_list->IdxBuffer[0];
+  const ImDrawVert *vertices = cmd_list->VtxBuffer.Data;
+
+  for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.size(); cmd_i++) {
+    const ImDrawCmd &pcmd = cmd_list->CmdBuffer[cmd_i];
+    if (pcmd.UserCallback) {
+      pcmd.UserCallback(cmd_list, &pcmd);
+    } else {
+      paint_draw_cmd(target, vertices, idx_buffer, pcmd, options);
+    }
+    idx_buffer += pcmd.ElemCount;
+  }
+  }
+
+static void paint_imgui(uint32_t *pixels, ImDrawData *drawData, int fb_width, int fb_height, const SwOptions &options = {})
 {
-  int fb_width = (int)(drawData->DisplaySize.x * drawData->FramebufferScale.x);
-  int fb_height = (int)(drawData->DisplaySize.y * drawData->FramebufferScale.y);
   if (fb_width <= 0 || fb_height <= 0) return;
 
-  PaintTarget target{ pixels, fb_width, fb_height, drawData->FramebufferScale, drawData->DisplayPos };
+  PaintTarget target{ pixels, fb_width, fb_height, drawData->DisplayPos };
 
   for (int i = 0; i < drawData->CmdListsCount; ++i) {
     paint_draw_list(target, drawData->CmdLists[i], options);
@@ -662,7 +658,7 @@ void ImGui_ImplSW_RenderDrawData(ImDrawData* draw_data) {
   if (mustLock) {
     if (SDL_LockSurface(surf)!=0) return;
   }
-  paint_imgui((uint32_t*)surf->pixels,draw_data);
+  paint_imgui((uint32_t*)surf->pixels,draw_data,surf->w,surf->h);
   if (mustLock) {
     SDL_UnlockSurface(surf);
   }