[poppler] [PATCH 4/4] SplashXPathScanner: Reduce complexity of sorting spans

Stefan BrĂ¼ns stefan.bruens at rwth-aachen.de
Sat May 26 02:34:51 UTC 2018


For complex paths, a significant amount of time is spent in
SplashXPathScanner::computeIntersections, more specifically with
sorting the spans in y/x order.

Instead of using one large array for all spans, use a 2-dimensional
structure.  As the number of y positions is known upfront, it is
possible to create an array for the y dimension and insert the spans
directly at the appropriate position.

To avoid large number of allocations for common simple polygons,
boost::container::small_vector<4, T> is used, which stores up to
4 intersections inline. small_vector is a header-only class.

For Y rows with X spans per row, this reduces the complexity for sorting from
O( Y*X log Y*X) to O( Y * X log X), i.e. a reduction by log Y.

For the document from fdo#96728, the runtime/memory is significantly reduced:
$> pdftoppm -r 18 -aa no runsforever-poppler.pdf

Before:
- User time (seconds): 2979.80
- Maximum resident set size (kbytes): 51208

After:
- User time (seconds): 1773.53
- Maximum resident set size (kbytes): 45896

---
 splash/SplashXPathScanner.cc | 176 +++++++++++++++++--------------------------
 splash/SplashXPathScanner.h  |  16 ++--
 2 files changed, 82 insertions(+), 110 deletions(-)

diff --git a/splash/SplashXPathScanner.cc b/splash/SplashXPathScanner.cc
index 9c46f9dd..0e2caa3c 100644
--- a/splash/SplashXPathScanner.cc
+++ b/splash/SplashXPathScanner.cc
@@ -37,17 +37,6 @@
 
 //------------------------------------------------------------------------
 
-struct SplashIntersect {
-  int y;
-  int x0, x1;			// intersection of segment with [y, y+1)
-  int count;			// EO/NZWN counter increment
-};
-
-struct cmpIntersectFunctor {
-  bool operator()(const SplashIntersect &i0, const SplashIntersect &i1) {
-    return (i0.y != i1.y) ? (i0.y < i1.y) : (i0.x0 < i1.x0);
-  }
-};
 
 //------------------------------------------------------------------------
 // SplashXPathScanner
@@ -119,14 +108,10 @@ SplashXPathScanner::SplashXPathScanner(SplashXPath *xPathA, GBool eoA,
     }
   }
 
-  allInter = nullptr;
-  inter = nullptr;
   computeIntersections();
 }
 
 SplashXPathScanner::~SplashXPathScanner() {
-  gfree(inter);
-  gfree(allInter);
 }
 
 void SplashXPathScanner::getBBoxAA(int *xMinA, int *yMinA,
@@ -141,17 +126,17 @@ void SplashXPathScanner::getSpanBounds(int y, int *spanXMin, int *spanXMax) {
   int interBegin, interEnd, xx, i;
 
   if (y < yMin || y > yMax) {
-    interBegin = interEnd = 0;
-  } else {
-    interBegin = inter[y - yMin];
-    interEnd = inter[y - yMin + 1];
+    *spanXMin = xMax + 1;
+    *spanXMax = xMax;
+    return;
   }
-  if (interBegin < interEnd) {
-    *spanXMin = allInter[interBegin].x0;
-    xx = allInter[interBegin].x1;
-    for (i = interBegin + 1; i < interEnd; ++i) {
-      if (allInter[i].x1 > xx) {
-	xx = allInter[i].x1;
+  const auto& line = allIntersections[y - yMin];
+  if (!line.empty()) {
+    *spanXMin = line[0].x0;
+    xx = line[0].x1;
+    for (i = 1; i < line.size(); ++i) {
+      if (line[i].x1 > xx) {
+	xx = line[i].x1;
       }
     }
     *spanXMax = xx;
@@ -167,14 +152,13 @@ GBool SplashXPathScanner::test(int x, int y) {
   if (y < yMin || y > yMax) {
     return gFalse;
   }
-  interBegin = inter[y - yMin];
-  interEnd = inter[y - yMin + 1];
+  const auto& line = allIntersections[y - yMin];
   count = 0;
-  for (i = interBegin; i < interEnd && allInter[i].x0 <= x; ++i) {
-    if (x <= allInter[i].x1) {
+  for (i = 0; i < line.size() && line[i].x0 <= x; ++i) {
+    if (x <= line[i].x1) {
       return gTrue;
     }
-    count += allInter[i].count;
+    count += line[i].count;
   }
   return eo ? (count & 1) : (count != 0);
 }
@@ -185,27 +169,26 @@ GBool SplashXPathScanner::testSpan(int x0, int x1, int y) {
   if (y < yMin || y > yMax) {
     return gFalse;
   }
-  interBegin = inter[y - yMin];
-  interEnd = inter[y - yMin + 1];
+  const auto& line = allIntersections[y - yMin];
   count = 0;
-  for (i = interBegin; i < interEnd && allInter[i].x1 < x0; ++i) {
-    count += allInter[i].count;
+  for (i = 0; i < line.size() && line[i].x1 < x0; ++i) {
+    count += line[i].count;
   }
 
   // invariant: the subspan [x0,xx1] is inside the path
   xx1 = x0 - 1;
   while (xx1 < x1) {
-    if (i >= interEnd) {
+    if (i >= line.size()) {
       return gFalse;
     }
-    if (allInter[i].x0 > xx1 + 1 &&
+    if (line[i].x0 > xx1 + 1 &&
 	!(eo ? (count & 1) : (count != 0))) {
       return gFalse;
     }
-    if (allInter[i].x1 > xx1) {
-      xx1 = allInter[i].x1;
+    if (line[i].x1 > xx1) {
+      xx1 = line[i].x1;
     }
-    count += allInter[i].count;
+    count += line[i].count;
     ++i;
   }
 
@@ -215,21 +198,21 @@ GBool SplashXPathScanner::testSpan(int x0, int x1, int y) {
 GBool SplashXPathScanner::getNextSpan(int y, int *x0, int *x1) {
   int interEnd, xx0, xx1;
 
-  interEnd = inter[y - yMin + 1];
-  if (interIdx >= interEnd) {
+  const auto& line = allIntersections[y - yMin];
+  if (interIdx >= line.size()) {
     return gFalse;
   }
-  xx0 = allInter[interIdx].x0;
-  xx1 = allInter[interIdx].x1;
-  interCount += allInter[interIdx].count;
+  xx0 = line[interIdx].x0;
+  xx1 = line[interIdx].x1;
+  interCount += line[interIdx].count;
   ++interIdx;
-  while (interIdx < interEnd &&
-	 (allInter[interIdx].x0 <= xx1 ||
+  while (interIdx < line.size() &&
+	 (line[interIdx].x0 <= xx1 ||
 	  (eo ? (interCount & 1) : (interCount != 0)))) {
-    if (allInter[interIdx].x1 > xx1) {
-      xx1 = allInter[interIdx].x1;
+    if (line[interIdx].x1 > xx1) {
+      xx1 = line[interIdx].x1;
     }
-    interCount += allInter[interIdx].count;
+    interCount += line[interIdx].count;
     ++interIdx;
   }
   *x0 = xx0;
@@ -243,7 +226,7 @@ GBool SplashXPathScanner::setRow(int y) {
   }
 
   interCount = 0;
-  interIdx = inter[y - yMin];
+  interIdx = 0;
   return gTrue;
 }
 
@@ -257,10 +240,8 @@ void SplashXPathScanner::computeIntersections() {
   }
 
   // build the list of all intersections
-  allInterLen = 0;
-  allInterSize = 16;
-  allInter = (SplashIntersect *)gmallocn(allInterSize,
-					 sizeof(SplashIntersect));
+  allIntersections.resize(yMax - yMin + 1);
+
   for (i = 0; i < xPath->length; ++i) {
     seg = &xPath->segs[i];
     if (seg->flags & splashXPathFlip) {
@@ -330,50 +311,35 @@ void SplashXPathScanner::computeIntersections() {
       }
     }
   }
-  std::sort(allInter, allInter + allInterLen, cmpIntersectFunctor());
-
-  // build the list of y pointers
-  inter = (int *)gmallocn(yMax - yMin + 2, sizeof(int));
-  i = 0;
-  for (y = yMin; y <= yMax; ++y) {
-    inter[y - yMin] = i;
-    while (i < allInterLen && allInter[i].y <= y) {
-      ++i;
-    }
+  for (auto& line : allIntersections) {
+    std::sort(line.begin(), line.end(),
+              [](const SplashIntersect &i0, const SplashIntersect &i1) {
+                return i0.x0 < i1.x0;
+              });
   }
-  inter[yMax - yMin + 1] = i;
 }
 
 GBool SplashXPathScanner::addIntersection(double segYMin, double segYMax,
 					 Guint segFlags,
 					 int y, int x0, int x1) {
-  if (allInterLen == allInterSize) {
-    unsigned int newInterSize = ((unsigned int) allInterSize * 2 > INT_MAX / sizeof(SplashIntersect)) ? allInterSize + 32768 : allInterSize * 2;
-    if (newInterSize >= INT_MAX / sizeof(SplashIntersect)) {
-      error(errInternal, -1, "Bogus memory allocation size in SplashXPathScanner::addIntersection {0:d}", newInterSize);
-      return gFalse;
-    }
-    allInterSize = newInterSize;
-    allInter = (SplashIntersect *)greallocn(allInter, newInterSize,
-					    sizeof(SplashIntersect));
-  }
-  allInter[allInterLen].y = y;
+  SplashIntersect intersect;
+  intersect.y = y;
   if (x0 < x1) {
-    allInter[allInterLen].x0 = x0;
-    allInter[allInterLen].x1 = x1;
+    intersect.x0 = x0;
+    intersect.x1 = x1;
   } else {
-    allInter[allInterLen].x0 = x1;
-    allInter[allInterLen].x1 = x0;
+    intersect.x0 = x1;
+    intersect.x1 = x0;
   }
   if (segYMin <= y &&
       (SplashCoord)y < segYMax &&
       !(segFlags & splashXPathHoriz)) {
-    allInter[allInterLen].count = eo ? 1
+    intersect.count = eo ? 1
                                      : (segFlags & splashXPathFlip) ? 1 : -1;
   } else {
-    allInter[allInterLen].count = 0;
+    intersect.count = 0;
   }
-  ++allInterLen;
+  allIntersections[y - yMin].push_back(intersect);
   return gTrue;
 }
 
@@ -396,23 +362,23 @@ void SplashXPathScanner::renderAALine(SplashBitmap *aaBuf,
     if (yyMax + splashAASize * y > yMax) {
       yyMax = yMax - splashAASize * y;
     }
-    interIdx = inter[splashAASize * y + yy - yMin];
 
     for (; yy <= yyMax; ++yy) {
-      interEnd = inter[splashAASize * y + yy - yMin + 1];
+      const auto& line = allIntersections[splashAASize * y + yy - yMin];
+      interIdx = 0;
       interCount = 0;
-      while (interIdx < interEnd) {
-	xx0 = allInter[interIdx].x0;
-	xx1 = allInter[interIdx].x1;
-	interCount += allInter[interIdx].count;
+      while (interIdx < line.size()) {
+	xx0 = line[interIdx].x0;
+	xx1 = line[interIdx].x1;
+	interCount += line[interIdx].count;
 	++interIdx;
-	while (interIdx < interEnd &&
-	       (allInter[interIdx].x0 <= xx1 ||
+	while (interIdx < line.size() &&
+	       (line[interIdx].x0 <= xx1 ||
 		(eo ? (interCount & 1) : (interCount != 0)))) {
-	  if (allInter[interIdx].x1 > xx1) {
-	    xx1 = allInter[interIdx].x1;
+	  if (line[interIdx].x1 > xx1) {
+	    xx1 = line[interIdx].x1;
 	  }
-	  interCount += allInter[interIdx].count;
+	  interCount += line[interIdx].count;
 	  ++interIdx;
 	}
 	if (xx0 < 0) {
@@ -475,21 +441,21 @@ void SplashXPathScanner::clipAALine(SplashBitmap *aaBuf,
   for (yy = 0; yy < splashAASize; ++yy) {
     xx = *x0 * splashAASize;
     if (yy >= yyMin && yy <= yyMax) {
-      interIdx = inter[splashAASize * y + yy - yMin];
-      interEnd = inter[splashAASize * y + yy - yMin + 1];
+      const auto& line = allIntersections[splashAASize * y + yy - yMin];
+      interIdx = 0;
       interCount = 0;
-      while (interIdx < interEnd && xx < (*x1 + 1) * splashAASize) {
-	xx0 = allInter[interIdx].x0;
-	xx1 = allInter[interIdx].x1;
-	interCount += allInter[interIdx].count;
+      while (interIdx < line.size() && xx < (*x1 + 1) * splashAASize) {
+	xx0 = line[interIdx].x0;
+	xx1 = line[interIdx].x1;
+	interCount += line[interIdx].count;
 	++interIdx;
-	while (interIdx < interEnd &&
-	       (allInter[interIdx].x0 <= xx1 ||
+	while (interIdx < line.size() &&
+	       (line[interIdx].x0 <= xx1 ||
 		(eo ? (interCount & 1) : (interCount != 0)))) {
-	  if (allInter[interIdx].x1 > xx1) {
-	    xx1 = allInter[interIdx].x1;
+	  if (line[interIdx].x1 > xx1) {
+	    xx1 = line[interIdx].x1;
 	  }
-	  interCount += allInter[interIdx].count;
+	  interCount += line[interIdx].count;
 	  ++interIdx;
 	}
 	if (xx0 > aaBuf->getWidth()) {
diff --git a/splash/SplashXPathScanner.h b/splash/SplashXPathScanner.h
index 99f2c28f..349d1e00 100644
--- a/splash/SplashXPathScanner.h
+++ b/splash/SplashXPathScanner.h
@@ -27,10 +27,17 @@
 #endif
 
 #include "SplashTypes.h"
+#include <boost/container/small_vector.hpp>
+#include <vector>
 
 class SplashXPath;
 class SplashBitmap;
-struct SplashIntersect;
+
+struct SplashIntersect {
+  int y;
+  int x0, x1;			// intersection of segment with [y, y+1)
+  int count;			// EO/NZWN counter increment
+};
 
 //------------------------------------------------------------------------
 // SplashXPathScanner
@@ -99,10 +106,9 @@ private:
   int xMin, yMin, xMax, yMax;
   GBool partialClip;
 
-  SplashIntersect *allInter;	// array of intersections
-  int allInterLen;		// number of intersections in <allInter>
-  int allInterSize;		// size of the <allInter> array
-  int *inter;			// indexes into <allInter> for each y value
+  typedef boost::container::small_vector<SplashIntersect, 4> IntersectionLine;
+  std::vector<IntersectionLine> allIntersections;
+
   int interIdx;			// current index into <inter> - used by
 				//   getNextSpan 
   int interCount;		// current EO/NZWN counter - used by
-- 
2.16.3



More information about the poppler mailing list