[poppler] [PATCH v2 4/5] SplashXPathScanner: Reduce complexity of sorting spans

Stefan BrĂ¼ns stefan.bruens at rwth-aachen.de
Sat May 26 17:51:23 UTC 2018


For complex paths, a significant amount of time is spent in
SplashXPathScanner::computeIntersections, more specifically with
sorting the spans in y/x order.

Instead of using one large array for all spans, use a 2-dimensional
structure.  As the number of y positions is known upfront, it is
possible to create an array for the y dimension and insert the spans
directly at the appropriate position.

For Y rows with X spans per row, this reduces the complexity for sorting
from O( Y*X log Y*X) to O( Y * X log X), i.e. a reduction by log Y.

For the documents from fdo#96728 and fdo#78728, the runtime/memory is
significantly reduced (according to /usr/bin/time -v):
(1) $> pdftoppm -r 18 -aa no runsforever-poppler.pdf
(2) $> pdftoppm surf-types.pdf

Before/After
                                  runsforever-poppler |    surf-types
User time (seconds):                2979.80 / 2348.08 |  9.45 /  7.76
Maximum resident set size (kbytes):   51208 /   46288 | 18084 / 14076

---
v2: split addition of boost::container::small_vector use into separate patch
    reserve space for 4 elements on first insertion
    updated benchmark results, add second document
    remove some no longer used variables
    fix signedness when comparing with vector<>::size()
---
 splash/SplashXPathScanner.cc | 202 ++++++++++++++++++-------------------------
 splash/SplashXPathScanner.h  |  18 ++--
 2 files changed, 97 insertions(+), 123 deletions(-)

diff --git a/splash/SplashXPathScanner.cc b/splash/SplashXPathScanner.cc
index 9c46f9dd..444b8f96 100644
--- a/splash/SplashXPathScanner.cc
+++ b/splash/SplashXPathScanner.cc
@@ -37,17 +37,6 @@
 
 //------------------------------------------------------------------------
 
-struct SplashIntersect {
-  int y;
-  int x0, x1;			// intersection of segment with [y, y+1)
-  int count;			// EO/NZWN counter increment
-};
-
-struct cmpIntersectFunctor {
-  bool operator()(const SplashIntersect &i0, const SplashIntersect &i1) {
-    return (i0.y != i1.y) ? (i0.y < i1.y) : (i0.x0 < i1.x0);
-  }
-};
 
 //------------------------------------------------------------------------
 // SplashXPathScanner
@@ -119,14 +108,10 @@ SplashXPathScanner::SplashXPathScanner(SplashXPath *xPathA, GBool eoA,
     }
   }
 
-  allInter = nullptr;
-  inter = nullptr;
   computeIntersections();
 }
 
 SplashXPathScanner::~SplashXPathScanner() {
-  gfree(inter);
-  gfree(allInter);
 }
 
 void SplashXPathScanner::getBBoxAA(int *xMinA, int *yMinA,
@@ -138,20 +123,18 @@ void SplashXPathScanner::getBBoxAA(int *xMinA, int *yMinA,
 }
 
 void SplashXPathScanner::getSpanBounds(int y, int *spanXMin, int *spanXMax) {
-  int interBegin, interEnd, xx, i;
-
   if (y < yMin || y > yMax) {
-    interBegin = interEnd = 0;
-  } else {
-    interBegin = inter[y - yMin];
-    interEnd = inter[y - yMin + 1];
+    *spanXMin = xMax + 1;
+    *spanXMax = xMax;
+    return;
   }
-  if (interBegin < interEnd) {
-    *spanXMin = allInter[interBegin].x0;
-    xx = allInter[interBegin].x1;
-    for (i = interBegin + 1; i < interEnd; ++i) {
-      if (allInter[i].x1 > xx) {
-	xx = allInter[i].x1;
+  const auto& line = allIntersections[y - yMin];
+  if (!line.empty()) {
+    *spanXMin = line[0].x0;
+    int xx = line[0].x1;
+    for (unsigned int i = 1; i < line.size(); ++i) {
+      if (line[i].x1 > xx) {
+	xx = line[i].x1;
       }
     }
     *spanXMax = xx;
@@ -162,50 +145,46 @@ void SplashXPathScanner::getSpanBounds(int y, int *spanXMin, int *spanXMax) {
 }
 
 GBool SplashXPathScanner::test(int x, int y) {
-  int interBegin, interEnd, count, i;
-
   if (y < yMin || y > yMax) {
     return gFalse;
   }
-  interBegin = inter[y - yMin];
-  interEnd = inter[y - yMin + 1];
-  count = 0;
-  for (i = interBegin; i < interEnd && allInter[i].x0 <= x; ++i) {
-    if (x <= allInter[i].x1) {
+  const auto& line = allIntersections[y - yMin];
+  int count = 0;
+  for (unsigned int i = 0; i < line.size() && line[i].x0 <= x; ++i) {
+    if (x <= line[i].x1) {
       return gTrue;
     }
-    count += allInter[i].count;
+    count += line[i].count;
   }
   return eo ? (count & 1) : (count != 0);
 }
 
 GBool SplashXPathScanner::testSpan(int x0, int x1, int y) {
-  int interBegin, interEnd, count, xx1, i;
+  unsigned int i;
 
   if (y < yMin || y > yMax) {
     return gFalse;
   }
-  interBegin = inter[y - yMin];
-  interEnd = inter[y - yMin + 1];
-  count = 0;
-  for (i = interBegin; i < interEnd && allInter[i].x1 < x0; ++i) {
-    count += allInter[i].count;
+  const auto& line = allIntersections[y - yMin];
+  int count = 0;
+  for (i = 0; i < line.size() && line[i].x1 < x0; ++i) {
+    count += line[i].count;
   }
 
   // invariant: the subspan [x0,xx1] is inside the path
-  xx1 = x0 - 1;
+  int xx1 = x0 - 1;
   while (xx1 < x1) {
-    if (i >= interEnd) {
+    if (i >= line.size()) {
       return gFalse;
     }
-    if (allInter[i].x0 > xx1 + 1 &&
+    if (line[i].x0 > xx1 + 1 &&
 	!(eo ? (count & 1) : (count != 0))) {
       return gFalse;
     }
-    if (allInter[i].x1 > xx1) {
-      xx1 = allInter[i].x1;
+    if (line[i].x1 > xx1) {
+      xx1 = line[i].x1;
     }
-    count += allInter[i].count;
+    count += line[i].count;
     ++i;
   }
 
@@ -213,23 +192,23 @@ GBool SplashXPathScanner::testSpan(int x0, int x1, int y) {
 }
 
 GBool SplashXPathScanner::getNextSpan(int y, int *x0, int *x1) {
-  int interEnd, xx0, xx1;
+  int xx0, xx1;
 
-  interEnd = inter[y - yMin + 1];
-  if (interIdx >= interEnd) {
+  const auto& line = allIntersections[y - yMin];
+  if (interIdx >= line.size()) {
     return gFalse;
   }
-  xx0 = allInter[interIdx].x0;
-  xx1 = allInter[interIdx].x1;
-  interCount += allInter[interIdx].count;
+  xx0 = line[interIdx].x0;
+  xx1 = line[interIdx].x1;
+  interCount += line[interIdx].count;
   ++interIdx;
-  while (interIdx < interEnd &&
-	 (allInter[interIdx].x0 <= xx1 ||
+  while (interIdx < line.size() &&
+	 (line[interIdx].x0 <= xx1 ||
 	  (eo ? (interCount & 1) : (interCount != 0)))) {
-    if (allInter[interIdx].x1 > xx1) {
-      xx1 = allInter[interIdx].x1;
+    if (line[interIdx].x1 > xx1) {
+      xx1 = line[interIdx].x1;
     }
-    interCount += allInter[interIdx].count;
+    interCount += line[interIdx].count;
     ++interIdx;
   }
   *x0 = xx0;
@@ -243,7 +222,7 @@ GBool SplashXPathScanner::setRow(int y) {
   }
 
   interCount = 0;
-  interIdx = inter[y - yMin];
+  interIdx = 0;
   return gTrue;
 }
 
@@ -257,10 +236,8 @@ void SplashXPathScanner::computeIntersections() {
   }
 
   // build the list of all intersections
-  allInterLen = 0;
-  allInterSize = 16;
-  allInter = (SplashIntersect *)gmallocn(allInterSize,
-					 sizeof(SplashIntersect));
+  allIntersections.resize(yMax - yMin + 1);
+
   for (i = 0; i < xPath->length; ++i) {
     seg = &xPath->segs[i];
     if (seg->flags & splashXPathFlip) {
@@ -330,56 +307,47 @@ void SplashXPathScanner::computeIntersections() {
       }
     }
   }
-  std::sort(allInter, allInter + allInterLen, cmpIntersectFunctor());
-
-  // build the list of y pointers
-  inter = (int *)gmallocn(yMax - yMin + 2, sizeof(int));
-  i = 0;
-  for (y = yMin; y <= yMax; ++y) {
-    inter[y - yMin] = i;
-    while (i < allInterLen && allInter[i].y <= y) {
-      ++i;
-    }
+  for (auto& line : allIntersections) {
+    std::sort(line.begin(), line.end(),
+              [](const SplashIntersect &i0, const SplashIntersect &i1) {
+                return i0.x0 < i1.x0;
+              });
   }
-  inter[yMax - yMin + 1] = i;
 }
 
 GBool SplashXPathScanner::addIntersection(double segYMin, double segYMax,
 					 Guint segFlags,
 					 int y, int x0, int x1) {
-  if (allInterLen == allInterSize) {
-    unsigned int newInterSize = ((unsigned int) allInterSize * 2 > INT_MAX / sizeof(SplashIntersect)) ? allInterSize + 32768 : allInterSize * 2;
-    if (newInterSize >= INT_MAX / sizeof(SplashIntersect)) {
-      error(errInternal, -1, "Bogus memory allocation size in SplashXPathScanner::addIntersection {0:d}", newInterSize);
-      return gFalse;
-    }
-    allInterSize = newInterSize;
-    allInter = (SplashIntersect *)greallocn(allInter, newInterSize,
-					    sizeof(SplashIntersect));
-  }
-  allInter[allInterLen].y = y;
+  SplashIntersect intersect;
+  intersect.y = y;
   if (x0 < x1) {
-    allInter[allInterLen].x0 = x0;
-    allInter[allInterLen].x1 = x1;
+    intersect.x0 = x0;
+    intersect.x1 = x1;
   } else {
-    allInter[allInterLen].x0 = x1;
-    allInter[allInterLen].x1 = x0;
+    intersect.x0 = x1;
+    intersect.x1 = x0;
   }
   if (segYMin <= y &&
       (SplashCoord)y < segYMax &&
       !(segFlags & splashXPathHoriz)) {
-    allInter[allInterLen].count = eo ? 1
-                                     : (segFlags & splashXPathFlip) ? 1 : -1;
+    intersect.count = eo ? 1
+                         : (segFlags & splashXPathFlip) ? 1 : -1;
   } else {
-    allInter[allInterLen].count = 0;
+    intersect.count = 0;
   }
-  ++allInterLen;
+
+  auto& line = allIntersections[y - yMin];
+  if (line.empty()) {
+      line.reserve(4);
+  }
+  line.push_back(intersect);
+
   return gTrue;
 }
 
 void SplashXPathScanner::renderAALine(SplashBitmap *aaBuf,
 				      int *x0, int *x1, int y, GBool adjustVertLine) {
-  int xx0, xx1, xx, xxMin, xxMax, yy, yyMax, interEnd;
+  int xx0, xx1, xx, xxMin, xxMax, yy, yyMax;
   Guchar mask;
   SplashColorPtr p;
 
@@ -396,23 +364,23 @@ void SplashXPathScanner::renderAALine(SplashBitmap *aaBuf,
     if (yyMax + splashAASize * y > yMax) {
       yyMax = yMax - splashAASize * y;
     }
-    interIdx = inter[splashAASize * y + yy - yMin];
 
     for (; yy <= yyMax; ++yy) {
-      interEnd = inter[splashAASize * y + yy - yMin + 1];
+      const auto& line = allIntersections[splashAASize * y + yy - yMin];
+      interIdx = 0;
       interCount = 0;
-      while (interIdx < interEnd) {
-	xx0 = allInter[interIdx].x0;
-	xx1 = allInter[interIdx].x1;
-	interCount += allInter[interIdx].count;
+      while (interIdx < line.size()) {
+	xx0 = line[interIdx].x0;
+	xx1 = line[interIdx].x1;
+	interCount += line[interIdx].count;
 	++interIdx;
-	while (interIdx < interEnd &&
-	       (allInter[interIdx].x0 <= xx1 ||
+	while (interIdx < line.size() &&
+	       (line[interIdx].x0 <= xx1 ||
 		(eo ? (interCount & 1) : (interCount != 0)))) {
-	  if (allInter[interIdx].x1 > xx1) {
-	    xx1 = allInter[interIdx].x1;
+	  if (line[interIdx].x1 > xx1) {
+	    xx1 = line[interIdx].x1;
 	  }
-	  interCount += allInter[interIdx].count;
+	  interCount += line[interIdx].count;
 	  ++interIdx;
 	}
 	if (xx0 < 0) {
@@ -459,7 +427,7 @@ void SplashXPathScanner::renderAALine(SplashBitmap *aaBuf,
 
 void SplashXPathScanner::clipAALine(SplashBitmap *aaBuf,
 				    int *x0, int *x1, int y) {
-  int xx0, xx1, xx, yy, yyMin, yyMax, interEnd;
+  int xx0, xx1, xx, yy, yyMin, yyMax;
   Guchar mask;
   SplashColorPtr p;
 
@@ -475,21 +443,21 @@ void SplashXPathScanner::clipAALine(SplashBitmap *aaBuf,
   for (yy = 0; yy < splashAASize; ++yy) {
     xx = *x0 * splashAASize;
     if (yy >= yyMin && yy <= yyMax) {
-      interIdx = inter[splashAASize * y + yy - yMin];
-      interEnd = inter[splashAASize * y + yy - yMin + 1];
+      const auto& line = allIntersections[splashAASize * y + yy - yMin];
+      interIdx = 0;
       interCount = 0;
-      while (interIdx < interEnd && xx < (*x1 + 1) * splashAASize) {
-	xx0 = allInter[interIdx].x0;
-	xx1 = allInter[interIdx].x1;
-	interCount += allInter[interIdx].count;
+      while (interIdx < line.size() && xx < (*x1 + 1) * splashAASize) {
+	xx0 = line[interIdx].x0;
+	xx1 = line[interIdx].x1;
+	interCount += line[interIdx].count;
 	++interIdx;
-	while (interIdx < interEnd &&
-	       (allInter[interIdx].x0 <= xx1 ||
+	while (interIdx < line.size() &&
+	       (line[interIdx].x0 <= xx1 ||
 		(eo ? (interCount & 1) : (interCount != 0)))) {
-	  if (allInter[interIdx].x1 > xx1) {
-	    xx1 = allInter[interIdx].x1;
+	  if (line[interIdx].x1 > xx1) {
+	    xx1 = line[interIdx].x1;
 	  }
-	  interCount += allInter[interIdx].count;
+	  interCount += line[interIdx].count;
 	  ++interIdx;
 	}
 	if (xx0 > aaBuf->getWidth()) {
diff --git a/splash/SplashXPathScanner.h b/splash/SplashXPathScanner.h
index 99f2c28f..37b85f4b 100644
--- a/splash/SplashXPathScanner.h
+++ b/splash/SplashXPathScanner.h
@@ -28,9 +28,16 @@
 
 #include "SplashTypes.h"
 
+#include <vector>
+
 class SplashXPath;
 class SplashBitmap;
-struct SplashIntersect;
+
+struct SplashIntersect {
+  int y;
+  int x0, x1;			// intersection of segment with [y, y+1)
+  int count;			// EO/NZWN counter increment
+};
 
 //------------------------------------------------------------------------
 // SplashXPathScanner
@@ -99,11 +106,10 @@ private:
   int xMin, yMin, xMax, yMax;
   GBool partialClip;
 
-  SplashIntersect *allInter;	// array of intersections
-  int allInterLen;		// number of intersections in <allInter>
-  int allInterSize;		// size of the <allInter> array
-  int *inter;			// indexes into <allInter> for each y value
-  int interIdx;			// current index into <inter> - used by
+  typedef std::vector<SplashIntersect> IntersectionLine;
+  std::vector<IntersectionLine> allIntersections;
+
+  unsigned int interIdx;	// current index into <inter> - used by
 				//   getNextSpan 
   int interCount;		// current EO/NZWN counter - used by
 				//   getNextSpan
-- 
2.16.3



More information about the poppler mailing list