[poppler] Getting a raster image like pdf2ppm

Angus March angus at uducat.com
Tue May 19 08:00:46 PDT 2009


Adrian Johnson wrote:
> Angus March wrote:
>> I tried using Poppler to get a Cairo surface and then saving the surface
>> to a PNG. Unfortunately, the resulting image was of disastrously low
>> quality.
>
> Without seeing your code or the output you are getting I can only
> guess at what the problem might be. Did you alter the cairo scale to
> get the desired image dpi?

    Altering the cairo scale was definitely an improvement, but I think
the only thing that improved was the resolution. The old problems that
caused me to abandon Cairo persist: gradients have ugly stripes on them,
a background that should be white and opaque is black and transparent,
and some text that has a shadow in the PDF has none in the image. I
don't suppose you know of a way to deal with those problems?

    Here is my code for the Splash solution I was using, derived from
the pdftoppm code I found. Again, my problems there are segmentation
faults, which valgrind reported on. I won't repeat those reports unless
someone asks me to. Don't be fooled by the word "thread" in the names;
it's a throwback to when I had a multi-threaded (MT) solution. This
solution, as I'm sure you'll figure out, is multi-process. This is the
entirety of my Poppler and Splash code. Do you see anything that I
missed?

void pdf2jpg::Execute(int desRead, const char *pPDFName, const char
*pJPGPrefix, int nMaxDimension) {
    int &resolution = m_resolution;

    m_sCurrentPDF = pPDFName;
    signal(SIGSEGV, sig_handler);

    boost::scoped_ptr<GlobalParams> bs(new GlobalParams());
    globalParams = bs.get();
    resolution = 150;
    GooString *filename = new GooString(pPDFName);
    m_pdoc = new PDFDoc(filename);
    PDFDoc &doc = *m_pdoc;
    if (!doc.isOk()) {
        g_pLog->LogLine((astring)"Error opening " + pPDFName + " by
PDFDoc (Poppler)");
        return;
    }
    SplashColor &paperColor = m_paperColor;
    paperColor[0] =255;paperColor[1] = 255;paperColor[2] = 255;

    bool bWorkerProcess = false;

    {
        double &pg_w = m_pg_w, &pg_h = m_pg_h;
        int &w = m_w, &h = m_h;
        pg_w = doc.getPageMediaWidth(1);
    pg_h = doc.getPageMediaHeight(1);
    pg_w = pg_w * (resolution / 72.0);
    pg_h = pg_h * (resolution / 72.0);
    if (doc.getPageRotate(1)) {
      double tmp = pg_w;
      pg_w = pg_h;
      pg_h = tmp;
    }
        w = (int)ceil(pg_w);
      h = (int)ceil(pg_h);
        w = (0+w > pg_w ? (int)ceil(pg_w-0) : w);
      h = (0+h > pg_h ? (int)ceil(pg_h-0) : h);
    }
    {
        SplashOutputDev *splashOut = GetSplash(1);
        SplashBitmap &bmp = *splashOut->getBitmap();
        m_nInputWidth = bmp.getWidth();m_nInputHeight = bmp.getHeight();
        if (m_nInputWidth > nMaxDimension || m_nInputHeight >
nMaxDimension) {
            double dScale;
            if (m_nInputWidth > m_nInputHeight) {
                dScale = nMaxDimension/double(m_nInputWidth);
                m_nOutputWidth = nMaxDimension;m_nOutputHeight =
m_nInputHeight*dScale;
            }
            else {
                dScale = nMaxDimension/double(m_nInputHeight);
                m_nOutputWidth = m_nInputWidth*dScale;m_nOutputHeight =
nMaxDimension;
            }
        }
        else {
            m_nOutputWidth = m_nInputWidth, m_nOutputHeight =
m_nInputHeight;
        }
        delete splashOut;
    }
  for (int pg = 1; pg <= doc.getNumPages() && !bWorkerProcess; ++pg) {
        char pageno[5];sprintf(pageno, "%04d", pg - 1);
        bWorkerProcess = TopupJpegThreads(pg, (astring)pJPGPrefix +
pageno + ".jpg");

        if (!bWorkerProcess) {   //multi-process stuff. Nothing to do
w/Poppler/Splash
            fd_set fdsReceive;                        //check to see if
another core has been freed up
            FD_ZERO(&fdsReceive);
            int desMax = desRead;
            FD_SET(desRead, &fdsReceive);
            struct timeval _timeout;
            bzero(&_timeout, sizeof(_timeout));
            int nSelect = select(desMax + 1, &fdsReceive, NULL, NULL,
&_timeout);
            if (nSelect == 1) {
                m_nMaxThreadCount++;
                char n[4];
                ssize_t nRead = read(desRead, &n, 4);
                assert(nRead == 1);
                assert(n[0] == 1);
            }
            else assert(nSelect == 0);
        }
  }
    verify(close(desRead) == 0);
    if (!bWorkerProcess)
        for (; m_nRunningProcessCount > 0; --m_nRunningProcessCount)
            verify(wait(NULL) > 0);
}

bool pdf2jpg::TopupJpegThreads(int pg, const astring &sFileName) {
    const int &nInputWidth = m_nInputWidth, &nInputHeight =
m_nInputHeight, &nOutputWidth = m_nOutputWidth, &nOutputHeight =
m_nOutputHeight;
    if (m_nRunningProcessCount == m_nMaxThreadCount) {
        verify(wait(NULL) > 0);
        --m_nRunningProcessCount;
    }
    else assert(m_nRunningProcessCount < m_nMaxThreadCount);
    pid_t pid = fork();
    bool bWorkerProcess = pid == 0;
    if (!bWorkerProcess) {
        m_nRunningProcessCount++;
        return false;
    }
    boost::scoped_ptr<SplashOutputDev> splashOut(GetSplash(pg));//this
is the only Poppler/Splash part of the function
    JpegThread jpeg(nInputWidth, nInputHeight, nOutputWidth, nOutputHeight);
    jpeg.GiveJob(splashOut.get(), sFileName);
    return true;
}

/**
 * Renders page `pg` of the open document into a freshly allocated RGB8
 * Splash output device, using m_resolution for both axes and the slice
 * (0, 0, m_w, m_h).
 *
 * @param pg 1-based page number.
 * @return The new device holding the rendered bitmap; the caller must
 *         delete it.
 */
SplashOutputDev *pdf2jpg::GetSplash(int pg) {
    SplashOutputDev *pDevice =
        new SplashOutputDev(splashModeRGB8, 4, gFalse, m_paperColor);

    pDevice->startDoc(m_pdoc->getXRef());

    // Argument order presumably follows Poppler's PDFDoc::displayPageSlice:
    // dpi x/y, rotation, three boolean flags, then slice x, y, w, h --
    // confirm against the Poppler version in use.
    m_pdoc->displayPageSlice(pDevice, pg,
                             m_resolution, m_resolution,
                             0,
                             gTrue, gFalse, gFalse,
                             0, 0, m_w, m_h);
    return pDevice;
}

/**
 * Converts the bitmap held by a Splash output device into a packed RGB
 * buffer of m_nOutputWidth x m_nOutputHeight pixels and writes it out with
 * write_jpeg().
 *
 * When the input is larger than the output in either dimension, every output
 * pixel is produced by Interpolate() from the scaled source position;
 * otherwise rows are copied directly, dropping the bitmap's row padding.
 *
 * @param pSplash   Device whose bitmap is read; must not be NULL. It is not
 *                  deleted here.
 * @param sFileName Name of the JPEG file to write.
 */
void JpegThread::GiveJob(SplashOutputDev *pSplash, const astring &sFileName) {
    m_pSplash = pSplash;
    m_sFileName = sFileName;

    // Validate before allocating, and size the buffer in size_t so that
    // 3 * width * height cannot overflow int.
    assert(m_pSplash != NULL);
    assert(m_nOutputWidth >= 0 && m_nOutputHeight >= 0);
    assert(m_nOutputHeight < 100000 && m_nOutputWidth < 100000);
    const size_t nOutRowBytes = 3 * static_cast<size_t>(m_nOutputWidth);
    const size_t nBytes = nOutRowBytes * static_cast<size_t>(m_nOutputHeight);

    char *pImage = (char *)malloc(nBytes);
    if (pImage == NULL) {
        // Previously only asserted on the copy path; report the failure the
        // same way a write failure is reported and do not touch the buffer.
        verify(pImage != NULL);
        return;
    }

    // True when we are downsampling.
    const bool bScaling = m_nInputWidth > m_nOutputWidth || m_nInputHeight > m_nOutputHeight;
    double dScale = 1.0;
    if (bScaling) {
        // Scale factor of the longer input side.
        if (m_nInputWidth > m_nInputHeight)
            dScale = m_nOutputWidth / double(m_nInputWidth);
        else
            dScale = m_nOutputHeight / double(m_nInputHeight);
    }

    {
        SplashBitmap &bmp = *m_pSplash->getBitmap();
        Guchar *pImageOriginal = bmp.getDataPtr();
        const int nRowLength = bmp.getRowSize();
        assert(nRowLength < 100000);
        if (bScaling) {
            assert(dScale > 0);
            for (int nOutputY = 0; nOutputY < m_nOutputHeight; nOutputY++) {
                const double dInputY = nOutputY / dScale;
                // for each pixel in a destination row
                for (int nOutputX = 0; nOutputX < m_nOutputWidth; nOutputX++) {
                    const double dInputX = nOutputX / dScale;

                    Interpolate(pImage, pImageOriginal, dInputX, dInputY,
                                nOutputX, nOutputY, nRowLength,
                                m_nInputHeight, m_nInputWidth, m_nOutputWidth);
                }
            }
        }
        else {
            // Never read more than the bitmap really holds: this branch is
            // also taken when the cached output size exceeds the bitmap, and
            // copying full output rows would then run past the source.
            const int nBmpW = bmp.getWidth();
            const int nBmpH = bmp.getHeight();
            const int nCopyW = m_nOutputWidth < nBmpW ? m_nOutputWidth : nBmpW;
            const int nCopyH = m_nOutputHeight < nBmpH ? m_nOutputHeight : nBmpH;
            if (nCopyW < m_nOutputWidth || nCopyH < m_nOutputHeight)
                memset(pImage, 0, nBytes);  // pixels not covered stay black

            char *pOut = pImage;
            Guchar *pIn = pImageOriginal;
            for (int y = 0; y < nCopyH; y++) {
                memcpy(pOut, pIn, 3 * static_cast<size_t>(nCopyW));
                pOut += nOutRowBytes;
                pIn += nRowLength;  // source rows carry padding
            }
        }
        verify(write_jpeg(pImage, m_nOutputWidth, m_nOutputHeight, m_sFileName));
    }
    free(pImage);
}


More information about the poppler mailing list