[poppler] Getting a raster image like pdf2ppm
Angus March
angus at uducat.com
Tue May 19 08:00:46 PDT 2009
Adrian Johnson wrote:
> Angus March wrote:
>> I tried using Poppler to get a Cairo surface and then saving the surface
>> to a PNG. Unfortunately, the resulting image was of disastrously low
>> quality.
>
> Without seeing your code or the output you are getting I can only
> guess at what the problem might be. Did you alter the cairo scale to
> get the desired image dpi?
Altering the Cairo scale was definitely an improvement, but I think the
only thing that did improve was the resolution. The old problems that
caused me to abandon Cairo persisted, namely: gradients have ugly stripes
on them, a background that should be white and opaque is black and
transparent, and some text that has a shadow in the PDF doesn't have one
in the image. I don't suppose you know of a way to deal with those problems?
Below is my code for the Splash solution I was using, derived
from the pdftoppm code I found. Again, my problems there are
segmentation faults, which valgrind reported on. I won't repeat those
reports unless someone asks me to. Don't be fooled by the word
"thread": it's a throwback to when I had a multi-threaded (MT) solution.
This solution, as I'm sure you'll figure out, is multi-process. This is
the entirety of my Poppler and Splash code. Do you see anything that I missed?
void pdf2jpg::Execute(int desRead, const char *pPDFName, const char
*pJPGPrefix, int nMaxDimension) {
int &resolution = m_resolution;
m_sCurrentPDF = pPDFName;
signal(SIGSEGV, sig_handler);
boost::scoped_ptr<GlobalParams> bs(new GlobalParams());
globalParams = bs.get();
resolution = 150;
GooString *filename = new GooString(pPDFName);
m_pdoc = new PDFDoc(filename);
PDFDoc &doc = *m_pdoc;
if (!doc.isOk()) {
g_pLog->LogLine((astring)"Error opening " + pPDFName + " by
PDFDoc (Poppler)");
return;
}
SplashColor &paperColor = m_paperColor;
paperColor[0] =255;paperColor[1] = 255;paperColor[2] = 255;
bool bWorkerProcess = false;
{
double &pg_w = m_pg_w, &pg_h = m_pg_h;
int &w = m_w, &h = m_h;
pg_w = doc.getPageMediaWidth(1);
pg_h = doc.getPageMediaHeight(1);
pg_w = pg_w * (resolution / 72.0);
pg_h = pg_h * (resolution / 72.0);
if (doc.getPageRotate(1)) {
double tmp = pg_w;
pg_w = pg_h;
pg_h = tmp;
}
w = (int)ceil(pg_w);
h = (int)ceil(pg_h);
w = (0+w > pg_w ? (int)ceil(pg_w-0) : w);
h = (0+h > pg_h ? (int)ceil(pg_h-0) : h);
}
{
SplashOutputDev *splashOut = GetSplash(1);
SplashBitmap &bmp = *splashOut->getBitmap();
m_nInputWidth = bmp.getWidth();m_nInputHeight = bmp.getHeight();
if (m_nInputWidth > nMaxDimension || m_nInputHeight >
nMaxDimension) {
double dScale;
if (m_nInputWidth > m_nInputHeight) {
dScale = nMaxDimension/double(m_nInputWidth);
m_nOutputWidth = nMaxDimension;m_nOutputHeight =
m_nInputHeight*dScale;
}
else {
dScale = nMaxDimension/double(m_nInputHeight);
m_nOutputWidth = m_nInputWidth*dScale;m_nOutputHeight =
nMaxDimension;
}
}
else {
m_nOutputWidth = m_nInputWidth, m_nOutputHeight =
m_nInputHeight;
}
delete splashOut;
}
for (int pg = 1; pg <= doc.getNumPages() && !bWorkerProcess; ++pg) {
char pageno[5];sprintf(pageno, "%04d", pg - 1);
bWorkerProcess = TopupJpegThreads(pg, (astring)pJPGPrefix +
pageno + ".jpg");
if (!bWorkerProcess) { //multi-process stuff. Nothing to do
w/Poppler/Splash
fd_set fdsReceive; //check to see if
another core has been freed up
FD_ZERO(&fdsReceive);
int desMax = desRead;
FD_SET(desRead, &fdsReceive);
struct timeval _timeout;
bzero(&_timeout, sizeof(_timeout));
int nSelect = select(desMax + 1, &fdsReceive, NULL, NULL,
&_timeout);
if (nSelect == 1) {
m_nMaxThreadCount++;
char n[4];
ssize_t nRead = read(desRead, &n, 4);
assert(nRead == 1);
assert(n[0] == 1);
}
else assert(nSelect == 0);
}
}
verify(close(desRead) == 0);
if (!bWorkerProcess)
for (; m_nRunningProcessCount > 0; --m_nRunningProcessCount)
verify(wait(NULL) > 0);
}
bool pdf2jpg::TopupJpegThreads(int pg, const astring &sFileName) {
const int &nInputWidth = m_nInputWidth, &nInputHeight =
m_nInputHeight, &nOutputWidth = m_nOutputWidth, &nOutputHeight =
m_nOutputHeight;
if (m_nRunningProcessCount == m_nMaxThreadCount) {
verify(wait(NULL) > 0);
--m_nRunningProcessCount;
}
else assert(m_nRunningProcessCount < m_nMaxThreadCount);
pid_t pid = fork();
bool bWorkerProcess = pid == 0;
if (!bWorkerProcess) {
m_nRunningProcessCount++;
return false;
}
boost::scoped_ptr<SplashOutputDev> splashOut(GetSplash(pg));//this
is the only Poppler/Splash part of the function
JpegThread jpeg(nInputWidth, nInputHeight, nOutputWidth, nOutputHeight);
jpeg.GiveJob(splashOut.get(), sFileName);
return true;
}
/**
 * Renders page `pg` of m_pdoc into a newly allocated Splash output device.
 *
 * The device is created in splashModeRGB8 with m_paperColor as its paper
 * colour. The page is drawn at m_resolution in both directions into a slice
 * starting at (0, 0) with size m_w x m_h.
 *
 * @param pg  Page number passed straight to displayPageSlice().
 * @return The new device; the caller owns it and must delete it.
 */
SplashOutputDev *pdf2jpg::GetSplash(int pg) {
    SplashOutputDev *pDevice =
        new SplashOutputDev(splashModeRGB8, 4, gFalse, m_paperColor);
    pDevice->startDoc(m_pdoc->getXRef());

    // Argument roles below follow Poppler's PDFDoc::displayPageSlice()
    // declaration - confirm against the installed headers.
    m_pdoc->displayPageSlice(
        pDevice,
        pg,
        m_resolution, m_resolution,  // horizontal, vertical DPI
        0,                           // extra rotation
        gTrue, gFalse, gFalse,       // useMediaBox, crop, printing
        0, 0, m_w, m_h               // slice x, y, width, height
    );
    return pDevice;
}
/**
 * Converts the bitmap rendered into `pSplash` to a packed 3-bytes-per-pixel
 * image of m_nOutputWidth x m_nOutputHeight and writes it with write_jpeg().
 *
 * When the input is larger than the output in either dimension the image is
 * downsampled through Interpolate(); otherwise rows are copied unchanged,
 * dropping the bitmap's row padding.
 *
 * @param pSplash    Device holding the rendered page; must not be NULL. Not
 *                   owned - the caller remains responsible for deleting it.
 * @param sFileName  Destination file name handed to write_jpeg().
 */
void JpegThread::GiveJob(SplashOutputDev *pSplash, const astring &sFileName) {
    m_pSplash = pSplash;
    m_sFileName = sFileName;

    assert(m_pSplash != NULL);
    assert(m_nOutputHeight < 100000 && m_nOutputWidth < 100000);

    // Size the buffer in size_t so 3*w*h cannot overflow int, and check the
    // allocation before EITHER branch writes through it.
    const size_t nBytes = static_cast<size_t>(3) * m_nOutputWidth * m_nOutputHeight;
    char *pImage = (char *)malloc(nBytes);
    assert(pImage != NULL);
    if (pImage == NULL)
        return;

    // True when we are downsampling.
    const bool bScaling = m_nInputWidth > m_nOutputWidth || m_nInputHeight > m_nOutputHeight;

    // The scale is derived from the longer input side; 1.0 when not scaling.
    double dScale = 1.0;
    if (bScaling) {
        if (m_nInputWidth > m_nInputHeight)
            dScale = m_nOutputWidth / double(m_nInputWidth);
        else
            dScale = m_nOutputHeight / double(m_nInputHeight);
    }

    SplashBitmap &bmp = *m_pSplash->getBitmap();
    // The loops below address the bitmap using m_nInputWidth/m_nInputHeight;
    // a smaller bitmap would be read out of bounds.
    assert(bmp.getWidth() >= m_nInputWidth && bmp.getHeight() >= m_nInputHeight);
    Guchar *pImageOriginal = bmp.getDataPtr();
    const int nRowLength = bmp.getRowSize();
    assert(nRowLength < 100000);

    if (bScaling) {
        assert(dScale > 0);
        for (int nOutputY = 0; nOutputY < m_nOutputHeight; nOutputY++) {
            const double dInputY = nOutputY / dScale;
            // for each pixel in a destination row
            for (int nOutputX = 0; nOutputX < m_nOutputWidth; nOutputX++) {
                const double dInputX = nOutputX / dScale;
                Interpolate(pImage, pImageOriginal, dInputX, dInputY,
                            nOutputX, nOutputY, nRowLength,
                            m_nInputHeight, m_nInputWidth, m_nOutputWidth);
            }
        }
    }
    else {
        // Same size: copy row by row, skipping the source row padding.
        char *pOut = pImage;
        Guchar *pIn = pImageOriginal;
        for (int y = 0; y < m_nOutputHeight; y++) {
            memcpy(pOut, pIn, 3 * m_nOutputWidth);
            pOut += 3 * m_nOutputWidth;
            pIn += nRowLength;
        }
    }

    verify(write_jpeg(pImage, m_nOutputWidth, m_nOutputHeight, m_sFileName));
    free(pImage);
}
More information about the poppler
mailing list