[poppler] problem with rectangles of links in a pdf

Vincent Torri vtorri at univ-evry.fr
Tue Oct 12 03:08:57 PDT 2010



On Tue, 12 Oct 2010, Carlos Garcia Campos wrote:

> Without looking at the code it's difficult to know what's wrong.

file attached. The link code begins at line 671. In the switch, just 
consider the default case. I didn't fix the other cases.

Vincent Torri
-------------- next part --------------
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <cmath>

#include <Evas.h>

#include <GlobalParams.h>
#include <PDFDoc.h>
#include <FontInfo.h>
#include <SplashOutputDev.h>
#include <TextOutputDev.h>
#include <splash/SplashBitmap.h>
#include <Outline.h>
#include <Gfx.h>
#include <PDFDocEncoding.h>
#include <UnicodeMap.h>
#include <ErrorCodes.h>
#include <Link.h>

#include "Eyesight.h"
#include "eyesight_private.h"
#include "eyesight_pdf.h"


#define DBG(...) EINA_LOG_DOM_DBG(_eyesight_pdf_log_domain, __VA_ARGS__)
#define INF(...) EINA_LOG_DOM_INFO(_eyesight_pdf_log_domain, __VA_ARGS__)
#define WRN(...) EINA_LOG_DOM_WARN(_eyesight_pdf_log_domain, __VA_ARGS__)
#define ERR(...) EINA_LOG_DOM_ERR(_eyesight_pdf_log_domain, __VA_ARGS__)
#define CRIT(...) EINA_LOG_DOM_CRIT(_eyesight_pdf_log_domain, __VA_ARGS__)


static int _eyesight_pdf_log_domain = -1;

static Eyesight_Link_Action_Kind
_eyesight_link_action_get(LinkAction *action)
{
  if (!action->isOk())
    return EYESIGHT_LINK_ACTION_UNKNOWN;

  switch (action->getKind())
    {
    case actionGoTo:
      return EYESIGHT_LINK_ACTION_GOTO;
    case actionGoToR:
      return EYESIGHT_LINK_ACTION_GOTO_NEW_FILE;
    case actionLaunch:
      return EYESIGHT_LINK_ACTION_LAUNCH;
    case actionURI:
      return EYESIGHT_LINK_ACTION_URI;
    case actionNamed:
      return EYESIGHT_LINK_ACTION_NAMED;
    case actionMovie:
      return EYESIGHT_LINK_ACTION_MOVIE;
    default:
      return EYESIGHT_LINK_ACTION_UNKNOWN;
    }
}

static int
_eyesight_link_action_page_get(PDFDoc *doc, LinkAction *action)
{
  int page;

  if (!action->isOk())
    return -1;

  GooString *named_dest = ((LinkGoTo *)action)->getNamedDest();
  LinkDest *dest = ((LinkGoTo *)action)->getDest();

  if (!dest && named_dest)
    {
      dest = doc->findDest(named_dest);
    }
  if (dest && dest->isOk())
    {
      if (dest->isPageRef())
        {
          page = doc->findPage(dest->getPageRef().num,
                               dest->getPageRef().gen) - 1;
        }
      else
        {
          page = dest->getPageNum() - 1;
        }
    }
  if (dest) delete dest;

  return page;
}

static char *
_unicode_to_char (Unicode *unicode,
                  int      len)
{
  static UnicodeMap *uMap = NULL;

  if (uMap == NULL)
    {
      GooString *enc = new GooString("UTF-8");
      uMap = globalParams->getUnicodeMap(enc);
      uMap->incRefCnt();
      delete enc;
    }

  GooString gstr;
  char buf[8]; /* 8 is enough for mapping an unicode char to a string */
  int i, n;

  for (i = 0; i < len; ++i)
    {
      n = uMap->mapUnicode(unicode[i], buf, sizeof(buf));
      gstr.append(buf, n);
    }

  return strdup (gstr.getCString());
}

static Eina_List *
_eyesight_index_fill(PDFDoc    *doc,
                     Eina_List *items,
                     GooList   *gitems)
{
  if (!gitems)
    return items;

  for (int i = 0; i < gitems->getLength(); i++) {
    Eyesight_Index_Item *item;
    OutlineItem     *oitem = (OutlineItem *)gitems->get(i);
    Unicode         *utitle = oitem->getTitle ();
    LinkAction      *action;

    item = eyesight_index_item_new();
    item->title = _unicode_to_char(utitle, oitem->getTitleLength());
    action = oitem->getAction();
    item->action = EYESIGHT_LINK_ACTION_UNKNOWN;
//    if (action->getKind() != actionGoTo)
    item->page = _eyesight_link_action_page_get(doc, action);

    oitem->open ();
    if (oitem->hasKids() && oitem->getKids())
      item->children = _eyesight_index_fill(doc, item->children, oitem->getKids ());
    items = eina_list_append (items, item);
  }
  return items;
}

static void
_eyesight_index_unfill (Eina_List *items)
{
  Eyesight_Index_Item *item;

  if (!items)
    return;

  while (items)
    {
      Eyesight_Index_Item *item;

      item = (Eyesight_Index_Item *)eina_list_data_get(items);

      if (item->title)
        free (item->title);

      if (item->children)
        _eyesight_index_unfill (item->children);

      free (item);

      items = eina_list_remove_list(items, items);
    }
}

static Eina_Bool
em_init(Evas *evas, Evas_Object **obj, void **eyesight_backend)
{
  Eyesight_Backend_Pdf *ebp;

  if (!eyesight_backend)
    return EINA_FALSE;

  ebp = (Eyesight_Backend_Pdf *)calloc(1, sizeof(Eyesight_Backend_Pdf));
  if (!ebp)
    return EINA_FALSE;

  if (!(globalParams = new GlobalParams()))
    goto free_ebp;

  ebp->obj = evas_object_image_add(evas);
  if (!ebp->obj)
    goto delete_params;

  *obj = ebp->obj;
  *eyesight_backend = ebp;

  return EINA_TRUE;

 delete_params:
  delete globalParams;
 free_ebp:
  free(ebp);

  return EINA_FALSE;
}

static void
em_shutdown(void *eb)
{
  Eyesight_Backend_Pdf *ebp;

  if (!eb)
    return;

  ebp = (Eyesight_Backend_Pdf *)eb;
  evas_object_del(ebp->obj);
  delete globalParams;
  free(eb);
}

static Eina_Bool
em_file_open(void *eb, const char *filename)
{
  Eyesight_Backend_Pdf *ebp;
  Object                obj;
  Outline              *outline;
  GooList              *gitems;

  if (!eb || !filename || !*filename)
    return EINA_FALSE;

  DBG("Open file %s", filename);

  ebp = (Eyesight_Backend_Pdf *)eb;
  ebp->filename = strdup(filename);
  if (!ebp->filename)
    return EINA_FALSE;

  ebp->doc.pdfdoc = new PDFDoc(new GooString(filename), NULL);
  if (!ebp->doc.pdfdoc)
    goto free_filename;

  if (!ebp->doc.pdfdoc->isOk() &&
      ebp->doc.pdfdoc->getErrorCode() != errEncrypted)
    {
      ERR("PDF file %s illformed", filename);
       goto delete_pdfdoc;
    }

  ebp->doc.scanner = new FontInfoScanner(ebp->doc.pdfdoc);
  if (!ebp->doc.scanner)
    goto delete_pdfdoc;

  if (ebp->doc.pdfdoc->getErrorCode() == errEncrypted)
    ebp->doc.locked = EINA_TRUE;
  else
    ebp->doc.locked = EINA_FALSE;

  ebp->doc.pdfdoc->getDocInfo(&obj);
  if (!obj.isDict())
    goto delete_scanner;

  ebp->doc.dict = obj.getDict();
  if (!ebp->doc.dict)
    goto delete_scanner;

  outline = ebp->doc.pdfdoc->getOutline();
  if (!outline)
    goto no_index;

  gitems = outline->getItems();
  if (!gitems)
    goto no_index;

  ebp->toc.index = _eyesight_index_fill (ebp->doc.pdfdoc, NULL, gitems);

 no_index:
  ebp->page.hscale = 1.0;
  ebp->page.vscale = 1.0;
  ebp->page.orientation = EYESIGHT_ORIENTATION_PORTRAIT;

  return EINA_TRUE;

 delete_scanner:
  obj.free();
  delete ebp->doc.scanner;
 delete_pdfdoc:
  delete ebp->doc.pdfdoc;
 free_filename:
  free(ebp->filename);

  return EINA_FALSE;
}

static void
em_file_close(void *eb)
{
  Eyesight_Backend_Pdf *ebp;

  if (!eb)
    return;

  ebp = (Eyesight_Backend_Pdf *)eb;

  DBG("Close file %s", ebp->filename);

  _eyesight_index_unfill(ebp->toc.index);
  delete ebp->doc.dict;
  delete ebp->doc.scanner;
  delete ebp->doc.pdfdoc;
  free(ebp->filename);
}

static Eina_List *
em_toc_get(void *eb)
{
  Eyesight_Backend_Pdf *ebp;

  if (!eb)
    return NULL;

  ebp = (Eyesight_Backend_Pdf *)eb;

  return ebp->toc.index;
}

static int
em_page_count(void *eb)
{
  Eyesight_Backend_Pdf *ebp;

  if (!eb)
    return 0;

  ebp = (Eyesight_Backend_Pdf *)eb;

  return ebp->doc.pdfdoc->getNumPages();
}

static void
em_page_set(void *eb, int page)
{
  Eyesight_Backend_Pdf *ebp;

  if (!eb)
    return;

  if (page < 0)
    {
       ERR("Page number is negative");
       return;
    }

  ebp = (Eyesight_Backend_Pdf *)eb;

  if (page >= ebp->doc.pdfdoc->getNumPages())
    WRN("Page number is beyond the maximal number of pages");

  Page *p = ebp->doc.pdfdoc->getCatalog()->getPage(page + 1);
  if (!p || !p->isOk())
    {
       ERR("Can not retrieve page %d", page);
       return;
    }

  DBG("page=%d", page);

  ebp->page.page = p;
}

static int
em_page_get(void *eb)
{
  Eyesight_Backend_Pdf *ebp;

  if (!eb)
    return 0;

  ebp = (Eyesight_Backend_Pdf *)eb;

  if (!ebp->page.page)
    return 0;

  return ebp->page.page->getNum() - 1;
}

static void
em_page_scale_set(void *eb, double hscale, double vscale)
{
  Eyesight_Backend_Pdf *ebp;

  if (!eb)
    return;

  DBG("horizontal scale=%f vertical scale=%f", hscale, vscale);

  ebp = (Eyesight_Backend_Pdf *)eb;

  ebp->page.hscale = hscale;
  ebp->page.vscale = vscale;
}

static void
em_page_scale_get(void *eb, double *hscale, double *vscale)
{
  Eyesight_Backend_Pdf *ebp;

  if (!eb)
    {
      if (hscale) *hscale = 1.0;
      if (vscale) *vscale = 1.0;
    }

  ebp = (Eyesight_Backend_Pdf *)eb;

  if (hscale) *hscale = ebp->page.hscale;
  if (vscale) *vscale = ebp->page.vscale;
}

static void
em_page_orientation_set(void *eb, Eyesight_Orientation orientation)
{
  Eyesight_Backend_Pdf *ebp;

  if (!eb)
    return;

  DBG("orientation=%d", orientation);

  ebp = (Eyesight_Backend_Pdf *)eb;

  ebp->page.orientation = orientation;
}

static Eyesight_Orientation
em_page_orientation_get(void *eb)
{
  Eyesight_Backend_Pdf *ebp;

  if (!eb)
    return EYESIGHT_ORIENTATION_PORTRAIT;

  ebp = (Eyesight_Backend_Pdf *)eb;

  return ebp->page.orientation;
}

void
em_page_size_get(void *eb, int *width, int *height)
{
  Eyesight_Backend_Pdf *ebp;
  int rotate;
  int w;
  int h;

  if (!eb)
    {
      if (width) *width = 0;
      if (height) *height = 0;
      return;
    }

  ebp = (Eyesight_Backend_Pdf *)eb;

  rotate = ebp->page.page->getRotate();
  if (rotate == 90 || rotate == 270)
    {
      w = round(ebp->page.page->getMediaHeight());
      h = round(ebp->page.page->getMediaWidth());
    }
  else
    {
      w = round(ebp->page.page->getMediaWidth());
      h = round(ebp->page.page->getMediaHeight());
    }

  if (width) *width = w;
  if (height) *height = h;
}

static void
em_page_render(void *eb)
{
  Eyesight_Backend_Pdf *ebp;
  unsigned int    *m = NULL;
  SplashColor      white;
  SplashColorPtr   color_ptr;
  int              width;
  int              height;

  if (!eb)
    return;

  ebp = (Eyesight_Backend_Pdf *)eb;

  if (!ebp->page.page)
    ebp->page.page = ebp->doc.pdfdoc->getCatalog()->getPage(1);

  if (!ebp->page.page || !ebp->page.page->isOk())
    {
       ERR("Could not retrieve page from the document");
       return;
    }

  white[0] = 255;
  white[1] = 255;
  white[2] = 255;
  white[3] = 255;

  SplashOutputDev output_dev(splashModeXBGR8, 4, gFalse, white);
  output_dev.startDoc(ebp->doc.pdfdoc->getXRef());

  ebp->page.page->display(&output_dev,
                          72.0 * ebp->page.hscale,
                          72.0 * ebp->page.vscale,
                          ebp->page.orientation,
                          false, false, false,
                          ebp->doc.pdfdoc->getCatalog());
  color_ptr = output_dev.getBitmap()->getDataPtr();

  width = output_dev.getBitmap()->getWidth();
  height = output_dev.getBitmap()->getHeight();

  evas_object_image_size_set(ebp->obj, width, height);
  evas_object_image_fill_set(ebp->obj, 0, 0, width, height);
  m = (unsigned int *)evas_object_image_data_get(ebp->obj, 1);
  if (!m)
    {
       ERR("Could not retrieve data from the Evas Object");
       return;
    }

  memcpy(m, color_ptr, height * width * 4);
  evas_object_image_data_set(ebp->obj, m);
  evas_object_image_data_update_add(ebp->obj, 0, 0, width, height);
  evas_object_resize(ebp->obj, width, height);
}

char *
em_page_text_get(void *eb, Eina_Rectangle rect)
{
  Eyesight_Backend_Pdf *ebp;
  GooString     *sel_text;
  Gfx           *gfx;
  PDFRectangle   pdf_selection;
  char *res;

  if (!eb)
    return NULL;

  DBG("rect=(x=%d, y=%d, w=%d, h=%d)", rect.x, rect.y, rect.w, rect.h);

  ebp = (Eyesight_Backend_Pdf *)eb;

  TextOutputDev text_dev(NULL, 1, 0, 0);

  gfx = ebp->page.page->createGfx(&text_dev,
                                  72.0, 72.0, 0,
                                  false, /* useMediaBox */
                                  true, /* Crop */
                                  -1, -1, -1, -1,
                                  false, /* printing */
                                  ebp->doc.pdfdoc->getCatalog (),
                                  NULL, NULL, NULL, NULL);
  if (!gfx)
    return NULL;

  ebp->page.page->display(gfx);

  delete gfx;

  text_dev.endPage();

  pdf_selection.x1 = rect.x;
  pdf_selection.y1 = rect.y;
  pdf_selection.x2 = rect.x + rect.w;
  pdf_selection.y2 = rect.y + rect.h;

  sel_text = new GooString;
  if (!sel_text)
    return NULL;
  /* added selectionStyleGlyph to catch up with poppler 0.6. Is that correct
     or should we rather use selectionStyleLine or selectionStyleWord? :M: */
  sel_text = text_dev.getSelectionText (&pdf_selection, selectionStyleGlyph);
  res = strdup (sel_text->getCString ());
  delete sel_text;

  return res;
}

Eina_List *
em_page_text_find(void *eb, const char *text, Eina_Bool is_case_sensitive, Eina_Bool backward)
{
  Eyesight_Backend_Pdf *ebp;
  Eina_Rectangle *match;
  Eina_List      *matches = NULL;
  double          xMin, yMin, xMax, yMax;
  int             rotate;
  int             length;
  int             height;

  if (!eb)
    return NULL;

  DBG("text=%s", text);

  if (!text || !*text)
    return NULL;

  ebp = (Eyesight_Backend_Pdf *)eb;

#if 1
  GooString tmp (text);
  Unicode *s;
  length = tmp.getLength();
  s = (Unicode *)malloc(sizeof(Unicode) * length);
  if (!s)
    return NULL;
  bool anyNonEncoded = false;
  for (int j = 0; j < length && !anyNonEncoded; ++j)
    {
      s[j] = pdfDocEncoding[tmp.getChar(j) & 0xff];
      if (!s[j]) anyNonEncoded = true;
    }
  if ( anyNonEncoded )
    {
      for (int j = 0; j < length; ++j)
        {
          s[j] = tmp.getChar(j);
        }
    }
#endif

  length = strlen (text);

  TextOutputDev output_dev(NULL, 1, 0, 0);

  int h;
  rotate = ebp->page.page->getRotate ();
  if (rotate == 90 || rotate == 270)
    h = round(ebp->page.page->getMediaWidth());
  else
    h = round(ebp->page.page->getMediaHeight());

  /* FIXME: take into account the scale ?? So, save output_dev ?? */
  ebp->page.page->display(&output_dev, 72, 72, 0, false,
                          true, false,
                          ebp->doc.pdfdoc->getCatalog());

  xMin = 0;
  yMin = 0;
  while (output_dev.findText(s, tmp.getLength (),
                             0, 1, // startAtTop, stopAtBottom
                             1, 0, // startAtLast, stopAtLast
                             is_case_sensitive, backward, // caseSensitive, backwards
                              &xMin, &yMin, &xMax, &yMax)) {
    match = (Eina_Rectangle *)malloc (sizeof (Eina_Rectangle));
    if (match)
      {
        match->x = round(xMin);
        match->y = round(yMin); //h - yMax;
        match->w = round(xMax - xMin);
        match->h = round(yMax - yMin); //h - yMin;
        matches = eina_list_append (matches, match);
      }
  }

  return matches;
}

Eina_List *
em_page_links_get(void *eb)
{
  Eyesight_Backend_Pdf *ebp;
  Eina_List *links_list = NULL;
  Object obj;

  if (!eb)
    return NULL;

  ebp = (Eyesight_Backend_Pdf *)eb;

  Links links(ebp->page.page->getAnnots(&obj),
              ebp->doc.pdfdoc->getCatalog()->getBaseURI());

  obj.free();

  for (int i = 0; i < links.getNumLinks(); i++)
    {
      Eyesight_Link *link_item;
      Link *link;
      LinkAction *link_action;
      double x1, x2, y1, y2;

      link = links.getLink(i);
      if (!link->isOk())
        continue;

      link_action = link->getAction();
      link->getRect (&x1, &y1, &x2, &y2);
      x1 -= ebp->page.page->getCropBox()->x1;
      x2 -= ebp->page.page->getCropBox()->x1;
      y1 -= ebp->page.page->getCropBox()->y1;
      y2 -= ebp->page.page->getCropBox()->y1;

      link_item = _eyesight_link_new();
      if (!link_item)
        continue;

      link_item->action = _eyesight_link_action_get(link_action);
      link_item->page = _eyesight_link_action_page_get(ebp->doc.pdfdoc, link_action);

      switch (ebp->page.page->getRotate())
        {
        case 90:
          link_item->rect.x = (int)x1;
          link_item->rect.y = (int)(ebp->page.page->getMediaWidth() - x2);
          link_item->rect.w = (int)(y2 - y1);
          link_item->rect.h = (int)(x2 - x1);
          break;
        case 180:
          link_item->rect.x = (int)(ebp->page.page->getMediaWidth() - x2);
          link_item->rect.y = (int)(ebp->page.page->getMediaHeight() - y2);
          link_item->rect.w = (int)(x2 - x1);
          link_item->rect.h = (int)(y2 - y1);
          break;
        case 270:
          link_item->rect.x = (int)(ebp->page.page->getMediaHeight() - y2);
          link_item->rect.y = (int)x1;
          link_item->rect.w = (int)(y2 - y1);
          link_item->rect.h = (int)(x2 - x1);
          break;
        default:
          printf (" %f %f %f %f\n", x1, ebp->page.page->getMediaHeight() - y1, x2-x1, y2-y1);
          link_item->rect.x = round(x1);
          link_item->rect.y = round(ebp->page.page->getMediaHeight() - y1);
          link_item->rect.w = round(x2 - x1);
          link_item->rect.h = round(y2 - y1);
          break;
        }

      links_list = eina_list_prepend(links_list, link_item);
    }

  return links_list;
}

static Eyesight_Module _eyesight_module_pdf =
{
  em_init,
  em_shutdown,
  em_file_open,
  em_file_close,
  em_toc_get,
  em_page_count,
  em_page_set,
  em_page_get,
  em_page_scale_set,
  em_page_scale_get,
  em_page_orientation_set,
  em_page_orientation_get,
  em_page_size_get,
  em_page_render,
  em_page_text_get,
  em_page_text_find,
  em_page_links_get,

  NULL
};

static Eina_Bool
module_open(Evas *evas, Evas_Object **obj, const Eyesight_Module **module, void **backend)
{
   if (!module)
      return EINA_FALSE;

   if (_eyesight_pdf_log_domain < 0)
     {
        _eyesight_pdf_log_domain = eina_log_domain_register("eyesight-pdf", EINA_COLOR_LIGHTCYAN);
        if (_eyesight_pdf_log_domain < 0)
          {
             EINA_LOG_CRIT("Could not register log domain 'eyesight-pdf'");
             return EINA_FALSE;
          }
     }

   if (!_eyesight_module_pdf.init(evas, obj, backend))
     {
        ERR("Could not initialize module");
        eina_log_domain_unregister(_eyesight_pdf_log_domain);
        _eyesight_pdf_log_domain = -1;
       return EINA_FALSE;
     }

   *module = &_eyesight_module_pdf;
   return EINA_TRUE;
}

static void
module_close(Eyesight_Module *module, void *backend)
{
   eina_log_domain_unregister(_eyesight_pdf_log_domain);
   _eyesight_pdf_log_domain = -1;
   _eyesight_module_pdf.shutdown(backend);
}

Eina_Bool
pdf_module_init(void)
{
   return _eyesight_module_register("pdf", module_open, module_close);
}

void
pdf_module_shutdown(void)
{
   _eyesight_module_unregister("pdf");
}

#ifndef EYESIGHT_STATIC_BUILD_POPPLER

EINA_MODULE_INIT(pdf_module_init);
EINA_MODULE_SHUTDOWN(pdf_module_shutdown);

#endif


More information about the poppler mailing list