[PATCH] Optimization to avoid scanning all .pc files

Dan Nicholson nicholson at endlessm.com
Sat Sep 10 18:10:48 UTC 2016


This looks a little better, but there are still some things in there I
think could be better. Before I get into the detailed review, can I make
some suggestions for providing these changes.

1. Please make a git commit with a detailed commit message describing
what's changing and why.

2. Since this is a non-trivial change (pkg-config has scanned the entire
path since the beginning), doing a detailed review of the patch is
critical. To that end, I think it would be better to move the patch to
somewhere with patch reviewing tools. There are 2 options. If you're
familiar with bugzilla, please create a bug on https://bugs.freedesktop.
org/ under the pkg-config component and attach your patch. If you're
familiar with github, I created a pkg-config mirror at
https://github.com/dbnicholson/pkg-config. You can file a pull request
to that repo.

3. There are indentation and whitespace issues introduced in your patch.
Please fix those.

Now to the review.

diff --git a/main.c b/main.c
index 227ed35..9b27d9a 100644
--- a/main.c
+++ b/main.c
@@ -643,7 +643,7 @@ main (int argc, char **argv)
         return 1;
     }
 
-  package_init ();
+  package_init (want_list);
 
   if (want_list)
     {
diff --git a/pkg.c b/pkg.c
index 5aa4380..140f4b3 100644
--- a/pkg.c
+++ b/pkg.c
@@ -43,11 +43,9 @@
 static void verify_package (Package *pkg);
 
 static GHashTable *packages = NULL;
-static GHashTable *locations = NULL;
 static GHashTable *path_positions = NULL;
 static GHashTable *globals = NULL;
 static GList *search_dirs = NULL;
-static int scanned_dir_count = 0;
 
 gboolean disable_uninstalled = FALSE;
 gboolean ignore_requires = FALSE;
@@ -121,7 +119,40 @@ name_ends_in_uninstalled (const char *str)
     return FALSE;
 }
 
+static char *
+add_package (const char *dirname, const char *filename, unsigned int
path_position)

It's probably better if path_position is a signed int since that's
what's stored in the path_positions hash table.

+{
+  unsigned const int dirnamelen = strlen (dirname);
+  int len = strlen (filename);
+
+  if (!ends_in_dotpc (filename))
+    {
+      debug_spew ("Ignoring file '%s' in search directory; not a .pc
file\n",
+                  filename);
+      return NULL;
+    }
+
+  char *pkgname = g_malloc (len - EXT_LEN + 1);

Please declare pkgname above.

+
+  debug_spew ("File '%s' appears to be a .pc file\n", filename);
+
+  strncpy (pkgname, filename, len - EXT_LEN);
+  pkgname[len-EXT_LEN] = '\0';
+
+  char *path = g_malloc (dirnamelen + 1 + len + 1);
+  strncpy (path, dirname, dirnamelen);
+  path[dirnamelen] = G_DIR_SEPARATOR;
+  strcpy (path + dirnamelen + 1, filename);
+
+  g_hash_table_insert (path_positions, pkgname,
+     GINT_TO_POINTER (path_position));
+  debug_spew ("Will find package '%s' in file '%s'\n",
+  pkgname, path);
+  return path;
+}
 
+static Package *
+internal_get_package (const char *name, gboolean warn);
 /* Look for .pc files in the given directory and add them into
  * locations, ignoring duplicates
  */
@@ -129,7 +160,9 @@ static void
 scan_dir (char *dirname)
 {
   GDir *dir;
-  const gchar *d_name;
+  const gchar *filename;
+  static unsigned int path_position = 0;
+  path_position++;

Rather that creating a static variable here, change scan_dir to take a
position argument. And, as above, probably better to make this an int
since that's what's stored in the path_positions hash table.
 
   int dirnamelen = strlen (dirname);
   /* Use a copy of dirname cause Win32 opendir doesn't like
@@ -161,59 +194,19 @@ scan_dir (char *dirname)
   dir = g_dir_open (dirname_copy, 0 , NULL);
   g_free (dirname_copy);
 
-  scanned_dir_count += 1;
-
   if (!dir)
     {
-      debug_spew ("Cannot open directory #%i '%s' in package search
path: %s\n",
-                  scanned_dir_count, dirname, g_strerror (errno));
+      debug_spew ("Cannot open directory '%s' in package search path:
%s\n", dirname, g_strerror (errno));

Looks like you dropped the position message here even though you have it
above.

       return;
     }
 
-  debug_spew ("Scanning directory #%i '%s'\n", scanned_dir_count,
dirname);
-  
-  while ((d_name = g_dir_read_name(dir)))
-    {
-      int len = strlen (d_name);
-
-      if (ends_in_dotpc (d_name))
-        {
-          char *pkgname = g_malloc (len - EXT_LEN + 1);
-
-          debug_spew ("File '%s' appears to be a .pc file\n", d_name);
-          
-          strncpy (pkgname, d_name, len - EXT_LEN);
-          pkgname[len-EXT_LEN] = '\0';
-
-          if (g_hash_table_lookup (locations, pkgname))
-            {
-              debug_spew ("File '%s' ignored, we already know about
package '%s'\n", d_name, pkgname);
-              g_free (pkgname);
-            }
-          else
-            {
-              char *filename = g_malloc (dirnamelen + 1 + len + 1);
-              strncpy (filename, dirname, dirnamelen);
-              filename[dirnamelen] = G_DIR_SEPARATOR;
-              strcpy (filename + dirnamelen + 1, d_name);
+  debug_spew ("Scanning directory '%s'\n", dirname);

Same.
               
-	      if (g_file_test(filename, G_FILE_TEST_IS_REGULAR) ==
TRUE) {
-		  g_hash_table_insert (locations, pkgname, filename);
-		  g_hash_table_insert (path_positions, pkgname,
-				       GINT_TO_POINTER
(scanned_dir_count));
-		  debug_spew ("Will find package '%s' in file '%s'\n",
-			      pkgname, filename);
-	      } else {
-		  debug_spew ("Ignoring '%s' while looking for '%s';
not a "
-			      "regular file.\n", pkgname, filename);
-	      }
-	    }
-        }
-      else
+  while ((filename = g_dir_read_name(dir)))
         {
-          debug_spew ("Ignoring file '%s' in search directory; not a
.pc file\n",
-                      d_name);
-        }
+      char *path = g_strdup_printf("%s/%s", dirname, filename);

Space before parentheses.

+      internal_get_package (path, FALSE);
+      g_free(path);

Same.

     }
   g_dir_close (dir);
 }
@@ -243,7 +236,7 @@ add_virtual_pkgconfig_package (void)
 }
 
 void
-package_init ()
+package_init (gboolean want_list)
 {
   static gboolean initted = FALSE;
 
@@ -252,15 +245,42 @@ package_init ()
       initted = TRUE;
       
       packages = g_hash_table_new (g_str_hash, g_str_equal);
-      locations = g_hash_table_new (g_str_hash, g_str_equal);
       path_positions = g_hash_table_new (g_str_hash, g_str_equal);
       
+      if (want_list)
+        g_list_foreach (search_dirs, (GFunc)scan_dir, NULL);

Here you'd pass in the position. You'd need to use a real loop instead
of g_list_foreach. Something like this.

GList *cur;
int position;

for (cur = search_dirs, position = 1; cur != NULL;
     cur = g_list_next (cur), position++)
  scan_dir (cur->data, position);

+      else
       add_virtual_pkgconfig_package ();

Why is this being skipped? Also, you broken the indentation.

+    }
+}
 
-      g_list_foreach (search_dirs, (GFunc)scan_dir, NULL);
+static char *
+search_package (const char *pkgname)
+{
+  char *ret = NULL;
+
+  char *filename = g_strdup_printf("%s.pc", pkgname);

Space before parentheses.

+  unsigned int path_position = 0;
+  GList *dir_iter;
+
+  for (dir_iter = search_dirs; dir_iter != NULL; dir_iter = g_list_next
(dir_iter))
+    {
+      path_position++;
+      char *dirname = dir_iter->data;
+      char *path = g_strdup_printf("%s/%s", dirname, filename);
+      const gboolean file_exists = g_file_test(path,
G_FILE_TEST_IS_REGULAR);

I don't think const on a non-pointer type makes any sense.

+      free(path);

Space before parentheses on all of these.

+      if (file_exists) {
+        ret = add_package (dirname, filename, path_position);

Why not pass the full path to add_package so you don't have to construct
it again there? In fact, the whole thing might be better inlined here
since you already have the dirname, filename and full path. All that
function does is check if the filename ends in .pc, stripping the .pc,
and adding it to path_positions. Or, you could do the ends_in_dotpc
check here, and simply pass the filename and path_position to
add_package where it can do the .pc stripping and insertion into
path_positions.

+        break;
     }
 }
 
+  g_free (filename);
+
+  return ret;
+}
+
 static Package *
 internal_get_package (const char *name, gboolean warn)
 {
@@ -302,8 +322,7 @@ internal_get_package (const char *name, gboolean
warn)
               return pkg;
             }
         }
-      
-      location = g_hash_table_lookup (locations, name);
+      location = search_package (name);
     }
   
   if (location == NULL)
@@ -326,8 +345,13 @@ internal_get_package (const char *name, gboolean
warn)
       const char *end = name + (len - EXT_LEN);
       const char *start = end;
 
-      while (start != name && *start != G_DIR_SEPARATOR)
+      while (start != name) {
         --start;
+        if(start[0] == G_DIR_SEPARATOR) {
+          start++;
+          break;
+        }
+      }

What's this part about? It looks unrelated to the rest of the patch.
 
       g_assert (end >= start);
 
@@ -893,7 +917,7 @@ verify_package (Package *pkg)
           system_dir_iter = system_dir_iter->next;
         }
     }
-  g_list_free (system_directories);
+  g_list_free_full (system_directories, free);

Put this in a separate patch, please.
 
   while (count)
     {
@@ -1193,8 +1217,8 @@ print_package_list (void)
   ignore_requires = TRUE;
   ignore_requires_private = TRUE;
 
-  g_hash_table_foreach (locations, max_len_foreach, &mlen);
-  g_hash_table_foreach (locations, packages_foreach, GINT_TO_POINTER
(mlen + 1));
+  g_hash_table_foreach (packages, max_len_foreach, &mlen);
+  g_hash_table_foreach (packages, packages_foreach, GINT_TO_POINTER
(mlen + 1));
 }
 
 void
diff --git a/pkg.h b/pkg.h
index 13cc2c6..c6732bd 100644
--- a/pkg.h
+++ b/pkg.h
@@ -98,7 +98,7 @@ char *   packages_get_var          (GList      *pkgs,
 
 void add_search_dir (const char *path);
 void add_search_dirs (const char *path, const char *separator);
-void package_init (void);
+void package_init (gboolean want_list);
 int compare_versions (const char * a, const char *b);
 gboolean version_test (ComparisonType comparison,
                        const char *a,

--
Dan Nicholson  |  +1.206.437.0833  |  Endless


On Fri, Sep 9, 2016 at 9:44 PM, Marco Diego Aurélio Mesquita <marcodiego
mesquita at gmail.com> wrote:
> 
> Sorry, forgot to attach it.
> 
> On Sat, Sep 10, 2016 at 1:32 AM, Marco Diego Aurélio Mesquita
> <marcodiegomesquita at gmail.com> wrote:
> > 
> > Hi Devs!
> > 
> > I tried to improve the patch a bit more. The "locations" hashtable
> > is
> > now gone. Run times for the tests seem about 10% faster now.
> > 
> > Matthew would you mind to test it?
> > 
> > Also, I think the "path_positions" hashtable can be removed. Maybe
> > it
> > will improve the speed a little bit more.
> > 
> > 
> > On Fri, Sep 9, 2016 at 6:08 PM, Matthew Hanna (BLOOMBERG/ 731 LEX)
> > <mhanna21 at bloomberg.net> wrote:
> > > 
> > > Results of tests of this patch on our system are mixed. The
> > > optimization
> > > does
> > > not appear to be needed on our Linux boxes. The time to read our
> > > pkgconfig
> > > directory is in the milliseconds. On other platforms, the time is
> > > in
> > > seconds,
> > > so I was hopeful that this patch would reduce that time. It
> > > certainly
> > > reduces
> > > the minimum speed, but the maximum speed is actually increased. In
> > > one test,
> > > the time without the patch is 2 seconds, with almost all of that
> > > time taken
> > > up
> > > by the directory reading. With the patch, the minimum time is now
> > > in
> > > milliseconds for a low level library with few dependencies, but is
> > > now over
> > > 5
> > > seconds for a higher level library.
> > > 
> > > From: marcodiegomesquita at gmail.com At: 08/27/16 10:53:48
> > > To: Matthew Hanna (BLOOMBERG/ 731 LEX), nicholson at endlessm.com
> > > Cc: pkg-config at lists.freedesktop.org
> > > Subject: Re: [PATCH] Optimization to avoid scanning all .pc files
> > > 
> > > Hi Devs!
> > > 
> > > 
> > > Attached is just a small improvement to the patch: code
> > > improvements
> > > and path_position calculation is smarter. I think it is now good
> > > to
> > > go.
> > > 
> > > Please review it.
> > > 
> > > 
> > > On Fri, Aug 26, 2016 at 10:04 PM, Marco Diego Aurélio Mesquita
> > > <marcodiegomesquita at gmail.com> wrote:
> > > > 
> > > > Hi Matthew!
> > > > 
> > > > Thanks for the review. The attached patch implements the changes
> > > > you
> > > > asked.
> > > > 
> > > > Is there anything else that should be changed?
> > > > 
> > > > On Fri, Aug 26, 2016 at 8:25 PM, Matthew Hanna (BLOOMBERG/ 731
> > > > LEX)
> > > > <mhanna21 at bloomberg.net> wrote:
> > > > > 
> > > > > --- Original Sender: INTERNET2 .PROG/MSG/INT, BLOOMBERG/ 330
> > > > > WEST ---
> > > > > 
> > > > > The call to 'access' should be replaced by 'g_file_test' for
> > > > > portability
> > > > > and/or platform optimization.
> > > > > 
> > > > > Are there whitespace changes in the diff? If so, please
> > > > > remove. It makes
> > > > > the diff very hard to read.
> > > > > 
> > > > > ---
> > > > > Sent from Bloomberg Professional for Android
> > > > > 
> > > > > 
> > > > > ----- Original Message -----
> > > > > FROM: Marco Diego Aurélio Mesquita <marcodiegomesquita at gmail.c
> > > > > om>
> > > > > At: 27-Aug-2016 01:03:59
> > > > > 
> > > > > Hi devs!
> > > > > 
> > > > > The attached patch improves pkg-config so that it will look
> > > > > for
> > > > > required .pc files only, instead of loading and parsing all
> > > > > known .pc
> > > > > files.
> > > > > 
> > > > > Please review it.
> > > > > 
> > > > > ----------------------------
> > > > > 
> > > > > _______________________________________________
> > > > > pkg-config mailing list
> > > > > pkg-config at lists.freedesktop.org
> > > > > https://lists.freedesktop.org/mailman/listinfo/pkg-config
> > > > > _______________________________________________
> > > > > pkg-config mailing list
> > > > > pkg-config at lists.freedesktop.org
> > > > > https://lists.freedesktop.org/mailman/listinfo/pkg-config
> > > 
> > > 
> > > _______________________________________________
> > > pkg-config mailing list
> > > pkg-config at lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/pkg-config
> > > 
> _______________________________________________
> pkg-config mailing list
> pkg-config at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/pkg-config
> 


More information about the pkg-config mailing list