[PATCH] Optimization to avoid scanning all .pc files

Marco Diego Aurélio Mesquita marcodiegomesquita at gmail.com
Sat Sep 10 04:44:35 UTC 2016


Sorry, forgot to attach it.

On Sat, Sep 10, 2016 at 1:32 AM, Marco Diego Aurélio Mesquita
<marcodiegomesquita at gmail.com> wrote:
> Hi Devs!
>
> I tried to improve the patch a bit more. The "locations" hashtable is
> now gone. Run times for the tests seem about 10% faster now.
>
> Matthew would you mind to test it?
>
> Also, I think the "path_positions" hashtable can be removed. Maybe it
> will improve the speed a little bit more.
>
>
> On Fri, Sep 9, 2016 at 6:08 PM, Matthew Hanna (BLOOMBERG/ 731 LEX)
> <mhanna21 at bloomberg.net> wrote:
>> Results of tests of this patch on our system are mixed. The optimization
>> does
>> not appear to be needed on our Linux boxes. The time to read our pkgconfig
>> directory is in the milliseconds. On other platforms, the time is in
>> seconds,
>> so I was hopeful that this patch would reduce that time. It certainly
>> reduces
>> the minimum speed, but the maximum speed is actually increased. In one test,
>> the time without the patch is 2 seconds, with almost all of that time taken
>> up
>> by the directory reading. With the patch, the minimum time is now in
>> milliseconds for a low level library with few dependencies, but is now over
>> 5
>> seconds for a higher level library.
>>
>> From: marcodiegomesquita at gmail.com At: 08/27/16 10:53:48
>> To: Matthew Hanna (BLOOMBERG/ 731 LEX), nicholson at endlessm.com
>> Cc: pkg-config at lists.freedesktop.org
>> Subject: Re: [PATCH] Optimization to avoid scanning all .pc files
>>
>> Hi Devs!
>>
>>
>> Attached is just a small improvement to the patch: code improvements
>> and path_position calculation is smarter. I think it is now good to
>> go.
>>
>> Please review it.
>>
>>
>> On Fri, Aug 26, 2016 at 10:04 PM, Marco Diego Aurélio Mesquita
>> <marcodiegomesquita at gmail.com> wrote:
>>> Hi Matthew!
>>>
>>> Thanks for the review. The attached patch implements the changes you
>>> asked.
>>>
>>> Is there anything else that should be changed?
>>>
>>> On Fri, Aug 26, 2016 at 8:25 PM, Matthew Hanna (BLOOMBERG/ 731 LEX)
>>> <mhanna21 at bloomberg.net> wrote:
>>>> --- Original Sender: INTERNET2 .PROG/MSG/INT, BLOOMBERG/ 330 WEST ---
>>>>
>>>> The call to 'access' should be replaced by 'g_file_test' for portability
>>>> and/or platform optimization.
>>>>
>>>> Are there whitespace changes in the diff? If so, please remove. It makes
>>>> the diff very hard to read.
>>>>
>>>> ---
>>>> Sent from Bloomberg Professional for Android
>>>>
>>>>
>>>> ----- Original Message -----
>>>> FROM: Marco Diego Aurélio Mesquita <marcodiegomesquita at gmail.com>
>>>> At: 27-Aug-2016 01:03:59
>>>>
>>>> Hi devs!
>>>>
>>>> The attached patch improves pkg-config so that it will look for
>>>> required .pc files only, instead of loading and parsing all known .pc
>>>> files.
>>>>
>>>> Please review it.
>>>>
>>>> ----------------------------
>>>>
>>>> _______________________________________________
>>>> pkg-config mailing list
>>>> pkg-config at lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/pkg-config
>>>> _______________________________________________
>>>> pkg-config mailing list
>>>> pkg-config at lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/pkg-config
>>
>>
>>
>> _______________________________________________
>> pkg-config mailing list
>> pkg-config at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/pkg-config
>>
-------------- next part --------------
diff --git a/main.c b/main.c
index 227ed35..9b27d9a 100644
--- a/main.c
+++ b/main.c
@@ -643,7 +643,7 @@ main (int argc, char **argv)
         return 1;
     }
 
-  package_init ();
+  package_init (want_list);
 
   if (want_list)
     {
diff --git a/pkg.c b/pkg.c
index 5aa4380..140f4b3 100644
--- a/pkg.c
+++ b/pkg.c
@@ -43,11 +43,9 @@
 static void verify_package (Package *pkg);
 
 static GHashTable *packages = NULL;
-static GHashTable *locations = NULL;
 static GHashTable *path_positions = NULL;
 static GHashTable *globals = NULL;
 static GList *search_dirs = NULL;
-static int scanned_dir_count = 0;
 
 gboolean disable_uninstalled = FALSE;
 gboolean ignore_requires = FALSE;
@@ -121,7 +119,40 @@ name_ends_in_uninstalled (const char *str)
     return FALSE;
 }
 
+static char *
+add_package (const char *dirname, const char *filename, unsigned int path_position)
+{
+  unsigned const int dirnamelen = strlen (dirname);
+  int len = strlen (filename);
+
+  if (!ends_in_dotpc (filename))
+    {
+      debug_spew ("Ignoring file '%s' in search directory; not a .pc file\n",
+                  filename);
+      return NULL;
+    }
+
+  char *pkgname = g_malloc (len - EXT_LEN + 1);
+
+  debug_spew ("File '%s' appears to be a .pc file\n", filename);
+
+  strncpy (pkgname, filename, len - EXT_LEN);
+  pkgname[len-EXT_LEN] = '\0';
+
+  char *path = g_malloc (dirnamelen + 1 + len + 1);
+  strncpy (path, dirname, dirnamelen);
+  path[dirnamelen] = G_DIR_SEPARATOR;
+  strcpy (path + dirnamelen + 1, filename);
+
+  g_hash_table_insert (path_positions, pkgname,
+     GINT_TO_POINTER (path_position));
+  debug_spew ("Will find package '%s' in file '%s'\n",
+  pkgname, path);
+  return path;
+}
 
+static Package *
+internal_get_package (const char *name, gboolean warn);
 /* Look for .pc files in the given directory and add them into
  * locations, ignoring duplicates
  */
@@ -129,7 +160,9 @@ static void
 scan_dir (char *dirname)
 {
   GDir *dir;
-  const gchar *d_name;
+  const gchar *filename;
+  static unsigned int path_position = 0;
+  path_position++;
 
   int dirnamelen = strlen (dirname);
   /* Use a copy of dirname cause Win32 opendir doesn't like
@@ -161,59 +194,19 @@ scan_dir (char *dirname)
   dir = g_dir_open (dirname_copy, 0 , NULL);
   g_free (dirname_copy);
 
-  scanned_dir_count += 1;
-
   if (!dir)
     {
-      debug_spew ("Cannot open directory #%i '%s' in package search path: %s\n",
-                  scanned_dir_count, dirname, g_strerror (errno));
+      debug_spew ("Cannot open directory '%s' in package search path: %s\n", dirname, g_strerror (errno));
       return;
     }
 
-  debug_spew ("Scanning directory #%i '%s'\n", scanned_dir_count, dirname);
-  
-  while ((d_name = g_dir_read_name(dir)))
-    {
-      int len = strlen (d_name);
-
-      if (ends_in_dotpc (d_name))
-        {
-          char *pkgname = g_malloc (len - EXT_LEN + 1);
-
-          debug_spew ("File '%s' appears to be a .pc file\n", d_name);
-          
-          strncpy (pkgname, d_name, len - EXT_LEN);
-          pkgname[len-EXT_LEN] = '\0';
-
-          if (g_hash_table_lookup (locations, pkgname))
-            {
-              debug_spew ("File '%s' ignored, we already know about package '%s'\n", d_name, pkgname);
-              g_free (pkgname);
-            }
-          else
-            {
-              char *filename = g_malloc (dirnamelen + 1 + len + 1);
-              strncpy (filename, dirname, dirnamelen);
-              filename[dirnamelen] = G_DIR_SEPARATOR;
-              strcpy (filename + dirnamelen + 1, d_name);
+  debug_spew ("Scanning directory '%s'\n", dirname);
               
-	      if (g_file_test(filename, G_FILE_TEST_IS_REGULAR) == TRUE) {
-		  g_hash_table_insert (locations, pkgname, filename);
-		  g_hash_table_insert (path_positions, pkgname,
-				       GINT_TO_POINTER (scanned_dir_count));
-		  debug_spew ("Will find package '%s' in file '%s'\n",
-			      pkgname, filename);
-	      } else {
-		  debug_spew ("Ignoring '%s' while looking for '%s'; not a "
-			      "regular file.\n", pkgname, filename);
-	      }
-	    }
-        }
-      else
+  while ((filename = g_dir_read_name(dir)))
         {
-          debug_spew ("Ignoring file '%s' in search directory; not a .pc file\n",
-                      d_name);
-        }
+      char *path = g_strdup_printf("%s/%s", dirname, filename);
+      internal_get_package (path, FALSE);
+      g_free(path);
     }
   g_dir_close (dir);
 }
@@ -243,7 +236,7 @@ add_virtual_pkgconfig_package (void)
 }
 
 void
-package_init ()
+package_init (gboolean want_list)
 {
   static gboolean initted = FALSE;
 
@@ -252,15 +245,42 @@ package_init ()
       initted = TRUE;
       
       packages = g_hash_table_new (g_str_hash, g_str_equal);
-      locations = g_hash_table_new (g_str_hash, g_str_equal);
       path_positions = g_hash_table_new (g_str_hash, g_str_equal);
       
+      if (want_list)
+        g_list_foreach (search_dirs, (GFunc)scan_dir, NULL);
+      else
       add_virtual_pkgconfig_package ();
+    }
+}
 
-      g_list_foreach (search_dirs, (GFunc)scan_dir, NULL);
+static char *
+search_package (const char *pkgname)
+{
+  char *ret = NULL;
+
+  char *filename = g_strdup_printf("%s.pc", pkgname);
+  unsigned int path_position = 0;
+  GList *dir_iter;
+
+  for (dir_iter = search_dirs; dir_iter != NULL; dir_iter = g_list_next (dir_iter))
+    {
+      path_position++;
+      char *dirname = dir_iter->data;
+      char *path = g_strdup_printf("%s/%s", dirname, filename);
+      const gboolean file_exists = g_file_test(path, G_FILE_TEST_IS_REGULAR);
+      free(path);
+      if (file_exists) {
+        ret = add_package (dirname, filename, path_position);
+        break;
     }
 }
 
+  g_free (filename);
+
+  return ret;
+}
+
 static Package *
 internal_get_package (const char *name, gboolean warn)
 {
@@ -302,8 +322,7 @@ internal_get_package (const char *name, gboolean warn)
               return pkg;
             }
         }
-      
-      location = g_hash_table_lookup (locations, name);
+      location = search_package (name);
     }
   
   if (location == NULL)
@@ -326,8 +345,13 @@ internal_get_package (const char *name, gboolean warn)
       const char *end = name + (len - EXT_LEN);
       const char *start = end;
 
-      while (start != name && *start != G_DIR_SEPARATOR)
+      while (start != name) {
         --start;
+        if(start[0] == G_DIR_SEPARATOR) {
+          start++;
+          break;
+        }
+      }
 
       g_assert (end >= start);
 
@@ -893,7 +917,7 @@ verify_package (Package *pkg)
           system_dir_iter = system_dir_iter->next;
         }
     }
-  g_list_free (system_directories);
+  g_list_free_full (system_directories, free);
 
   while (count)
     {
@@ -1193,8 +1217,8 @@ print_package_list (void)
   ignore_requires = TRUE;
   ignore_requires_private = TRUE;
 
-  g_hash_table_foreach (locations, max_len_foreach, &mlen);
-  g_hash_table_foreach (locations, packages_foreach, GINT_TO_POINTER (mlen + 1));
+  g_hash_table_foreach (packages, max_len_foreach, &mlen);
+  g_hash_table_foreach (packages, packages_foreach, GINT_TO_POINTER (mlen + 1));
 }
 
 void
diff --git a/pkg.h b/pkg.h
index 13cc2c6..c6732bd 100644
--- a/pkg.h
+++ b/pkg.h
@@ -98,7 +98,7 @@ char *   packages_get_var          (GList      *pkgs,
 
 void add_search_dir (const char *path);
 void add_search_dirs (const char *path, const char *separator);
-void package_init (void);
+void package_init (gboolean want_list);
 int compare_versions (const char * a, const char *b);
 gboolean version_test (ComparisonType comparison,
                        const char *a,


More information about the pkg-config mailing list