[PATCH] Add tag encoding detection via enca

Heiher admin at heiher.info
Fri Apr 8 00:48:36 PDT 2011


---
 configure.ac                 |   13 +++++
 gst-libs/gst/tag/Makefile.am |    4 +-
 gst-libs/gst/tag/tags.c      |  100 +++++++++++++++++++++++++++++-------------
 3 files changed, 84 insertions(+), 33 deletions(-)

diff --git a/configure.ac b/configure.ac
index 3325abc..d64030c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -716,6 +716,18 @@ AG_GST_CHECK_FEATURE(IVORBIS, [integer vorbis plug-in], ivorbisdec, [
   fi
 ])

+dnl *** tags prefer to have enca ***
+translit(dnm, m, l) AM_CONDITIONAL(USE_ENCA, true)
+AG_GST_CHECK_FEATURE(ENCA, [enca support for tags],, [
+  AG_GST_CHECK_LIBHEADER(ENCA,
+    enca, enca_analyser_alloc,, enca.h, [
+    HAVE_ENCA="yes"
+    ENCA_CFLAGS=""
+    ENCA_LIBS="-lenca"
+    AC_SUBST(ENCA_LIBS)
+  ])
+])
+
 dnl *** libgio ***
 translit(dnm, m, l) AM_CONDITIONAL(USE_GIO, true)
 AG_GST_CHECK_FEATURE(GIO, [GIO library], gio, [
@@ -825,6 +837,7 @@ AM_CONDITIONAL(USE_OGG, false)
 AM_CONDITIONAL(USE_PANGO, false)
 AM_CONDITIONAL(USE_THEORA, false)
 AM_CONDITIONAL(USE_VORBIS, false)
+AM_CONDITIONAL(USE_ENCA, false)

 fi dnl of EXT plugins

diff --git a/gst-libs/gst/tag/Makefile.am b/gst-libs/gst/tag/Makefile.am
index 9e82463..1f77b87 100644
--- a/gst-libs/gst/tag/Makefile.am
+++ b/gst-libs/gst/tag/Makefile.am
@@ -9,8 +9,8 @@ lib_LTLIBRARIES = libgsttag-@GST_MAJORMINOR@.la
 libgsttag_@GST_MAJORMINOR@_la_SOURCES = \
     gstvorbistag.c gstid3tag.c gstxmptag.c gstexiftag.c \
     lang.c tags.c gsttagdemux.c gsttageditingprivate.c
-libgsttag_@GST_MAJORMINOR@_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS)
-libgsttag_@GST_MAJORMINOR@_la_LIBADD = $(GST_BASE_LIBS) $(GST_LIBS) $(LIBM)
+libgsttag_@GST_MAJORMINOR@_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS) $(ENCA_CFLAGS)
+libgsttag_@GST_MAJORMINOR@_la_LIBADD = $(GST_BASE_LIBS) $(GST_LIBS) $(LIBM) $(ENCA_LIBS)
 libgsttag_@GST_MAJORMINOR@_la_LDFLAGS = $(GST_LIB_LDFLAGS) $(GST_ALL_LDFLAGS) $(GST_LT_LDFLAGS)

 # lang-tables.dat contains generated static data and is included by lang.c
diff --git a/gst-libs/gst/tag/tags.c b/gst-libs/gst/tag/tags.c
index 9521821..46e2e3a 100644
--- a/gst-libs/gst/tag/tags.c
+++ b/gst-libs/gst/tag/tags.c
@@ -29,6 +29,10 @@

 #include <string.h>

+#ifdef HAVE_ENCA
+#include <enca.h>
+#endif
+
 /**
  * SECTION:gsttag
  * @short_description: additional tag definitions for plugins and applications
@@ -366,10 +370,13 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
     const gchar ** env_vars)
 {
   const gchar *cur_loc = NULL;
-
   gsize bytes_read;
-
   gchar *utf8 = NULL;
+#ifdef HAVE_ENCA
+  EncaAnalyser eanalyser;
+  gchar langname[3];
+  const gchar * env;
+#endif

   g_return_val_if_fail (data != NULL, NULL);

@@ -389,6 +396,35 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
     goto beach;
   }

+  while (env_vars && *env_vars != NULL) {
+    const gchar *env = NULL;
+
+    /* Try charsets specified via the environment */
+    env = g_getenv (*env_vars);
+    if (env != NULL && *env != '\0') {
+      gchar **c, **csets;
+
+      csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
+
+      for (c = csets; c && *c; ++c) {
+        GST_LOG ("Trying to convert freeform string to UTF-8 from '%s'", *c);
+        if ((utf8 =
+                g_convert (data, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
+          if (bytes_read == size) {
+            g_strfreev (csets);
+            goto beach;
+          }
+          g_free (utf8);
+          utf8 = NULL;
+        }
+      }
+
+      g_strfreev (csets);
+    }
+    ++env_vars;
+  }
+
+#ifndef HAVE_ENCA
   /* check for and use byte-order-mark for UTF-16/32 cases */
   if (size >= 2) {
     const gchar *c = NULL;
@@ -435,34 +471,6 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
     }
   }

-  while (env_vars && *env_vars != NULL) {
-    const gchar *env = NULL;
-
-    /* Try charsets specified via the environment */
-    env = g_getenv (*env_vars);
-    if (env != NULL && *env != '\0') {
-      gchar **c, **csets;
-
-      csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
-
-      for (c = csets; c && *c; ++c) {
-        GST_LOG ("Trying to convert freeform string to UTF-8 from '%s'", *c);
-        if ((utf8 =
-                g_convert (data, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
-          if (bytes_read == size) {
-            g_strfreev (csets);
-            goto beach;
-          }
-          g_free (utf8);
-          utf8 = NULL;
-        }
-      }
-
-      g_strfreev (csets);
-    }
-    ++env_vars;
-  }
-
   /* Try current locale (if not UTF-8) */
   if (!g_get_charset (&cur_loc)) {
    GST_LOG ("Trying to convert freeform string using locale ('%s')", cur_loc);
@@ -500,9 +508,39 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,

   g_free (utf8);
   return NULL;
+#else /* HAVE_ENCA */
+  env = g_getenv("LANG");
+  if(NULL == env) {
+      return NULL;
+  }
+  langname[0] = env[0];
+  langname[1] = env[1];
+  langname[2] = 0;
+
+  eanalyser = enca_analyser_alloc(langname);
+  if(eanalyser) {
+      EncaEncoding encoding;
+      const gchar * charset;
+
+      encoding = enca_analyse(eanalyser, (unsigned char*)data, size);
+      if(ENCA_CS_UNKNOWN != encoding.charset) {
+          charset = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
+
+          utf8 = g_convert (data, size, "UTF-8", charset, &bytes_read, NULL, NULL);
+          if (utf8 != NULL && bytes_read == size) {
+              enca_analyser_free(eanalyser);
+              goto beach;
+          }
+      }
+
+      enca_analyser_free(eanalyser);
+  }
+
+  g_free(utf8);
+  return NULL;
+#endif

 beach:
-
   g_strchomp (utf8);
   if (utf8 && utf8[0] != '\0') {
     GST_LOG ("Returning '%s'", utf8);
-- 
1.7.4.4

-- 
Configure bugmail: https://bugzilla.gnome.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the QA contact for the bug.
You are the assignee for the bug.


More information about the gstreamer-bugs mailing list