[GNUnet-SVN] r23196 - Extractor/src/plugins

gnunet-svn
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r23196 - Extractor/src/plugins

From:	gnunet
Subject:	[GNUnet-SVN] r23196 - Extractor/src/plugins
Date:	Fri, 10 Aug 2012 18:37:53 +0200
Author: grothoff
Date: 2012-08-10 18:37:53 +0200 (Fri, 10 Aug 2012)
New Revision: 23196

Modified:
   Extractor/src/plugins/ole2_extractor.c
Log:
hacking on OLE plugin

Modified: Extractor/src/plugins/ole2_extractor.c
===================================================================
--- Extractor/src/plugins/ole2_extractor.c      2012-08-10 15:33:26 UTC (rev 23195)
+++ Extractor/src/plugins/ole2_extractor.c      2012-08-10 16:37:53 UTC (rev 23196)
@@ -1,10 +1,10 @@
 /*
      This file is part of libextractor.
-     (C) 2004, 2005, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff
+     (C) 2004, 2005, 2006, 2007, 2009, 2012 Vidyut Samanta and Christian Grothoff
 
      libextractor is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
-     by the Free Software Foundation; either version 2, or (at your
+     by the Free Software Foundation; either version 3, or (at your
      option) any later version.
 
      libextractor is distributed in the hope that it will be useful, but
@@ -24,16 +24,18 @@
      Part of this code was borrowed from wordleaker.cpp. See also
      the README file in this directory.
 */
-
+/**
+ * @file plugins/ole2_extractor.c
+ * @brief plugin to support OLE2 (DOC, XLS, etc.) files
+ * @author Christian Grothoff
+ */
 #include "platform.h"
 #include "extractor.h"
 #include "convert.h"
-
 #include <glib-object.h>
 #include <string.h>
 #include <stdio.h>
 #include <ctype.h>
-
 #include <gsf/gsf-utils.h>
 #include <gsf/gsf-input-memory.h>
 #include <gsf/gsf-infile.h>
@@ -42,20 +44,31 @@
 
 #define DEBUG_OLE2 0
 
-/* ******************************** main extraction code ************************ */
 
+/**
+ * Give the given UTF8 string to LE by calling 'proc'.
+ *
+ * @param proc callback to invoke
+ * @param proc_cls closure for proc
+ * @param phrase metadata string to pass; may be empty, a single space,
+ *        just double-quotes or just a space in double quotes;
+ *        in those cases, 'proc' is not called and 0 is returned
+ * @param type meta data type to use
+ * @return the return value of 'proc', or 0 if 'phrase' was skipped
+ */
 static int
-addKeyword(EXTRACTOR_MetaDataProcessor proc,
-          void *proc_cls,
-          const char *phrase,
-          enum EXTRACTOR_MetaType type) {
-  if (strlen(phrase) == 0)
+add_metadata (EXTRACTOR_MetaDataProcessor proc,
+           void *proc_cls,
+           const char *phrase,
+           enum EXTRACTOR_MetaType type) 
+{
+  if (0 == strlen (phrase))
     return 0;
-  if (0 == strcmp(phrase, "\"\""))
+  if (0 == strcmp (phrase, "\"\""))
     return 0;
-  if (0 == strcmp(phrase, "\" \""))
+  if (0 == strcmp (phrase, "\" \""))
     return 0;
-  if (0 == strcmp(phrase, " "))
+  if (0 == strcmp (phrase, " "))
     return 0;
   return proc (proc_cls, 
               "ole2",
@@ -66,12 +79,26 @@
               strlen (phrase) +1);
 }
 
-typedef struct {
-  const char * text;
+
+/**
+ * Entry in the map from OLE meta type strings
+ * to LE types.
+ */
+struct Matches 
+{
+  /**
+   * OLE description.
+   */
+  const char *text;
+
+  /**
+   * Corresponding LE type.
+   */
   enum EXTRACTOR_MetaType type;
-} Matches;
+};
 
-static Matches tmap[] = {
+
+static struct Matches tmap[] = {
   { "Title", EXTRACTOR_METATYPE_TITLE },
   { "PresentationFormat", EXTRACTOR_METATYPE_FORMAT },
   { "Category", EXTRACTOR_METATYPE_SECTION },
@@ -116,513 +143,683 @@
 };
 
 
+/**
+ * Closure for 'process_metadata'.
+ */
 struct ProcContext
 {
+  /**
+   * Function to call for meta data that was found.
+   */
   EXTRACTOR_MetaDataProcessor proc;
+
+  /**
+   * Closure for 'proc'.
+   */
   void *proc_cls;
+
+  /**
+   * Return value; 0 to continue to extract, 1 if we are done
+   */
   int ret;
 };
 
 
-static void processMetadata(gpointer key,
-                           gpointer value,
-                           gpointer user_data) {
+/**
+ * Function invoked by 'gsf_doc_meta_data_foreach' with
+ * metadata found in the document.
+ *
+ * @param key 'const char *' describing the meta data
+ * @param value the 'const GsfDocProp *' holding the meta data value
+ * @param user_data our 'struct ProcContext' (closure)
+ */
+static void 
+process_metadata (gpointer key,
+                 gpointer value,
+                 gpointer user_data) 
+{
+  const char *type = key;
+  const GsfDocProp *prop = value;
   struct ProcContext *pc = user_data;
-  const char * type = key;
-  const GsfDocProp * prop = value;
-  const GValue * gval;
-  char * contents;
+  const GValue *gval;
+  char *contents;
   int pos;
 
-  if ( (key == NULL) ||
-       (value == NULL) )
+  if ( (NULL == key) ||
+       (NULL == value) )
     return;
-  if (pc->ret != 0)
+  if (0 != pc->ret)
     return;
-  gval = gsf_doc_prop_get_val(prop);
+  gval = gsf_doc_prop_get_val (prop);
 
   if (G_VALUE_TYPE(gval) == G_TYPE_STRING) 
     {
-      contents = strdup(g_value_get_string(gval));
+      contents = strdup (g_value_get_string (gval));
     }
   else
     {
       /* convert other formats? */
-      contents = g_strdup_value_contents(gval);
+      contents = g_strdup_value_contents (gval);
     }
-  if (contents == NULL)
+  if (NULL == contents)
     return;
-  if ( (strlen(contents) > 0) &&
-       (contents[strlen(contents)-1] == '\n') )
-    contents[strlen(contents)-1] = '\0';
-  pos = 0;
-  while (tmap[pos].text != NULL) 
-    {
-      if (0 == strcmp(tmap[pos].text,
-                     type))
-       break;
-      pos++;
-    }
+  if ( (strlen (contents) > 0) &&
+       ('\n' == contents[strlen (contents) - 1]) )
+    contents [strlen (contents) - 1] = '\0';
   if (0 == strcmp (type, "meta:generator"))
     {
-      const char * mimetype = "application/vnd.ms-files";
-      if((0 == strncmp(value, "Microsoft Word", 14)) ||
-        (0 == strncmp(value, "Microsoft Office Word", 21)))
+      const char *mimetype = "application/vnd.ms-files";
+      if ( (0 == strncmp (value, "Microsoft Word", 14)) ||
+          (0 == strncmp (value, "Microsoft Office Word", 21)))
        mimetype = "application/msword";
-      else if((0 == strncmp(value, "Microsoft Excel", 15)) ||
-             (0 == strncmp(value, "Microsoft Office Excel", 22)))
+      else if ( (0 == strncmp(value, "Microsoft Excel", 15)) ||
+               (0 == strncmp(value, "Microsoft Office Excel", 22)) )
        mimetype = "application/vnd.ms-excel";
-      else if((0 == strncmp(value, "Microsoft PowerPoint", 20)) ||
-             (0 == strncmp(value, "Microsoft Office PowerPoint", 27)))
+      else if ( (0 == strncmp(value, "Microsoft PowerPoint", 20)) ||
+               (0 == strncmp(value, "Microsoft Office PowerPoint", 27)) )
        mimetype = "application/vnd.ms-powerpoint";
-      else if(0 == strncmp(value, "Microsoft Project", 17))
+      else if (0 == strncmp(value, "Microsoft Project", 17))
        mimetype = "application/vnd.ms-project";
-      else if(0 == strncmp(value, "Microsoft Visio", 15))
+      else if (0 == strncmp(value, "Microsoft Visio", 15))
        mimetype = "application/vnd.visio";
-      else if(0 == strncmp(value, "Microsoft Office", 16))
+      else if (0 == strncmp(value, "Microsoft Office", 16))
        mimetype = "application/vnd.ms-office";
-      
-      if (0 != addKeyword(pc->proc,
-                         pc->proc_cls, mimetype, EXTRACTOR_METATYPE_MIMETYPE))
+      if (0 != add_metadata (pc->proc,
+                            pc->proc_cls, 
+                            mimetype, 
+                            EXTRACTOR_METATYPE_MIMETYPE))
        {
          free (contents);
          pc->ret = 1;
          return;
        }
     }
-  if (tmap[pos].text != NULL)
+  for (pos = 0; NULL != tmap[pos].text; pos++)
+    if (0 == strcmp (tmap[pos].text,
+                    type))
+      break;
+  if ( (NULL != tmap[pos].text) &&
+       (0 != add_metadata (pc->proc, pc->proc_cls,
+                          contents,
+                          tmap[pos].type)) )
     {
-      if (0 != addKeyword(pc->proc, pc->proc_cls,
-                         contents,
-                         tmap[pos].type))
-       {
-         free (contents);
-         pc->ret = 1;
-         return;
-       }
+      free (contents);
+      pc->ret = 1;
+      return;
     }
-#if DEBUG_OLE2
-  else
-    printf("No match for type `%s'\n",
-          type);
-#endif
   free(contents);
 }
 
 
+/**
+ * Function called on (Document)SummaryInformation OLE
+ * streams.
+ * 
+ * @param in the input OLE stream
+ * @param proc function to call on meta data found
+ * @param proc_cls closure for proc
+ * @return 0 to continue to extract, 1 if we are done
+ */
 static int
-process(GsfInput * in,
-       EXTRACTOR_MetaDataProcessor proc,
-       void *proc_cls)
+process (GsfInput *in,
+        EXTRACTOR_MetaDataProcessor proc,
+        void *proc_cls)
 {
   struct ProcContext pc;
-  GsfDocMetaData * sections;
-  GError * error;
+  GsfDocMetaData *sections;
 
   pc.proc = proc;
   pc.proc_cls = proc_cls;
   pc.ret = 0;
-  sections = gsf_doc_meta_data_new();
-  error = gsf_msole_metadata_read(in, sections);
-  if (error == NULL) {
-    gsf_doc_meta_data_foreach(sections,
-                             &processMetadata,
-                             &pc);
-  }
-  g_object_unref(G_OBJECT(sections));
+  sections = gsf_doc_meta_data_new ();
+  if (NULL == gsf_msole_metadata_read (in, sections))
+    {
+      gsf_doc_meta_data_foreach (sections,
+                                &process_metadata,
+                                &pc);
+    }
+  g_object_unref (G_OBJECT (sections));
   return pc.ret;
 }
 
+
+/**
+ * Function called on SfxDocumentInfo OLE
+ * streams.
+ * 
+ * @param in the input OLE stream
+ * @param proc function to call on meta data found
+ * @param proc_cls closure for proc
+ * @return 0 to continue to extract, 1 if we are done
+ */
 static int
-processSO(GsfInput * src,
-         EXTRACTOR_MetaDataProcessor proc,
-         void *proc_cls) {
-  off_t size = gsf_input_size(src);
-  if ( (size < 0x374) || (size > 4*1024*1024) )  /* == 0x375?? */
+process_star_office (GsfInput *src,
+                    EXTRACTOR_MetaDataProcessor proc,
+                    void *proc_cls) 
+{
+  off_t size = gsf_input_size (src);
+
+  if ( (size < 0x374) || 
+       (size > 4*1024*1024) )  /* == 0x375?? */
     return 0;
-  char buf[size];
-  gsf_input_read(src, size, (unsigned char*) buf);
-  if ( (buf[0] != 0x0F) ||
-       (buf[1] != 0x0) ||
-       (0 != strncmp(&buf[2],
-                    "SfxDocumentInfo",
-                    strlen("SfxDocumentInfo"))) ||
-       (buf[0x11] != 0x0B) ||
-       (buf[0x13] != 0x00) || /* pw protected! */
-       (buf[0x12] != 0x00) ) 
-    return 0;
-  buf[0xd3] = '\0';
-  if (buf[0x94] + buf[0x93] > 0)
-    if (0 != addKeyword(proc, proc_cls,
-                       &buf[0x95],
-                       EXTRACTOR_METATYPE_TITLE))
+  {
+    char buf[size];
+
+    gsf_input_read (src, size, (unsigned char*) buf);
+    if ( (buf[0] != 0x0F) ||
+        (buf[1] != 0x0) ||
+        (0 != strncmp (&buf[2],
+                       "SfxDocumentInfo",
+                       strlen ("SfxDocumentInfo"))) ||
+        (buf[0x11] != 0x0B) ||
+        (buf[0x13] != 0x00) || /* pw protected! */
+        (buf[0x12] != 0x00) ) 
+      return 0;
+    buf[0xd3] = '\0';
+    if ( (buf[0x94] + buf[0x93] > 0) &&
+        (0 != add_metadata (proc, proc_cls,
+                            &buf[0x95],
+                            EXTRACTOR_METATYPE_TITLE)) )
       return 1;
-  buf[0x114] = '\0';
-  if (buf[0xd5] + buf[0xd4] > 0)
-    if (0 != addKeyword(proc, proc_cls,
-                       &buf[0xd6],
-                       EXTRACTOR_METATYPE_SUBJECT))
+    buf[0x114] = '\0';
+    if ( (buf[0xd5] + buf[0xd4] > 0) &&
+        (0 != add_metadata (proc, proc_cls,
+                            &buf[0xd6],
+                            EXTRACTOR_METATYPE_SUBJECT)) _)
       return 1;
-  buf[0x215] = '\0';
-  if (buf[0x115] + buf[0x116] > 0)
-    if (0 != addKeyword(proc, proc_cls,
-                       &buf[0x117],
-                       EXTRACTOR_METATYPE_COMMENT))
+    buf[0x215] = '\0';
+    if ( (buf[0x115] + buf[0x116] > 0) &&
+        (0 != add_metadata (proc, proc_cls,
+                            &buf[0x117],
+                            EXTRACTOR_METATYPE_COMMENT)) )
       return 1;
-  buf[0x296] = '\0';
-  if (buf[0x216] + buf[0x217] > 0)
-    if (0 != addKeyword(proc, proc_cls,
-                       &buf[0x218],
-                       EXTRACTOR_METATYPE_KEYWORDS))
+    buf[0x296] = '\0';
+    if ( (buf[0x216] + buf[0x217] > 0) &&
+        (0 != add_metadata(proc, proc_cls,
+                           &buf[0x218],
+                           EXTRACTOR_METATYPE_KEYWORDS)) )
       return 1;
-  /* fixme: do timestamps,
-     mime-type, user-defined info's */
+    /* fixme: do timestamps,
+       mime-type, user-defined info's */
+  }
   return 0;
 }
 
-/* *************** wordleaker stuff *************** */
 
+/**
+ * We use "__" to translate using iso-639.
+ * 
+ * @param a string to translate
+ * @return translated string
+ */
 #define __(a) dgettext("iso-639", a)
 
-static const char * lidToLanguage( unsigned int lid ) {
-  switch ( lid ) {
-  case 0x0400:
-    return _("No Proofing");
-  case 0x0401:
-    return __("Arabic");
-  case 0x0402:
-    return __("Bulgarian");
-  case 0x0403:
-    return __("Catalan");
-  case 0x0404:
-    return _("Traditional Chinese");
-  case 0x0804:
-    return _("Simplified Chinese");
-  case 0x0405:
-    return __("Chechen");
-  case 0x0406:
-    return __("Danish");
-  case 0x0407:
-    return __("German");
-  case 0x0807:
-    return _("Swiss German");
-  case 0x0408:
-    return __("Greek");
-  case 0x0409:
-    return _("U.S. English");
-  case 0x0809:
-    return _("U.K. English");
-  case 0x0c09:
-    return _("Australian English");
-  case 0x040a:
-    return _("Castilian Spanish");
-  case 0x080a:
-    return _("Mexican Spanish");
-  case 0x040b:
-    return __("Finnish");
-  case 0x040c:
-    return __("French");
-  case 0x080c:
-    return _("Belgian French");
-  case 0x0c0c:
-    return _("Canadian French");
-  case 0x100c:
-    return _("Swiss French");
-  case 0x040d:
-    return __("Hebrew");
-  case 0x040e:
-    return __("Hungarian");
-  case 0x040f:
-    return __("Icelandic");
-  case 0x0410:
-    return __("Italian");
-  case 0x0810:
-    return _("Swiss Italian");
-  case 0x0411:
-    return __("Japanese");
-  case 0x0412:
-    return __("Korean");
-  case 0x0413:
-    return __("Dutch");
-  case 0x0813:
-    return _("Belgian Dutch");
-  case 0x0414:
-    return _("Norwegian Bokmal");
-  case 0x0814:
-    return __("Norwegian Nynorsk");
-  case 0x0415:
-    return __("Polish");
-  case 0x0416:
-    return __("Brazilian Portuguese");
-  case 0x0816:
-    return __("Portuguese");
-  case 0x0417:
-    return _("Rhaeto-Romanic");
-  case 0x0418:
-    return __("Romanian");
-  case 0x0419:
-    return __("Russian");
-  case 0x041a:
-    return _("Croato-Serbian (Latin)");
-  case 0x081a:
-    return _("Serbo-Croatian (Cyrillic)");
-  case 0x041b:
-    return __("Slovak");
-  case 0x041c:
+
+/**
+ * Get the language string for the given language ID (lid)
+ * value.
+ * 
+ * @param lid language id value
+ * @return language string corresponding to the lid
+ */
+static const char * 
+lid_to_language (unsigned int lid)
+{
+  switch (lid)
+    {
+    case 0x0400:
+      return _("No Proofing");
+    case 0x0401:
+      return __("Arabic");
+    case 0x0402:
+      return __("Bulgarian");
+    case 0x0403:
+      return __("Catalan");
+    case 0x0404:
+      return _("Traditional Chinese");
+    case 0x0804:
+      return _("Simplified Chinese");
+    case 0x0405:
+      return __("Chechen");
+    case 0x0406:
+      return __("Danish");
+    case 0x0407:
+      return __("German");
+    case 0x0807:
+      return _("Swiss German");
+    case 0x0408:
+      return __("Greek");
+    case 0x0409:
+      return _("U.S. English");
+    case 0x0809:
+      return _("U.K. English");
+    case 0x0c09:
+      return _("Australian English");
+    case 0x040a:
+      return _("Castilian Spanish");
+    case 0x080a:
+      return _("Mexican Spanish");
+    case 0x040b:
+      return __("Finnish");
+    case 0x040c:
+      return __("French");
+    case 0x080c:
+      return _("Belgian French");
+    case 0x0c0c:
+      return _("Canadian French");
+    case 0x100c:
+      return _("Swiss French");
+    case 0x040d:
+      return __("Hebrew");
+    case 0x040e:
+      return __("Hungarian");
+    case 0x040f:
+      return __("Icelandic");
+    case 0x0410:
+      return __("Italian");
+    case 0x0810:
+      return _("Swiss Italian");
+    case 0x0411:
+      return __("Japanese");
+    case 0x0412:
+      return __("Korean");
+    case 0x0413:
+      return __("Dutch");
+    case 0x0813:
+      return _("Belgian Dutch");
+    case 0x0414:
+      return _("Norwegian Bokmal");
+    case 0x0814:
+      return __("Norwegian Nynorsk");
+    case 0x0415:
+      return __("Polish");
+    case 0x0416:
+      return __("Brazilian Portuguese");
+    case 0x0816:
+      return __("Portuguese");
+    case 0x0417:
+      return _("Rhaeto-Romanic");
+    case 0x0418:
+      return __("Romanian");
+    case 0x0419:
+      return __("Russian");
+    case 0x041a:
+      return _("Croato-Serbian (Latin)");
+    case 0x081a:
+      return _("Serbo-Croatian (Cyrillic)");
+    case 0x041b:
+      return __("Slovak");
+    case 0x041c:
     return __("Albanian");
-  case 0x041d:
-    return __("Swedish");
-  case 0x041e:
-    return __("Thai");
-  case 0x041f:
-    return __("Turkish");
-  case 0x0420:
-    return __("Urdu");
-  case 0x0421:
-    return __("Bahasa");
-  case 0x0422:
-    return __("Ukrainian");
-  case 0x0423:
-    return __("Byelorussian");
-  case 0x0424:
-    return __("Slovenian");
-  case 0x0425:
-    return __("Estonian");
-  case 0x0426:
-    return __("Latvian");
-  case 0x0427:
-    return __("Lithuanian");
-  case 0x0429:
-    return _("Farsi");
-  case 0x042D:
-    return __("Basque");
-  case 0x042F:
-    return __("Macedonian");
-  case 0x0436:
-    return __("Afrikaans");
-  case 0x043E:
-    return __("Malayalam");
-  default:
-    return NULL;
-  }
+    case 0x041d:
+      return __("Swedish");
+    case 0x041e:
+      return __("Thai");
+    case 0x041f:
+      return __("Turkish");
+    case 0x0420:
+      return __("Urdu");
+    case 0x0421:
+      return __("Bahasa");
+    case 0x0422:
+      return __("Ukrainian");
+    case 0x0423:
+      return __("Byelorussian");
+    case 0x0424:
+      return __("Slovenian");
+    case 0x0425:
+      return __("Estonian");
+    case 0x0426:
+      return __("Latvian");
+    case 0x0427:
+      return __("Lithuanian");
+    case 0x0429:
+      return _("Farsi");
+    case 0x042D:
+      return __("Basque");
+    case 0x042F:
+      return __("Macedonian");
+    case 0x0436:
+      return __("Afrikaans");
+    case 0x043E:
+      return __("Malayalam");
+    default:
+      return NULL;
+    }
 }
 
 
+/**
+ * Extract editing history from XTable stream.
+ *
+ * @param stream OLE stream to process
+ * @param lcbSttbSavedBy length of the revision history in bytes
+ * @param fcSttbSavedBy offset of the revision history in the stream
+ * @param proc function to call on meta data found
+ * @param proc_cls closure for proc
+ * @return 0 to continue to extract, 1 if we are done
+ */
 static int
-history_extract(GsfInput * stream,
-               unsigned int lcbSttbSavedBy,
-               unsigned int fcSttbSavedBy,
-               EXTRACTOR_MetaDataProcessor proc,
-               void *proc_cls)
+history_extract (GsfInput *stream,
+                unsigned int lcbSttbSavedBy,
+                unsigned int fcSttbSavedBy,
+                EXTRACTOR_MetaDataProcessor proc,
+                void *proc_cls)
 {
-  unsigned int where = 0;
-  unsigned char * lbuffer;
+  unsigned int where;
+  unsigned char *lbuffer;
   unsigned int i;
   unsigned int length;
-  char * author;
-  char * filename;
-  char * rbuf;
+  char *author;
+  char *filename;
+  char *rbuf;
   unsigned int nRev;
   int ret;
 
-  // goto offset of revision
-  gsf_input_seek(stream, fcSttbSavedBy, G_SEEK_SET);
-  if (gsf_input_remaining(stream) < lcbSttbSavedBy)
+  /* goto offset of revision information */
+  gsf_input_seek (stream, fcSttbSavedBy, G_SEEK_SET);
+  if (gsf_input_remaining (stream) < lcbSttbSavedBy)
     return 0;
-  lbuffer = malloc(lcbSttbSavedBy);
-  if (lbuffer == NULL)
+  if (NULL == (lbuffer = malloc (lcbSttbSavedBy)))
     return 0;
-  // read all the revision history
-  gsf_input_read(stream, lcbSttbSavedBy, lbuffer);
-  // there are n strings, so n/2 revisions (author & file)
+  /* read all the revision history */
+  gsf_input_read (stream, lcbSttbSavedBy, lbuffer);
+  /* there are n strings, so n/2 revisions (author & file) */
   nRev = (lbuffer[2] + (lbuffer[3] << 8)) / 2;
   where = 6;
   ret = 0;
-  for (i=0; i < nRev; i++) {
-    if (where >= lcbSttbSavedBy)
-      break;
-    length = lbuffer[where++];
-    if ( (where + 2 * length + 2 >= lcbSttbSavedBy) ||
-        (where + 2 * length + 2 <= where) )
-      break;
-    author = EXTRACTOR_common_convert_to_utf8((const char*) &lbuffer[where],
-                                             length * 2,
-                                             "UTF-16BE");
-    where += length * 2 + 1;
-    length = lbuffer[where++];
-    if ( (where + 2 * length >= lcbSttbSavedBy) ||
-        (where + 2 * length + 1 <= where) ) {
-      if (author != NULL)
-       free(author);
-      break;
+  for (i=0; i < nRev; i++) 
+    {
+      if (where >= lcbSttbSavedBy)
+       break;
+      length = lbuffer[where++];
+      if ( (where + 2 * length + 2 >= lcbSttbSavedBy) ||
+          (where + 2 * length + 2 <= where) )
+       break;
+      author = EXTRACTOR_common_convert_to_utf8 ((const char*) &lbuffer[where],
+                                                length * 2,
+                                                "UTF-16BE");
+      where += length * 2 + 1;
+      length = lbuffer[where++];
+      if ( (where + 2 * length >= lcbSttbSavedBy) ||
+          (where + 2 * length + 1 <= where) ) 
+       {
+         if (NULL != author)
+           free(author);
+         break;
+       }
+      filename = EXTRACTOR_common_convert_to_utf8 ((const char*) &lbuffer[where],
+                                                  length * 2,
+                                                  "UTF-16BE");
+      where += length * 2 + 1;
+      if ( (NULL != author) &&
+          (NULL != filename) )
+       {
+         if (NULL != (rbuf = malloc (strlen (author) + strlen (filename) + 512)))
+           {
+             snprintf (rbuf, 
+                       512 + strlen (author) + strlen (filename),
+                       _("Revision #%u: Author `%s' worked on `%s'"),
+                       i,
+                       author,
+                       filename);
+             ret = add_metadata (proc, proc_cls,
+                                 rbuf,
+                                 EXTRACTOR_METATYPE_REVISION_HISTORY);    
+             free (rbuf);
+           }
+       }
+      if (NULL != author)
+       free (author);
+      if (NULL != filename)
+       free (filename);
+      if (0 != ret)
+       break;
     }
-    filename = EXTRACTOR_common_convert_to_utf8((const char*) &lbuffer[where],
-                                               length * 2,
-                                               "UTF-16BE");
-    where += length * 2 + 1;
-    if ( (author != NULL) &&
-        (filename != NULL) )
-      {
-       rbuf = malloc(strlen(author) + strlen(filename) + 512);
-       if (rbuf != NULL)
-         {
-           snprintf(rbuf, 
-                    512 + strlen(author) + strlen(filename),
-                    _("Revision #%u: Author '%s' worked on '%s'"),
-                    i, author, filename);
-           ret = addKeyword(proc, proc_cls,
-                            rbuf,
-                            EXTRACTOR_METATYPE_REVISION_HISTORY);    
-           if (rbuf != NULL)
-             free(rbuf);
-         }
-      }
-    if (author != NULL)
-      free(author);
-    if (filename != NULL)
-      free(filename);
-    if (0 != ret)
-      break;
-  }
-  free(lbuffer);
+  free (lbuffer);
   return ret;
 }
 
 
+/* *************************** custom GSF input method ***************** */
 
-const char *
-EXTRACTOR_ole2_options ()
+G_BEGIN_DECLS
+#define LE_TYPE_INPUT                  (le_input_get_type ())
+#define LE_INPUT(obj)                  (G_TYPE_CHECK_INSTANCE_CAST ((obj), TYPE_LE_INPUT, LeInput))
+#define LE_INPUT_CLASS(klass)          (G_TYPE_CHECK_CLASS_CAST ((klass), TYPE_LE_INPUT, LeInputClass))
+#define IS_LE_INPUT(obj)               (G_TYPE_CHECK_INSTANCE_TYPE ((obj), TYPE_LE_INPUT))
+#define IS_LE_INPUT_CLASS(klass)       (G_TYPE_CHECK_CLASS_TYPE ((klass), TYPE_LE_INPUT))
+#define LE_INPUT_GET_CLASS(obj)        (G_TYPE_INSTANCE_GET_CLASS ((obj), TYPE_LE_INPUT, LeInputClass))
+
+/**
+ * Overall state of an "LeInput" object.
+ */
+typedef struct _LeInput 
 {
-  /* 
-     Since the Gnome developers think that being unable to
-     unload plugins is an 'acceptable' limitation, we
-     require out-of-process execution for plugins depending
-     on libgsf and other glib-based plugins.
-     See also https://bugzilla.gnome.org/show_bug.cgi?id=374940 
-  */
-  return "oop-only"; 
+  /**
+   * Inherited state from parent (GsfInput).
+   */
+  GsfInput input;
+  
+  /*< private > */
+  /**
+   * Private state of the LeInput.
+   */
+  LeInputPrivate *priv;
+} LeInput;
+
+
+/**
+ * Internal state of an "LeInput" object.
+ */
+typedef struct _LeInputPrivate 
+{
+  /**
+   * Our extraction context.
+   */
+  struct EXTRACTOR_ExtractContext *ec;
+} LeInputPrivate;
+
+
+/**
+ * LeInput's class state.
+ */
+typedef struct _LeInputClass
+{
+  /**
+   * GsfInput is our parent class.
+   */
+  GsfInputClass parent_class;
+
+  /* Padding for future expansion */
+  void (*_gtk_reserved1) (void);
+  void (*_gtk_reserved2) (void);
+  void (*_gtk_reserved3) (void);
+  void (*_gtk_reserved4) (void);
+} LeInputClass;
+
+
+/**
+ * Required method to obtain the LeInput "type".
+ */
+GType
+le_input_get_type (void) G_GNUC_CONST;
+
+
+/**
+ * Constructor for LeInput objects. 
+ *
+ * @param ec extraction context to use
+ * @return the LeInput, NULL on error
+ */
+GsfInput *
+le_input_new (struct EXTRACTOR_ExtractContext *ec);
+G_END_DECLS
+
+
+/**
+ * Macro to create LeInput type definition.
+ */
+G_DEFINE_TYPE (LeInput, le_input, GSF_TYPE_INPUT)
+
+
+/**
+ * Class initializer for LeInput: sets the 'read' method and registers LeInputPrivate.
+ */
+static void
+le_input_class_init (LeInputClass *class)
+{
+  // GObjectClass *gobject_class;
+  GsfInputClass *input_class;
+
+  // gobject_class = (GObjectClass *) class;
+  input_class = (GsfInputClass *) class;
+  input_class->read = le_input_read;
+  g_type_class_add_private (class, sizeof (LeInputPrivate));
 }
 
 
-int 
-EXTRACTOR_ole2_extract (const char *data,
-                       size_t size,
-                       EXTRACTOR_MetaDataProcessor proc,
-                       void *proc_cls,
-                       const char *options)
+
+/* *********************** end of custom GSF input method ************* */
+
+
+/**
+ * Main entry method for the OLE2 extraction plugin.  
+ *
+ * @param ec extraction context provided to the plugin
+ */
+void
+EXTRACTOR_ole2_extract_method (struct EXTRACTOR_ExtractContext *ec)
 {
-  GsfInput * input;
-  GsfInfile * infile;
-  GsfInput * src;
-  const char * name;
-  int i;
+  GsfInput *input;
+  GsfInfile *infile;
+  GsfInput *src;
+  const char *name;
+  unsigned int i;
   unsigned int lcb;
   unsigned int fcb;
-  const unsigned char * data512;
+  const unsigned char *data512;
   unsigned int lid;
-  const char * lang;
+  const char *lang;
   int ret;
 
-  ret = 0;
   if (size < 512 + 898)
-    return 0; /* can hardly be OLE2 */
-  input = gsf_input_memory_new((const guint8 *) data,
-                              (gsf_off_t) size,
-                              FALSE);
-  if (input == NULL)
-    return 0;
-
-  infile = gsf_infile_msole_new(input, NULL);
-  if (infile == NULL) {
-    g_object_unref(G_OBJECT(input));
-    return 0;
-  }
-  lcb = 0;
-  fcb = 0;
-  for (i=0;i<gsf_infile_num_children(infile);i++) {
-    name = gsf_infile_name_by_index (infile, i);
-    src = NULL;
-    if (ret != 0)
-      break;
-    if (name == NULL)
-      continue;
-    if ( (0 == strcmp(name, "\005SummaryInformation"))
-        || (0 == strcmp(name, "\005DocumentSummaryInformation")) ) {
-      src = gsf_infile_child_by_index (infile, i);
-      if (src != NULL)
-       ret = process(src,
-                     proc, 
-                     proc_cls);
+    return; /* can hardly be OLE2 */
+  if (NULL == (input = gsf_input_memory_new ((const guint8 *) data,
+                                            (gsf_off_t) size,
+                                            FALSE)))
+    return;
+  if (NULL == (infile = gsf_infile_msole_new (input, NULL)))
+    {
+      g_object_unref (G_OBJECT (input));
+      return 0;
     }
-    if (0 == strcmp(name, "SfxDocumentInfo")) {
-      src = gsf_infile_child_by_index (infile, i);
-      if ( (src != NULL) && (ret == 0) )
-       ret = processSO(src,
-                       proc,
-                       proc_cls);
+  ret = 0;
+  for (i=0;i<gsf_infile_num_children (infile);i++) 
+    {
+      if (0 != ret)
+       break;
+      if (NULL == (name = gsf_infile_name_by_index (infile, i)))
+       continue;
+      src = NULL;
+      if ( ( (0 == strcmp(name, "\005SummaryInformation")) ||
+            (0 == strcmp(name, "\005DocumentSummaryInformation")) ) &&
+          (NULL != (src = gsf_infile_child_by_index (infile, i))) )
+       ret = process (src,
+                      proc, 
+                      proc_cls);
+      if ( (0 == strcmp (name, "SfxDocumentInfo")) &&
+          (NULL != (src = gsf_infile_child_by_index (infile, i))) )
+       ret = process_star_office (src,
+                                  proc,
+                                  proc_cls);
+      if (NULL != src)
+       g_object_unref (G_OBJECT (src));
     }
-    if (src != NULL)
-      g_object_unref(G_OBJECT(src));
-  }
+  if (0 != ret)
+    goto CLEANUP;
 
   data512 = (const unsigned char*) &data[512];
   lid = data512[6] + (data512[7] << 8);
+  if ( (NULL != (lang = lid_to_language (lid))) &&
+       (0 != (ret = add_metadata (proc, proc_cls,
+                                 lang,
+                                 EXTRACTOR_METATYPE_LANGUAGE))) )
+    goto CLEANUP;
   lcb = data512[726] + (data512[727] << 8) + (data512[728] << 16) + (data512[729] << 24);
   fcb = data512[722] + (data512[723] << 8) + (data512[724] << 16) + (data512[725] << 24);
-  lang = lidToLanguage(lid);
-  if ( (lang != NULL) && (ret == 0) )
-    ret = addKeyword(proc, proc_cls,
-                    lang,
-                    EXTRACTOR_METATYPE_LANGUAGE);  
-  if (lcb >= 6) {
-    for (i=0;i<gsf_infile_num_children(infile);i++) {
+  if (lcb < 6)
+    goto CLEANUP;
+  for (i=0;i<gsf_infile_num_children (infile);i++) 
+    {
       if (ret != 0)
        break;
-      name = gsf_infile_name_by_index (infile, i);
-      if (name == NULL)
+      if (NULL == (name = gsf_infile_name_by_index (infile, i)))
        continue;
-      if ( (0 == strcmp(name, "1Table")) ||
-          (0 == strcmp(name, "0Table")) ) {
-       src = gsf_infile_child_by_index (infile, i);
-       if (src != NULL) {
-         ret = history_extract(src,
-                               lcb,
-                               fcb,
-                               proc, proc_cls);
-         g_object_unref(G_OBJECT(src));
-       }
-      }
+      if ( ( (0 == strcmp (name, "1Table")) ||
+            (0 == strcmp (name, "0Table")) ) &&
+          (NULL != (src = gsf_infile_child_by_index (infile, i))) )
+       {
+         ret = history_extract (src,
+                                lcb,
+                                fcb,
+                                proc, proc_cls);
+         g_object_unref (G_OBJECT (src));
+       }    
     }
-  }
-  g_object_unref(G_OBJECT(infile));
-  g_object_unref(G_OBJECT(input));
+ CLEANUP:
+  g_object_unref (G_OBJECT (infile));
+  g_object_unref (G_OBJECT (input));
   return ret;
 }
 
 
+/**
+ * Custom log function we give to GSF to disable logging.
+ *
+ * @param log_domain unused
+ * @param log_level unused
+ * @param message unused
+ * @param user_data unused
+ */
 static void 
 nolog (const gchar *log_domain,
        GLogLevelFlags log_level,
        const gchar *message,
-       gpointer user_data) {
+       gpointer user_data) 
+{
+  /* do nothing */
 }
 
 
-void __attribute__ ((constructor)) ole2_ltdl_init() {
+/**
+ * OLE2 plugin constructor.  Initializes glib and gsf; in particular,
+ * warnings and critical messages from "libgsf:msole" are suppressed.
+ */
+void __attribute__ ((constructor)) 
+ole2_ltdl_init() 
+{
   g_type_init();
 #ifdef HAVE_GSF_INIT
   gsf_init();
 #endif
   /* disable logging -- thanks, Jody! */
-  g_log_set_handler ("libgsf:msole", G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING,  &nolog, NULL);
+  g_log_set_handler ("libgsf:msole",
+                    G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING,  
+                    &nolog, NULL);
 }
 
 
-void __attribute__ ((destructor)) ole2_ltdl_fini() {
+/**
+ * OLE2 plugin destructor.  Shutdown of gsf.
+ */
+void __attribute__ ((destructor))
+ole2_ltdl_fini() 
+{
 #ifdef HAVE_GSF_INIT
   gsf_shutdown();
 #endif
 }
 
+
 /* end of ole2_extractor.c */
-
[Prev in Thread]
Current Thread
[Next in Thread]
[GNUnet-SVN] r23196 - Extractor/src/plugins, gnunet <=
Prev by Date: [GNUnet-SVN] r23195 - gnunet/src/testbed
Next by Date: [GNUnet-SVN] r23197 - in Extractor: . src/plugins
Previous by thread: [GNUnet-SVN] r23195 - gnunet/src/testbed
Next by thread: [GNUnet-SVN] r23197 - in Extractor: . src/plugins
Index(es):
- Date
- Thread