gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r1527 - in Extractor: doc src/include src/main src/plugins


From: grothoff
Subject: [GNUnet-SVN] r1527 - in Extractor: doc src/include src/main src/plugins src/plugins/exiv2 src/plugins/ole2 src/plugins/oo src/plugins/thumbnail
Date: Thu, 14 Jul 2005 12:49:02 -0700 (PDT)

Author: grothoff
Date: 2005-07-14 12:48:48 -0700 (Thu, 14 Jul 2005)
New Revision: 1527

Removed:
   Extractor/src/plugins/thumbnail/thumbnailextractor-old.c
Modified:
   Extractor/doc/libextractor.3
   Extractor/src/include/extractor.h
   Extractor/src/include/plibc.h
   Extractor/src/main/extractor.c
   Extractor/src/plugins/exiv2/exiv2extractor.cc
   Extractor/src/plugins/filenameextractor.c
   Extractor/src/plugins/manextractor.c
   Extractor/src/plugins/mp3extractor.c
   Extractor/src/plugins/oggextractor.c
   Extractor/src/plugins/ole2/ole2extractor.c
   Extractor/src/plugins/oo/ooextractor.c
   Extractor/src/plugins/psextractor.c
   Extractor/src/plugins/tarextractor.c
   Extractor/src/plugins/thumbnail/thumbnailextractor.c
Log:
API updates

Modified: Extractor/doc/libextractor.3
===================================================================
--- Extractor/doc/libextractor.3        2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/doc/libextractor.3        2005-07-14 19:48:48 UTC (rev 1527)
@@ -1,6 +1,6 @@
-.TH LIBEXTRACTOR 3 "Apr 5, 2005"
+.TH LIBEXTRACTOR 3 "Jul 14, 2005"
 .SH NAME
-libextractor \- meta\-information extraction library 0.5.0
+libextractor \- meta\-information extraction library 0.5.2
 .SH SYNOPSIS
 
 \fB#include <extractor.h>
@@ -28,6 +28,8 @@
 
  \fBEXTRACTOR_KeywordList * EXTRACTOR_getKeywords (EXTRACTOR_ExtractorList * \fIextractor\fB, const char * \fIfilename\fB);
 
+ \fBEXTRACTOR_KeywordList * EXTRACTOR_getKeywords (EXTRACTOR_ExtractorList * \fIextractor\fB, const char * \fIdata\fB, size_t \fIsize\fB);
+
  \fBEXTRACTOR_KeywordList * EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList * \fIlist\fB);
 
  \fBEXTRACTOR_KeywordList * EXTRACTOR_removeDuplicateKeywords (EXTRACTOR_KeywordList * \fIlist\fB, const unsigned int \fIoptions\fB);

Modified: Extractor/src/include/extractor.h
===================================================================
--- Extractor/src/include/extractor.h   2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/include/extractor.h   2005-07-14 19:48:48 UTC (rev 1527)
@@ -161,6 +161,9 @@
 /**
  * Signature of the extract method that each plugin
  * must provide.
+ * 
+ * @param filename MAYBE NULL (!)
+ * @param data must not be modified (!)
  */
 typedef EXTRACTOR_KeywordList * 
 (*ExtractMethod)(const char * filename,
@@ -273,6 +276,22 @@
 
 
 /**
+ * Extract keywords from a buffer in memory
+ * using the available extractors.
+ *
+ * @param extractor the list of extractor libraries
+ * @param data the data of the file
+ * @param size the number of bytes in data
+ * @return the list of keywords found in the file, NULL if none
+ *         were found (or other errors)
+ */
+EXTRACTOR_KeywordList *
+EXTRACTOR_getKeywords2(EXTRACTOR_ExtractorList * extractor,
+                      const char * data,
+                      size_t size);
+
+
+/**
  * Remove duplicate keywords from the list.
  * @param list the original keyword list (destroyed in the process!)
  * @param options a set of options (DUPLICATES_XXXX)
@@ -339,6 +358,31 @@
 unsigned int EXTRACTOR_countKeywords(EXTRACTOR_KeywordList * keywords);
   
 
+/**
+ * This function can be used to decode the binary data
+ * encoded in the libextractor metadata (i.e. for
+ * the  thumbnails).
+ *
+ * @param in 0-terminated string from the meta-data
+ * @return 1 on error, 0 on success
+ */
+int EXTRACTOR_binaryDecode(const unsigned char * in,
+                          unsigned char ** out,
+                          size_t * outSize);
+
+
+/**
+ * Encode the given binary data object
+ * as a 0-terminated C-string according
+ * to the LE binary data encoding standard.
+ *
+ * @return NULL on error, the 0-terminated
+ *  encoding otherwise
+ */
+char * EXTRACTOR_binaryEncode(const char * data,
+                             size_t size);
+
+
 #ifdef __cplusplus
 }
 #endif

Modified: Extractor/src/include/plibc.h
===================================================================
--- Extractor/src/include/plibc.h       2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/include/plibc.h       2005-07-14 19:48:48 UTC (rev 1527)
@@ -1,20 +1,20 @@
 /*
-     This file is part of PlibC.
-     (C) 2005 Nils Durner (and other contributing authors)
-
-          This library is free software; you can redistribute it and/or
-          modify it under the terms of the GNU Lesser General Public
-          License as published by the Free Software Foundation; either
-          version 2.1 of the License, or (at your option) any later version.
+  This file is part of PlibC.
+  (C) 2005 Nils Durner (and other contributing authors)
+  
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
        
-          This library is distributed in the hope that it will be useful,
-          but WITHOUT ANY WARRANTY; without even the implied warranty of
-          MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-          Lesser General Public License for more details.
-       
-          You should have received a copy of the GNU Lesser General Public
-          License along with this library; if not, write to the Free Software
-          Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+  
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
 /**

Modified: Extractor/src/main/extractor.c
===================================================================
--- Extractor/src/main/extractor.c      2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/main/extractor.c      2005-07-14 19:48:48 UTC (rev 1527)
@@ -647,8 +647,11 @@
     return NULL;
   result = NULL;
   while (extractor != NULL) {
-    result = extractor->extractMethod (filename, buffer, size, result,
-                                      extractor->options);
+    result = extractor->extractMethod(filename,
+                                     buffer, 
+                                     size,
+                                     result,
+                                     extractor->options);
     extractor = extractor->next;
   }
   if (size > 0)
@@ -903,4 +906,110 @@
   return count;
 }
 
+/**
+ * Encode the given binary data object
+ * as a 0-terminated C-string according
+ * to the LE binary data encoding standard.
+ *
+ * @return NULL on error, the 0-terminated
+ *  encoding otherwise
+ */
+char * EXTRACTOR_binaryEncode(const char * data,
+                             size_t size) {
+
+  char * binary;
+  size_t pos;
+  size_t end;
+  size_t wpos;
+  size_t i;
+  unsigned int markers[8]; /* 256 bits */
+  unsigned char marker;
+  char * format;
+
+ /* encode! */
+  binary = malloc(2 + size + (size+256) / 254);
+  if (binary == NULL)
+    return NULL;
+
+  pos = 0;
+  wpos = 0;
+  while (pos < size) {
+    /* find unused value between 1 and 255 in
+       the next 254 bytes */
+    end = pos + 254;
+    if (end < pos)
+      break; /* integer overflow! */
+    if (end > size)
+      end = size;
+    memset(markers, 
+          0, 
+          sizeof(markers));
+    for (i=pos;i<end;i++)
+      markers[data[i]&7] |= 1 << (data[i] >> 3);
+    marker = 1;
+    while (markers[marker&7] & (1 << (marker >> 3))) {
+      marker++;
+      if (marker == 0) {
+       /* assertion failed... */
+       free(binary);
+       return NULL;
+      }
+    }
+    /* recode */
+    binary[wpos++] = marker;
+    for (i=pos;i<end;i++)
+      binary[wpos++] = data[i] == 0 ? marker : data[i];
+    pos = end;
+  }
+  binary[wpos++] = 0; /* 0-termination! */
+  return binary;
+}
+
+
+/**
+ * This function can be used to decode the binary data
+ * encoded in the libextractor metadata (i.e. for
+ * the  thumbnails).
+ *
+ * @param in 0-terminated string from the meta-data
+ * @return 1 on error, 0 on success
+ */
+int EXTRACTOR_binaryDecode(const unsigned char * in,
+                          unsigned char ** out,
+                          size_t * outSize) {
+  unsigned char * buf;
+  size_t pos;
+  size_t wpos;
+  unsigned char marker;
+  size_t i;
+  size_t end;
+  size_t inSize;
+
+  inSize = strlen(in);
+  if (inSize == 0) {
+    *out = NULL;
+    *outSize = 0;
+    return 1;
+  }
+
+  buf = malloc(inSize); /* slightly more than needed ;-) */
+  *out = buf;
+
+  pos = 0;
+  wpos = 0;
+  while (pos < inSize) {
+    end = pos + 255; /* 255 here: count the marker! */
+    if (end > inSize)
+      end = inSize;
+    marker = in[pos++];
+    for (i=pos;i<end;i++)
+      buf[wpos++] = (in[i] == marker) ? 0 : in[i];
+    pos = end;
+  }
+  *outSize = wpos;
+  return 0;
+}
+
+
+
 /* end of extractor.c */

Modified: Extractor/src/plugins/exiv2/exiv2extractor.cc
===================================================================
--- Extractor/src/plugins/exiv2/exiv2extractor.cc       2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/plugins/exiv2/exiv2extractor.cc       2005-07-14 19:48:48 UTC (rev 1527)
@@ -83,7 +83,7 @@
 
 extern "C" {
 
-    struct EXTRACTOR_Keywords * libextractor_exiv2_extract(char * filename,
+    struct EXTRACTOR_Keywords * libextractor_exiv2_extract(const char * filename,
                                                            unsigned char * data,
                                                            size_t size,
                                                            struct EXTRACTOR_Keywords * prev) 
@@ -91,11 +91,8 @@
         struct EXTRACTOR_Keywords * result = 0;
 
         try {
-            if (!Exiv2::fileExists(filename, true)) return result;
 
-
-
-        Exiv2::Image::AutoPtr image = Exiv2::ImageFactory::open(filename);
+           Exiv2::Image::AutoPtr image = Exiv2::ImageFactory::open(data, size);
         assert(image.get() != 0);
         image->readMetadata();
         Exiv2::ExifData &exifData = image->exifData();

Modified: Extractor/src/plugins/filenameextractor.c
===================================================================
--- Extractor/src/plugins/filenameextractor.c   2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/plugins/filenameextractor.c   2005-07-14 19:48:48 UTC (rev 1527)
@@ -24,14 +24,17 @@
 
 
 /* "extract" the 'filename' as a keyword */
-struct EXTRACTOR_Keywords * libextractor_filename_extract(const char * filename,
-                                                          char * date,
-                                                          size_t size,
-                                                          struct EXTRACTOR_Keywords * prev) {
+struct EXTRACTOR_Keywords * 
+libextractor_filename_extract(const char * filename,
+                             char * date,
+                             size_t size,
+                             struct EXTRACTOR_Keywords * prev) {
   EXTRACTOR_KeywordList * keyword;
   const char * filenameRoot = filename;
   int res;
 
+  if (filename == NULL)
+    return prev;
   for (res=strlen(filename)-1;res>=0;res--)
     if (filename[res] == DIR_SEPARATOR) {
       filenameRoot = &filename[res+1];
@@ -41,8 +44,7 @@
   keyword->next = prev;
   keyword->keyword = convertToUtf8(filenameRoot,
                                   strlen(filenameRoot),
-                                  nl_langinfo(CODESET)
-           );
+                                  nl_langinfo(CODESET));
   keyword->keywordType = EXTRACTOR_FILENAME;
   return keyword;
 }

Modified: Extractor/src/plugins/manextractor.c
===================================================================
--- Extractor/src/plugins/manextractor.c        2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/plugins/manextractor.c        2005-07-14 19:48:48 UTC (rev 1527)
@@ -203,31 +203,63 @@
   return prev;
 }
 
+static voidpf Emalloc(voidpf opaque, uInt items, uInt size) {
+  return malloc(size * items);
+}
+
+static void Efree(voidpf opaque, voidpf ptr) {
+  free(ptr);
+}
+
 /**
  * How many bytes do we actually try to scan? (from the beginning
  * of the file).
  */
 #define MAX_READ 2048
 
-struct EXTRACTOR_Keywords * libextractor_man_extract(const char * filename,
-                                                    char * data,
-                                                    size_t size,
-                                                    struct EXTRACTOR_Keywords * prev) {
-  gzFile gz;
+struct EXTRACTOR_Keywords * 
+libextractor_man_extract(const char * filename,
+                        char * data,
+                        size_t size,
+                        struct EXTRACTOR_Keywords * prev) {
+  z_stream strm;
   char * buf;
   int len;
 
-  gz = gzopen(filename, "rb");
-  buf = malloc(MAX_READ);
-  len = gzread(gz, buf, MAX_READ);
-  if (len < 0) {
+  memset(&strm, 
+        0, 
+        sizeof(z_stream));
+  strm.next_in = (char*) data;
+  strm.avail_in = size;
+  strm.total_in = 0;
+  strm.zalloc = &Emalloc;
+  strm.zfree = &Efree;
+  strm.opaque = NULL;
+  if (Z_OK == inflateInit2(&strm,
+                          15 + 32)) {
+    buf = malloc(MAX_READ);
+    if (buf == NULL) {
+      inflateEnd(&strm);
+      return prev;
+    }
+    strm.next_out = buf;
+    strm.avail_out = MAX_READ;
+    inflate(&strm,
+           Z_FINISH);
+    if (strm.total_out > 0) {
+      prev = tryParse(buf,
+                     strm.total_out, 
+                     prev);
+      inflateEnd(&strm);
+      free(buf);
+      return prev;
+    }
     free(buf);
-    gzclose(gz);
-    return prev;
-  }
-  gzclose(gz);
-  prev = tryParse(buf, len, prev);
-  free(buf);
-  return prev;
+    inflateEnd(&strm);
+  } 
+  return tryParse(data, 
+                 size,
+                 prev);
 }
 
+/* end of manextractor.c */

Modified: Extractor/src/plugins/mp3extractor.c
===================================================================
--- Extractor/src/plugins/mp3extractor.c        2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/plugins/mp3extractor.c        2005-07-14 19:48:48 UTC (rev 1527)
@@ -435,7 +435,7 @@
 
 /* mimetype = audio/mpeg */
 struct EXTRACTOR_Keywords *
-libextractor_mp3_extract(char * filename,
+libextractor_mp3_extract(const char * filename,
                         char * data,
                         size_t size,
                         struct EXTRACTOR_Keywords * klist) {

Modified: Extractor/src/plugins/oggextractor.c
===================================================================
--- Extractor/src/plugins/oggextractor.c        2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/plugins/oggextractor.c        2005-07-14 19:48:48 UTC (rev 1527)
@@ -74,7 +74,7 @@
 }
 
 /* mimetype = application/ogg */
-struct EXTRACTOR_Keywords * libextractor_ogg_extract(char * filename,
+struct EXTRACTOR_Keywords * libextractor_ogg_extract(const char * filename,
                                                      char * data,
                                                      size_t size,
                                                      struct EXTRACTOR_Keywords * prev) {
@@ -94,20 +94,12 @@
   callbacks.close_func = &closeOk;
   callbacks.tell_func = &tellError;
   if (0 != ov_open_callbacks(NULL, &vf, data, size, callbacks)) {
-#if DEBUG_EXTRACT_OGG
-    fprintf(stderr,"\nError opening file %s as ogg\n",filename);
-#endif
     ov_clear(&vf);
     return prev;
   }
   comments = ov_comment(&vf, -1);
 
   if (NULL == comments) {
-#if DEBUG_EXTRACT_OGG
-    fprintf(stderr,
-           "\nError decoding ogg information of %s, ignoring.\n",
-           filename);
-#endif
     ov_clear(&vf);
     return prev;
   }

Modified: Extractor/src/plugins/ole2/ole2extractor.c
===================================================================
--- Extractor/src/plugins/ole2/ole2extractor.c  2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/plugins/ole2/ole2extractor.c  2005-07-14 19:48:48 UTC (rev 1527)
@@ -44,9 +44,10 @@
   g_type_init(); 
 }
 
-static struct EXTRACTOR_Keywords * addKeyword(EXTRACTOR_KeywordList *oldhead,
-                                             const char *phrase,
-                                             EXTRACTOR_KeywordType type) {
+static struct EXTRACTOR_Keywords * 
+addKeyword(EXTRACTOR_KeywordList *oldhead,
+          const char *phrase,
+          EXTRACTOR_KeywordType type) {
 
    EXTRACTOR_KeywordList * keyword;
    if (strlen(phrase) == 0)
@@ -917,10 +918,11 @@
   return prev;
 }
 
-struct EXTRACTOR_Keywords * libextractor_ole2_extract(const char * filename,
-                                                     char * date,
-                                                     size_t size,
-                                                     struct EXTRACTOR_Keywords * prev) {
+struct EXTRACTOR_Keywords * 
+libextractor_ole2_extract(const char * filename,
+                         char * date,
+                         size_t size,
+                         struct EXTRACTOR_Keywords * prev) {
   GsfInput   *input;
   GsfInfile  *infile;
   int i;

Modified: Extractor/src/plugins/oo/ooextractor.c
===================================================================
--- Extractor/src/plugins/oo/ooextractor.c      2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/plugins/oo/ooextractor.c      2005-07-14 19:48:48 UTC (rev 1527)
@@ -21,6 +21,7 @@
 #include "platform.h"
 #include "extractor.h"
 #include "unzip.h"
+#include "ioapi.h"
 
 #define CASESENSITIVITY (0)
 #define MAXFILENAME (256)
@@ -68,11 +69,85 @@
   { NULL, 0 },
 };
 
+typedef struct Ecls {
+  char * data;
+  size_t size;
+  size_t pos;
+} Ecls;
 
-struct EXTRACTOR_Keywords * libextractor_oo_extract(const char * filename,
-                                                   char * data,
-                                                   size_t size,
-                                                   struct EXTRACTOR_Keywords * prev) {
+static voidpf Eopen_file_func (voidpf opaque,
+                              const char* filename,
+                              int mode) {
+  if (0 == strcmp(filename,
+                 "ERROR"))
+    return opaque;
+  else
+    return NULL;
+}
+static uLong Eread_file_func(voidpf opaque, 
+                            voidpf stream, 
+                            void* buf,
+                            uLong size) {
+  Ecls * e = opaque;
+  uLong ret;
+
+  ret = e->size - e->pos;
+  if (ret > size)
+    ret = size;
+  memcpy(buf, 
+        e->data,
+        ret);
+  return ret;
+}
+
+static long Etell_file_func(voidpf opaque,
+                           voidpf stream) {
+  Ecls * e = opaque;
+  return e->pos;
+}
+
+static long Eseek_file_func(voidpf opaque,
+                           voidpf stream, 
+                           uLong offset, 
+                           int origin) {
+  Ecls * e = opaque;
+
+  switch (origin) {
+  case ZLIB_FILEFUNC_SEEK_SET:
+    e->pos = offset;    
+    break;
+  case ZLIB_FILEFUNC_SEEK_END:
+    if (offset > e->size)
+      return -1;
+    e->pos = e->size - offset;
+    break;
+  case ZLIB_FILEFUNC_SEEK_CUR:
+    if (offset < - e->pos)
+      return -1;
+    e->pos += offset;
+    break;
+  default:
+    return -1;
+  }
+  return e->pos;
+}
+
+static int Eclose_file_func(voidpf opaque, 
+                           voidpf stream) {
+  Ecls * e = opaque;
+  return 0;
+}
+static int Etesterror_file_func(voidpf opaque, 
+                               voidpf stream) {
+  return 0;
+}
+
+
+struct EXTRACTOR_Keywords * 
+libextractor_oo_extract(const char * filename,
+                       char * data,
+                       size_t size,
+                       struct EXTRACTOR_Keywords * prev) {
   char filename_inzip[MAXFILENAME];
   unzFile uf;
   unz_file_info file_info;
@@ -80,13 +155,27 @@
   char * pbuf;
   size_t buf_size;
   int i;
+  zlib_filefunc_def io;
+  Ecls cls;
 
   if (size < 100)
     return prev;
   if ( !( ('P'==data[0]) && ('K'==data[1]) && (0x03==data[2]) && (0x04==data[3])) )
     return prev;
 
-  uf = unzOpen(filename);
+  cls.data = data;
+  cls.size = size;
+  cls.pos = 0;
+  io.zopen_file = &Eopen_file_func;
+  io.zread_file = &Eread_file_func;
+  io.zwrite_file = NULL;
+  io.ztell_file = &Etell_file_func;
+  io.zseek_file = &Eseek_file_func;
+  io.zclose_file = &Eclose_file_func;
+  io.zerror_file = &Etesterror_file_func;
+  io.opaque = &cls;
+
+  uf = unzOpen2("ERROR", &io);
   if (uf == NULL)
     return prev;
 

Modified: Extractor/src/plugins/psextractor.c
===================================================================
--- Extractor/src/plugins/psextractor.c 2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/plugins/psextractor.c 2005-07-14 19:48:48 UTC (rev 1527)
@@ -142,7 +142,7 @@
 };
 
 /* mimetype = application/postscript */
-struct EXTRACTOR_Keywords * libextractor_ps_extract(char * filename,
+struct EXTRACTOR_Keywords * libextractor_ps_extract(const char * filename,
                                                     char * data,
                                                     size_t size,
                                                     struct EXTRACTOR_Keywords * prev) {

Modified: Extractor/src/plugins/tarextractor.c
===================================================================
--- Extractor/src/plugins/tarextractor.c        2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/plugins/tarextractor.c        2005-07-14 19:48:48 UTC (rev 1527)
@@ -1,6 +1,6 @@
 /*
      This file is part of libextractor.
-     (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff
+     (C) 2002, 2003, 2004, 2005 Vidyut Samanta and Christian Grothoff
 
      libextractor is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -136,20 +136,29 @@
   return prev;
 }
 
+static voidpf Emalloc(voidpf opaque, uInt items, uInt size) {
+  return malloc(size * items);
+}
+
+static void Efree(voidpf opaque, voidpf ptr) {
+  free(ptr);
+}
+
 /* do not decompress tar.gz files > 16 MB */
 #define MAX_TGZ_SIZE 16 * 1024 * 1024
 
-struct EXTRACTOR_Keywords * libextractor_tar_extract(const char * filename,
-                                                    const unsigned char * data,
-                                                    size_t size,
-                                                    struct EXTRACTOR_Keywords * prev) {
+struct EXTRACTOR_Keywords * 
+libextractor_tar_extract(const char * filename,
+                        const unsigned char * data,
+                        size_t size,
+                        struct EXTRACTOR_Keywords * prev) {  
   if ( (data[0] == 0x1f) &&
        (data[1] == 0x8b) &&
        (data[2] == 0x08) ) {
     time_t ctime;
     char * buf;
     size_t bufSize;
-    gzFile gzf;
+    z_stream strm;
 
     /* Creation time */
     ctime = ((((((  (unsigned int)data[7] << 8)
@@ -172,21 +181,33 @@
     if (bufSize > MAX_TGZ_SIZE) {
       return prev;
     }
-    gzf = gzopen(filename, "rb");
-    if (gzf == NULL) {
+
+    memset(&strm, 0, sizeof(z_stream));
+    strm.next_in = (char*) data;
+    strm.avail_in = size;
+    strm.total_in = 0;
+    strm.zalloc = &Emalloc;
+    strm.zfree = &Efree;
+    strm.opaque = NULL;
+    if (Z_OK != inflateInit2(&strm,
+                            15 + 32))
       return prev;
-    }
     buf = malloc(bufSize);
     if (buf == NULL) {
-      gzclose(gzf);
+      inflateEnd(&strm);
       return prev;
     }
-    if (bufSize != gzread(gzf, buf, bufSize)) {
+    strm.next_out = buf;
+    strm.avail_out = bufSize;
+    inflate(&strm,
+           Z_FINISH);
+    if (strm.total_out == 0) {
+      inflateEnd(&strm);
       free(buf);
-      gzclose(gzf);
       return prev;
     }
-    gzclose(gzf);
+    bufSize = strm.total_out;
+    inflateEnd(&strm);
     prev = tar_extract(buf, bufSize, prev);
     free(buf);
     return prev;

Deleted: Extractor/src/plugins/thumbnail/thumbnailextractor-old.c
===================================================================
--- Extractor/src/plugins/thumbnail/thumbnailextractor-old.c    2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/plugins/thumbnail/thumbnailextractor-old.c    2005-07-14 19:48:48 UTC (rev 1527)
@@ -1,253 +0,0 @@
-/*
-     This file is part of libextractor.
-     (C) 2005 Vidyut Samanta and Christian Grothoff
-
-     libextractor is free software; you can redistribute it and/or modify
-     it under the terms of the GNU General Public License as published
-     by the Free Software Foundation; either version 2, or (at your
-     option) any later version.
-
-     libextractor is distributed in the hope that it will be useful, but
-     WITHOUT ANY WARRANTY; without even the implied warranty of
-     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-     General Public License for more details.
-
-     You should have received a copy of the GNU General Public License
-     along with libextractor; see the file COPYING.  If not, write to the
-     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-     Boston, MA 02111-1307, USA.
- */
-
-/**
- * @file thumbnailextractor.c
- * @author Christian Grothoff
- * @brief this extractor produces a binary (!) encoded
- * thumbnail of images (using imagemagick).  The bottom
- * of the file includes a decoder method that can be used
- * to reproduce the 128x128 PNG thumbnails.
- */
-
-#include "platform.h"
-#include "extractor.h"
-#include <wand/magick_wand.h>
-
-#define THUMBSIZE 128
-
-static EXTRACTOR_KeywordList * addKeyword(EXTRACTOR_KeywordType type,
-                                         char * keyword,
-                                         EXTRACTOR_KeywordList * next) {
-  EXTRACTOR_KeywordList * result;
-
-  if (keyword == NULL)
-    return next;
-  result = malloc(sizeof(EXTRACTOR_KeywordList));
-  result->next = next;
-  result->keyword = keyword;
-  result->keywordType = type;
-  return result;
-}
-
-
-/* which mime-types maybe subjected to
-   the thumbnail extractor (ImageMagick
-   crashes and/or prints errors for bad
-   formats, so we need to be rather
-   conservative here) */
-static char * whitelist[] = {
-  "image/jpeg",
-  "image/gif",
-  "image/miff",
-  "image/mng",
-  "image/png",
-  "image/tiff",
-  "image/x-bmp",
-  "image/x-mng",
-  "image/x-png",
-  "image/x-xpm",
-  "image/xcf",
-  NULL,
-};
-
-struct EXTRACTOR_Keywords * libextractor_thumbnail_extract(const char * filename,
-                                                          const char * data,
-                                                          size_t size,
-                                                          struct EXTRACTOR_Keywords * prev) {
-  MagickBooleanType status;
-  MagickWand * magick_wand;
-  size_t length;
-  char * thumb;
-  unsigned long width;
-  unsigned long height;
-  char * binary;
-  size_t pos;
-  size_t end;
-  size_t wpos;
-  size_t i;
-  unsigned int markers[8]; /* 256 bits */
-  unsigned char marker;
-  const char * mime;
-  int j;
-
-  /* if the mime-type of the file is not whitelisted
-     do not run the thumbnail extactor! */
-  mime = EXTRACTOR_extractLast(EXTRACTOR_MIMETYPE,
-                              prev);
-  if (mime == NULL)
-    return prev;
-  j = 0;
-  while (whitelist[j] != NULL) {
-    if (0 == strcmp(whitelist[j], mime))
-      break;
-    j++;
-  }
-  if (whitelist[j] == NULL)
-    return prev;
-
-  magick_wand = NewMagickWand();
-  status = MagickReadImageBlob(magick_wand, data, size);
-  if (status == MagickFalse) {
-    DestroyMagickWand(magick_wand);
-    return prev;
-  }
-  MagickResetIterator(magick_wand);
-  if (MagickNextImage(magick_wand) == MagickFalse)
-    return prev;
-
-  height = MagickGetImageHeight(magick_wand);
-  width = MagickGetImageWidth(magick_wand);
-  if (height == 0)
-    height = 1;
-  if (width == 0)
-    width = 1;
-  if ( (height <= THUMBSIZE) &&
-       (width <= THUMBSIZE) ) {
-    DestroyMagickWand(magick_wand);
-    return prev;
-  }
-
-
-  if (height > THUMBSIZE) {
-    width = width * THUMBSIZE / height;
-    height = THUMBSIZE;
-  }
-  if (width > THUMBSIZE) {
-    height = height * THUMBSIZE / width;
-    width = THUMBSIZE;
-  }
-  MagickResizeImage(magick_wand, height, width, LanczosFilter, 1.0);
-  MagickSetImageDepth(magick_wand,
-                     8);
-  MagickSetImageChannelDepth(magick_wand,
-                            RedChannel,
-                            2);
-  MagickCommentImage(magick_wand, "");
-  MagickSetImageChannelDepth(magick_wand,
-                            GreenChannel,
-                            2);
-  MagickSetImageChannelDepth(magick_wand,
-                            BlueChannel,
-                            2);
-  MagickSetImageChannelDepth(magick_wand,
-                            OpacityChannel,
-                            2);
-  MagickSetImageInterlaceScheme(magick_wand,
-                               NoInterlace);
-
-  if (MagickFalse == MagickSetImageFormat(magick_wand, "png")) {
-    DestroyMagickWand(magick_wand);
-    return prev;
-  }
-  thumb = MagickGetImageBlob(magick_wand, &length);
-  DestroyMagickWand(magick_wand);
-  if (thumb == NULL)
-    return prev;
-
-
-  /* encode! */
-  binary = malloc(2 + length + (length+256) / 254);
-  if (binary == NULL)
-    return prev;
-
-  pos = 0;
-  wpos = 0;
-  while (pos < length) {
-    /* find unused value between 1 and 255 in
-       the next 254 bytes */
-    end = pos + 254;
-    if (end < pos)
-      break; /* integer overflow! */
-    if (end > length)
-      end = length;
-    memset(markers, 0, sizeof(markers));
-    for (i=pos;i<end;i++)
-      markers[thumb[i]&7] |= 1 << (thumb[i] >> 3);
-    marker = 1;
-    while (markers[marker&7] & (1 << (marker >> 3))) {
-      marker++;
-      if (marker == 0) {
-       /* assertion failed... */
-       free(binary);
-       free(thumb);
-       return prev;
-      }
-    }
-    /* recode */
-    binary[wpos++] = marker;
-    for (i=pos;i<end;i++)
-      binary[wpos++] = thumb[i] == 0 ? marker : thumb[i];
-    pos = end;
-  }
-  binary[wpos++] = 0; /* 0-termination! */
-  free(thumb);
-  return addKeyword(EXTRACTOR_THUMBNAIL_DATA,
-                   binary,
-                   prev);
-}
-
-#if 0
-
-/**
- * This function can be used to decode the binary data
- * stream produced by the thumbnailextractor.
- *
- * @param in 0-terminated string from the meta-data
- * @return 1 on error, 0 on success
- */
-int decodeThumbnail(const unsigned char * in,
-                   unsigned char ** out,
-                   size_t * outSize) {
-  unsigned char * buf;
-  size_t pos;
-  size_t wpos;
-  unsigned char marker;
-  size_t i;
-  size_t end;
-  size_t inSize;
-
-  inSize = strlen(in);
-  if (inSize == 0) {
-    *out = NULL;
-    *outSize = 0;
-    return 1;
-  }
-
-  buf = malloc(inSize); /* slightly more than needed ;-) */
-  *out = buf;
-
-  pos = 0;
-  wpos = 0;
-  while (pos < inSize) {
-    end = pos + 255; /* 255 here: count the marker! */
-    if (end > inSize)
-      end = inSize;
-    marker = in[pos++];
-    for (i=pos;i<end;i++)
-      buf[wpos++] = (in[i] == marker) ? 0 : in[i];
-    pos = end;
-  }
-  *outSize = wpos;
-  return 0;
-}
-
-
-#endif

Modified: Extractor/src/plugins/thumbnail/thumbnailextractor.c
===================================================================
--- Extractor/src/plugins/thumbnail/thumbnailextractor.c        2005-07-14 19:44:16 UTC (rev 1526)
+++ Extractor/src/plugins/thumbnail/thumbnailextractor.c        2005-07-14 19:48:48 UTC (rev 1527)
@@ -79,6 +79,7 @@
                                                           const char * data,
                                                           size_t size,
                                                           struct EXTRACTOR_Keywords * prev) {
+  GdkPixbufLoader * loader;
   GdkPixbuf * in;
   GdkPixbuf * out;
   size_t length;
@@ -112,8 +113,14 @@
   if (whitelist[j] == NULL)
     return prev;
 
-  in = gdk_pixbuf_new_from_file(filename,
-                               &err);
+  loader = gdk_pixbuf_loader_new();
+  gdk_pixbuf_loader_write(loader,
+                         data,
+                         size,
+                         NULL);    
+  in = gdk_pixbuf_loader_get_pixbuf(loader);
+  gdk_pixbuf_loader_close(loader,
+                         NULL);
   if (in == NULL)
     return prev;
   height = gdk_pixbuf_get_height(in);
@@ -164,92 +171,15 @@
   if (thumb == NULL)
     return prev;
 
-
-  /* encode! */
-  binary = malloc(2 + length + (length+256) / 254);
+  binary
+    = EXTRACTOR_binaryEncode(thumb,
+                            length);
+  FREE(thumb);
   if (binary == NULL)
     return prev;
-
-  pos = 0;
-  wpos = 0;
-  while (pos < length) {
-    /* find unused value between 1 and 255 in
-       the next 254 bytes */
-    end = pos + 254;
-    if (end < pos)
-      break; /* integer overflow! */
-    if (end > length)
-      end = length;
-    memset(markers, 0, sizeof(markers));
-    for (i=pos;i<end;i++)
-      markers[thumb[i]&7] |= 1 << (thumb[i] >> 3);
-    marker = 1;
-    while (markers[marker&7] & (1 << (marker >> 3))) {
-      marker++;
-      if (marker == 0) {
-       /* assertion failed... */
-       free(binary);
-       free(thumb);
-       return prev;
-      }
-    }
-    /* recode */
-    binary[wpos++] = marker;
-    for (i=pos;i<end;i++)
-      binary[wpos++] = thumb[i] == 0 ? marker : thumb[i];
-    pos = end;
-  }
-  binary[wpos++] = 0; /* 0-termination! */
-  free(thumb);
   return addKeyword(EXTRACTOR_THUMBNAIL_DATA,
                    binary,
                    prev);
 }
 
-#if 0
-
-/**
- * This function can be used to decode the binary data
- * stream produced by the thumbnailextractor.
- *
- * @param in 0-terminated string from the meta-data
- * @return 1 on error, 0 on success
- */
-int decodeThumbnail(const unsigned char * in,
-                   unsigned char ** out,
-                   size_t * outSize) {
-  unsigned char * buf;
-  size_t pos;
-  size_t wpos;
-  unsigned char marker;
-  size_t i;
-  size_t end;
-  size_t inSize;
-
-  inSize = strlen(in);
-  if (inSize == 0) {
-    *out = NULL;
-    *outSize = 0;
-    return 1;
-  }
-
-  buf = malloc(inSize); /* slightly more than needed ;-) */
-  *out = buf;
-
-  pos = 0;
-  wpos = 0;
-  while (pos < inSize) {
-    end = pos + 255; /* 255 here: count the marker! */
-    if (end > inSize)
-      end = inSize;
-    marker = in[pos++];
-    for (i=pos;i<end;i++)
-      buf[wpos++] = (in[i] == marker) ? 0 : in[i];
-    pos = end;
-  }
-  *outSize = wpos;
-  return 0;
-}
-
-
-#endif
+/* end of thumbnailextractor.c */





reply via email to

[Prev in Thread] Current Thread [Next in Thread]