[GNUnet-SVN] r2575 - Extractor Extractor/doc Extractor/src/plugins Extra

gnunet-svn
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r2575 - Extractor Extractor/doc Extractor/src/plugins Extra

From:	grothoff
Subject:	[GNUnet-SVN] r2575 - Extractor Extractor/doc Extractor/src/plugins Extractor-docs/WWW GNUnet/src/util
Date:	Sat, 25 Mar 2006 12:06:07 -0800 (PST)
Author: grothoff
Date: 2006-03-25 12:06:01 -0800 (Sat, 25 Mar 2006)
New Revision: 2575

Modified:
   Extractor-docs/WWW/index.php
   Extractor-docs/WWW/oldnews.php3
   Extractor/ChangeLog
   Extractor/TODO
   Extractor/configure.ac
   Extractor/doc/extract.1
   Extractor/doc/libextractor.3
   Extractor/src/plugins/Makefile.am
   Extractor/src/plugins/pdfextractor.c
   Extractor/src/plugins/pngextractor.c
   GNUnet/src/util/logging.c
Log:
sync

Modified: Extractor/ChangeLog
===================================================================
--- Extractor/ChangeLog 2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/ChangeLog 2006-03-25 20:06:01 UTC (rev 2575)
@@ -1,3 +1,8 @@
+Fri Mar 24 21:43:43 PST 2006
+       Started re-implementation of PDF support from scratch
+       (incomplete but working).  Improvements to the build
+       system.
+       
 Thu Mar  9 17:46:39 PST 2006
         Added support for wordleaker (additional meta-data for
         OLE2 streams).  Releasing libextractor 0.5.11.

Modified: Extractor/TODO
===================================================================
--- Extractor/TODO      2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/TODO      2006-03-25 20:06:01 UTC (rev 2575)
@@ -1,5 +1,4 @@
 FIX:
-* HTML-extractor now broken (!) Also crappy code. FIX?!
 * check exiv2 memory consumption on very large files; 
   also investigate 500kb (!) allocation/leak in exiv2 on test/test.html
   (reported by valgrind)

Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac      2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/configure.ac      2006-03-25 20:06:01 UTC (rev 2575)
@@ -261,6 +261,25 @@
         printable=1])
 AM_CONDITIONAL(HAVE_PRINTABLE, test x$printable != x0)
 
+
+xpdf=0
+AC_MSG_CHECKING([whether to enable xpdf-based extractor])
+AC_ARG_ENABLE(xpdf,
+ [AC_HELP_STRING([--enable-xpdf],[Enable xpdf-based extractor])
+  AC_HELP_STRING([--disable-xpdf],[Disable xpdf-based extractor])],
+ [case "$enableval" in
+  no)  AC_MSG_RESULT(no)
+        xpdf=0
+       ;;
+  *)    AC_MSG_RESULT(yes)
+        xpdf=1
+       ;;
+  esac],
+  [     AC_MSG_RESULT(no)
+        xpdf=0])
+AM_CONDITIONAL(HAVE_XPDF, test x$xpdf != x0)
+
+
 exiv2=1
 AC_MSG_CHECKING([whether to enable exiv2 extractor])
 AC_ARG_ENABLE(exiv2,
@@ -303,3 +322,48 @@
 ])
 
 AC_OUTPUT
+
+if test "x$xpdf" = "x1"
+then
+ AC_MSG_NOTICE([NOTICE: xpdf enabled (xpdf has a bad security record)])
+else 
+ AC_MSG_NOTICE([NOTICE: xpdf disabled (result: limited PDF support)])
+fi
+
+if test "x$exiv2" = "x0"
+then
+ AC_MSG_NOTICE([NOTICE: exiv2 disabled])
+fi
+
+
+if test "x$printable" = "x0"
+then
+ AC_MSG_NOTICE([NOTICE: printable plugins disabled])
+else 
+ AC_MSG_NOTICE([NOTICE: printable plugins enabled (will need 150 MB memory to compile)])
+fi
+
+if test "x$without_glib" = "xtrue"
+then
+ AC_MSG_NOTICE([NOTICE: glib not used, no OLE2 (MS Office) support])
+fi
+
+if test "x$without_gtk" = "xtrue"
+then
+ AC_MSG_NOTICE([NOTICE: gtk not found, no thumbnail support])
+fi
+
+if test "x$HAVE_VORBISFILE_TRUE" = "x#"
+then
+ AC_MSG_NOTICE([NOTICE: vorbis support disabled])
+fi
+
+if test "x$HAVE_BZ2_TRUE" = "x#"
+then
+ AC_MSG_NOTICE([NOTICE: bzip2 support disabled])
+fi
+
+if test "x$HAVE_ZLIB_TRUE" = "x#"
+then
+ AC_MSG_ERROR([FATAL: zlib not found (headers not installed?)])
+fi

Modified: Extractor/doc/extract.1
===================================================================
--- Extractor/doc/extract.1     2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/doc/extract.1     2006-03-25 20:06:01 UTC (rev 2575)
@@ -1,4 +1,4 @@
-.TH EXTRACT 1 "April 28, 2005" "libextractor 0.4.2"
+.TH EXTRACT 1 "April 28, 2005" "libextractor 0.5.11"
 .\" $Id
 .SH NAME
 extract
@@ -32,7 +32,7 @@
 \&...
 .br
 .SH DESCRIPTION
-This manual page documents version 0.4.0 of the 
+This manual page documents version 0.5.11 of the 
 .B extract 
 command.
 .PP

Modified: Extractor/doc/libextractor.3
===================================================================
--- Extractor/doc/libextractor.3        2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/doc/libextractor.3        2006-03-25 20:06:01 UTC (rev 2575)
@@ -1,6 +1,6 @@
 .TH LIBEXTRACTOR 3 "Jul 14, 2005"
 .SH NAME
-libextractor \- meta\-information extraction library 0.5.2
+libextractor \- meta\-information extraction library 0.5.11
 .SH SYNOPSIS
 
 \fB#include <extractor.h>

Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am   2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/src/plugins/Makefile.am   2006-03-25 20:06:01 UTC (rev 2575)
@@ -15,8 +15,17 @@
  exiv2dir=exiv2
 endif
 
-SUBDIRS = . $(oodir) $(printdir) hash $(oledir) rpm pdf $(thumbdir) $(exiv2dir) wordleaker
+if HAVE_XPDF
+ xpdfdir=pdf
+else
+ pdfplugin=libextractor_pdf.la 
+endif
 
+# toggle for development
+# SUBDIRS = . 
+SUBDIRS = . $(oodir) $(printdir) hash $(oledir) rpm $(xpdfdir) $(thumbdir) $(exiv2dir) wordleaker
+
+
 if HAVE_VORBISFILE
  extraogg = libextractor_ogg.la
 endif
@@ -30,7 +39,7 @@
 oodir = oo
 endif
 
-plugin_LTLIBRARIES = \
+plugin_LTLIBRARIES = $(pdfplugin) \
   libextractor_asf.la \
   libextractor_deb.la \
   libextractor_dvi.la \
@@ -92,6 +101,13 @@
 libextractor_wav_la_LDFLAGS = \
   $(PLUGINFLAGS)  $(retaincommand)
 
+libextractor_pdf_la_SOURCES = \
+  pdfextractor.c
+libextractor_pdf_la_LDFLAGS = \
+  $(PLUGINFLAGS)  $(retaincommand)
+libextractor_pdf_la_LIBADD = \
+  libconvert.la  
+
 libextractor_mp3_la_SOURCES = \
   mp3extractor.c 
 libextractor_mp3_la_LDFLAGS = \

Modified: Extractor/src/plugins/pdfextractor.c
===================================================================
--- Extractor/src/plugins/pdfextractor.c        2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/src/plugins/pdfextractor.c        2006-03-25 20:06:01 UTC (rev 2575)
@@ -18,9 +18,25 @@
      Boston, MA 02111-1307, USA.
  */
 
+/**
+ * TODO:
+ * - code clean up (factor out some parsing aspects?)
+ * - proper string decoding (escape sequences)
+ * - proper dictionary support
+ * - filters (compression!)
+ * - page count (and other document catalog information,
+ *   such as language, viewer preferences, page layout,
+ *   Metadatastreams (10.2.2), legal and permissions info)
+ * - pdf 1.5 support ((compressed) cross reference streams)
+ */
+
 #include "platform.h"
 #include "extractor.h"
 #include <zlib.h>
+#ifndef _XOPEN_SOURCE
+#define _XOPEN_SOURCE 1
+#endif
+#include <time.h>
 #include "convert.h"
 
 static char * stndup(const char * str,
@@ -32,23 +48,9 @@
   return tmp;
 }
 
-/**
- * strnlen is GNU specific, let's redo it here to be
- * POSIX compliant.
- */
-static size_t stnlen(const char * str,
-                    size_t maxlen) {
-  size_t ret;
-  ret = 0;
-  while ( (ret < maxlen) &&
-         (str[ret] != '\0') )
-    ret++;
-  return ret;
-}
-
 static struct EXTRACTOR_Keywords * 
 addKeyword(EXTRACTOR_KeywordType type,
-          const char * keyword,
+          char * keyword,
           struct EXTRACTOR_Keywords * next) {
   EXTRACTOR_KeywordList * result;
 
@@ -56,26 +58,108 @@
     return next;
   result = malloc(sizeof(EXTRACTOR_KeywordList));
   result->next = next;
-  result->keyword = strdup(keyword);
+  result->keyword = keyword;
   result->keywordType = type;
   return result;
 }
 
+
+
+static char * 
+dateDecode(const char * pdfString) {
+  unsigned char * ret;
+
+  if (pdfString == NULL)
+    return NULL;
+  if (strlen(pdfString) < 4)
+    return NULL;
+  return stndup(&pdfString[3], strlen(pdfString) - 4);
+}
+
+static unsigned char * 
+stringDecode(const char * pdfString,
+            size_t * size) {
+  size_t slen;
+  unsigned char * ret;
+  char hex[3];
+  int i;
+  int val;
+
+  slen = strlen(pdfString);
+  if (slen < 2)
+    return NULL;
+  switch (pdfString[0]) {
+  case '(':
+    if (pdfString[slen-1] != ')')    
+      return NULL;
+    /* todo: recode escape sequences! */
+    *size = slen - 2;
+    return stndup(&pdfString[1], slen-2);     
+  case '<':
+    if (pdfString[slen-1] != '>')
+      return NULL;
+    hex[2] = '\0';
+    ret = malloc(1 + ((slen - 1) / 2));
+    for (i=0;i<slen-2;i+=2) {
+      hex[0] = pdfString[i+1];
+      hex[1] = '0';
+      if (i + 1 < slen)
+       hex[1] = pdfString[i+2];
+      if ( (1 != sscanf(hex, "%x", &val)) &&
+          (1 != sscanf(hex, "%X", &val)) ) {
+       free(ret);
+       return NULL;
+      }
+      ret[i/2] = val;
+    }
+    ret[(slen-1)/2] = '\0';
+    *size = (slen-1) / 2;
+    return ret;
+  }
+  return NULL;
+}
+
+static char * 
+charsetDecode(const unsigned char * in,
+             size_t size) {
+  if (in == NULL)
+    return NULL;
+  if ( (size < 2) ||
+       (in[0] != 0xfe) ||
+       (in[1] != 0xff) ) {
+    /* TODO: extend glibc with
+       character set that corresponds to
+       Adobe's extended ISOLATIN1 encoding! */
+    return convertToUtf8(in,
+                        size,
+                        "CSISOLATIN1");
+  } else { 
+    return convertToUtf8(&in[2],
+                        size - 2,
+                        "UNICODEBIG");
+  }
+    
+}
+
 static struct {
   char * name;
   EXTRACTOR_KeywordType type;
 } tagmap[] = {
-   { "Author" , EXTRACTOR_AUTHOR},
-   { "Description" , EXTRACTOR_DESCRIPTION},
-   { "Comment", EXTRACTOR_COMMENT},
-   { "Copyright", EXTRACTOR_COPYRIGHT},
-   { "Source", EXTRACTOR_SOURCE},
-   { "Creation Time", EXTRACTOR_DATE},
-   { "Title", EXTRACTOR_TITLE},
-   { "Software", EXTRACTOR_SOFTWARE},
-   { "Disclaimer", EXTRACTOR_DISCLAIMER},
-   { "Warning", EXTRACTOR_WARNING},
-   { "Signature", EXTRACTOR_RESOURCE_IDENTIFIER},
+   { "/CreationDate", EXTRACTOR_CREATION_DATE},
+   { "/Author" , EXTRACTOR_AUTHOR},
+   { "/Description" , EXTRACTOR_DESCRIPTION},
+   { "/Title" , EXTRACTOR_TITLE},
+   { "/Comment", EXTRACTOR_COMMENT},
+   { "/Copyright", EXTRACTOR_COPYRIGHT},
+   { "/Subject", EXTRACTOR_SUBJECT},
+   { "/PTEX.Fullbanner", EXTRACTOR_SOFTWARE},
+   { "/Creator", EXTRACTOR_CREATOR},
+   { "/ModDate", EXTRACTOR_MODIFICATION_DATE},
+   { "/Producer", EXTRACTOR_PRODUCER},
+   { "/Software", EXTRACTOR_SOFTWARE},
+   { "/Keywords", EXTRACTOR_KEYWORDS},
+   { "/Warning", EXTRACTOR_WARNING},
+   { "/Signature", EXTRACTOR_RESOURCE_IDENTIFIER},
    { NULL, EXTRACTOR_UNKNOWN},
 };
 
@@ -97,7 +181,9 @@
                         size_t size,
                         struct EXTRACTOR_Keywords * prev) {
   size_t pos;
+  size_t spos;
   size_t steps;
+  size_t mlen;
   unsigned int xstart;
   unsigned int xcount;
   unsigned int xinfo;
@@ -107,6 +193,10 @@
   unsigned long long info_offset;
   char buf[MAX_STEPS+1];
   int i;
+  char * meta;
+  unsigned char * dmeta;
+  char pcnt[20];
+  float version;
 
   while ( (size > 0) && (IS_NL(data[size-1])) )
     size--;
@@ -116,6 +206,20 @@
     return prev;
   if (0 != memcmp(&data[size - strlen(PDF_EOF)], PDF_EOF, strlen(PDF_EOF))) 
     return prev;
+  /* PDF format is pretty much sure by now */
+  memcpy(buf,
+        data,
+        8);
+  buf[8] = '\0';
+  if (1 != sscanf(buf, "%%PDF-%f", &version)) {
+    return prev;
+  }
+  sprintf(pcnt, "PDF %.1f", version);
+  prev = addKeyword(EXTRACTOR_FORMAT,
+                   strdup(pcnt),
+                   prev);
+
+
   
   pos = size - strlen(PDF_EOF) - strlen(PDF_SXR);
   steps = 0;
@@ -123,21 +227,20 @@
          (pos > 0) &&
          (0 != memcmp(&data[pos], PDF_SXR, strlen(PDF_SXR))) ) 
     pos--;
-  printf("pos: %u\n", pos);
-  if (0 != memcmp(&data[pos], PDF_SXR, strlen(PDF_SXR)))
+  if (0 != memcmp(&data[pos], PDF_SXR, strlen(PDF_SXR))) {
+    /* cross reference streams not yet supported! */
     return prev; 
+  }
   memcpy(buf, &data[pos + strlen(PDF_SXR)], steps);
   buf[steps] = '\0';
   if (1 != sscanf(buf, "%llu", &startxref)) 
     return prev;
-  printf("startxref: %llu\n", startxref);
   if (startxref >= size - strlen(PDF_XREF))
     return prev;
   if (0 != memcmp(&data[startxref], PDF_XREF, strlen(PDF_XREF)))
     return prev;
   haveValidXref = 0;
   xrefpos = startxref + strlen(PDF_XREF);
-
   while (1) {
     pos = xrefpos;
     while ( (pos < size) && (IS_NL(data[pos])) )
@@ -146,10 +249,6 @@
     buf[MIN(MAX_STEPS,size-pos)] = '\0';
     if (2 != sscanf(buf, "%u %u", &xstart, &xcount)) 
       break;
-    printf("xstart: %u - xcount: %u - pos %u\n",
-          xstart,
-          xcount,
-          pos);
     while ( (pos < size) && (! IS_NL(data[pos])) )
       pos++;
     if ( (pos < size) && IS_NL(data[pos]))
@@ -158,8 +257,6 @@
     if ( (xrefpos >= size) || (xrefpos < pos) )
       return prev; /* invalid xref size */
     haveValidXref = 1;
-    printf("xref portion ends at %llu\n",
-          xrefpos);
   }
   if (! haveValidXref)
     return prev;
@@ -170,6 +267,7 @@
                  strlen(PDF_TRAILER))) 
     return prev;
   pos += strlen(PDF_TRAILER);
+
   SKIP("<< \n\r", pos, data, size);
   while ( (pos < size) &&
          (pos + strlen(PDF_INFO) < size) &&
@@ -186,8 +284,7 @@
     }
     while ( (pos < size) &&
            (IS_NL(data[pos]) || isspace(data[pos]) ) )
-      pos++;
-  }
+      pos++;  }
   if ( ! ( (pos < size) &&
           (pos + strlen(PDF_INFO) < size) &&
           (0 == memcmp(&data[pos],
@@ -207,7 +304,6 @@
     }
   if (1 != sscanf(buf, "%u", &xinfo)) 
     return prev;
-  printf("xinfo: %u\n", xinfo);
 
   haveValidXref = 0;  
   /* now go find xinfo in xref table */
@@ -220,10 +316,6 @@
     buf[MIN(MAX_STEPS,size-pos)] = '\0';
     if (2 != sscanf(buf, "%u %u", &xstart, &xcount)) 
       break;
-    printf("xstart: %u - xcount: %u - pos %u\n",
-          xstart,
-          xcount,
-          pos);
     while ( (pos < size) && (! IS_NL(data[pos])) )
       pos++;
     if ( (pos < size) && IS_NL(data[pos]))
@@ -234,9 +326,7 @@
       pos += 20 * xinfo - xstart;
       memcpy(buf, &data[pos], 20);
       buf[20] = '\0';
-      sscanf(buf, "%10llu %*5u %*c", &info_offset);
-      
-
+      sscanf(buf, "%10llu %*5u %*c", &info_offset);      
       break;
     }
     xrefpos = 20 * xcount + pos;    
@@ -245,12 +335,73 @@
   }
   if (! haveValidXref)
     return prev;
+  pos = info_offset;
+  
+  while ( (pos < size - 4) &&
+         (! ( (data[pos] == '<') &&
+              (data[pos+1] == '<') ) ) )
+    pos++;
+  pos++;
+  if (pos >= size - 4)
+    return prev;
+  if ( (data[pos] == ' ') ||
+       (data[pos] == 10) ||
+       (data[pos] == 13) ) 
+    pos++;
+  
+  while ( (pos < size - 2) &&
+         ( ! ( (data[pos] == '>') &&
+               (data[pos+1] == '>') ) ) ) {
+    i = 0;
+    while (tagmap[i].name != NULL) {
+      if ( (pos + strlen(tagmap[i].name) > pos) &&
+          (pos + strlen(tagmap[i].name) + 1 < size) &&
+          (0 == memcmp(&data[pos],
+                       tagmap[i].name,
+                       strlen(tagmap[i].name))) ) {
+       pos += strlen(tagmap[i].name);
+       if (isspace(data[pos]))
+         pos++;
+       spos = pos;
+       while ( (pos < size + 2) &&
+               (! IS_NL(data[pos])) &&
+               (data[pos] != '/') &&
+               (! ( (data[pos] == '>') &&
+                    (data[pos+1] == '>') ) ) )
+         pos++;        
+       meta = stndup(&data[spos],
+                     pos - spos);
+       if (i == 0) {
+         dmeta = dateDecode(meta);
+         mlen = strlen(dmeta);
+       } else {
+         dmeta = stringDecode(meta,
+                              &mlen);
+       }
+       if (meta != NULL)
+         free(meta);
+       meta = charsetDecode(dmeta, mlen);
+       if (dmeta != NULL)
+         free(dmeta);
+       if (meta != NULL) {
+         prev = addKeyword(tagmap[i].type,
+                           meta,
+                           prev);
+       }
+       break;
+      }
+      i++;
+    }
+    if (tagmap[i].name == NULL) {
+      while ( (pos < size) &&
+             (! IS_NL(data[pos])) )
+       pos++;
+    }
+    while ( (pos < size) &&
+           (IS_NL(data[pos])) )
+      pos++;
+  }
 
-  /* read size of xref */
-  /* parse xref */
-  /* find info index */
-  /* parse info */
-
   return prev;
 }
 

Modified: Extractor/src/plugins/pngextractor.c
===================================================================
--- Extractor/src/plugins/pngextractor.c        2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/src/plugins/pngextractor.c        2006-03-25 20:06:01 UTC (rev 2575)
@@ -282,7 +282,7 @@
 
 
 
-struct EXTRACTOR_Keywords * libextractor_png_extract(char * filename,
+struct EXTRACTOR_Keywords * libextractor_png_extract(const char * filename,
                                                      const char * data,
                                                      size_t size,
                                                      struct EXTRACTOR_Keywords * prev) {

Modified: Extractor-docs/WWW/index.php
===================================================================
--- Extractor-docs/WWW/index.php        2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor-docs/WWW/index.php        2006-03-25 20:06:01 UTC (rev 2575)
@@ -41,31 +41,6 @@
      "This release adds support for extracting additional metadata from MS Word (OLE2) streams, including language, document statistics and editing history.");
 DTDD("Sat Feb 18 17:42:24 PST 2006 | libextractor v0.5.10 released.",
      "This release fixes some minor security problems in the PDF extractor.  The OLE2 extractor supports additional mime types.  The TAR extractor is now extracting date, format long filenames and supports more checksum variants.");
-DTDD("Fri Dec 23 12:58:18 PST 2005 | libextractor v0.5.9 released.",
-     "This release fixes a rare crash in the MIME-extractor. The TAR extractor is now more robust and supports additional TAR variants. The split extractor now uses SPLIT for the keyword type, instead of UNKNOWN.");
-DTDD("Tue Dec  6 13:25:56 PST 2005 | libextractor v0.5.8 released.",
-     "This release fixes a %s in the PDF extractor.",
-     extlink_("http://www.idefense.com/application/poi/display?id=344&type=vulnerabilities","security problem"));
-DTDD("Sat Nov 12 10:50:46 PST 2005 | libextractor v0.5.7 released.",
-     "This release features an updated German translation and improves support for the TAR and PDF formats.  Mime-type detection for OLE2 streams was improved.  The extract tool now returns an error code if files passed as arguments could not be accessed.  A double-free problem under BSD was fixed.");
-DTDD("Sun Sep 18 19:39:42 PDT 2005 | libextractor v0.5.6 released.",
-     "This release fixes warnings with gcc 4.0 and various bugs in the decompression code (including making it backwards compatible with zlib 1.1).  Files are now mmaped read-only (possibly helping the VM perform better for very large files). The exiv2 extractor no longer copies the file in memory.  The HTML extractor was completely rewritten and made simpler and more robust.");
-DTDD("Wed Sep  7 21:41:35 PDT 2005 | libextractor v0.5.5 released.",
-     "This release fixes a problem with linkers that caused segmentation faults for Debian unstable users.  The deb extractor no longer uses pthreads.  Dead code was eliminated in the OLE2 and OO extractors.  Minor bugfixes were ported from libgsf to the OLE2 extractor.  Mime-types are now detected for various Microsoft Office formats.  libextractor now automatically decompresses GZ and BZ2 files before extracting keywords, adding support for compressed files to all formats.  Individual extractors do no longer perform full-file decompression, avoiding some redundant computation.");
-DTDD("Fri Aug 26 22:47:07 PDT 2005 | libextractor v0.5.4 released.",
-     "This release fixes a memory leak in the thumbnail extractor, character set conversion in the OLE2 extractors and the build on OS X.  Quotations now follow GNU standards.   A workaround for a bug in libstdc++ that could cause segmentation fauls was added.  A new version of the python binding has also been released; this revision fixes various problems with the build process.");
-DTDD("Sat Aug 13 19:08:46 PDT 2005 | libextractor v0.5.3 released.",
-     "This release fixes various bugs in the EXIV2, OO and OLE2 plugins.  A static, relocatable version of glib is no longer required.");
-DTDD("Thu Jul 14 22:31:28 CEST 2005 | libextractor v0.5.2 released.",
-     "This release adds support for exiv2.  The API was extended to support in-memory metadata extraction (no file required).  Also new are functions to encode and decode the binary metadata of a thumbnail.  Various plugins were changed to allow for the in-memory metadata extraction.  A minor compile error was fixed.");
-DTDD("Mon Jul  4 18:24:18 CEST 2005 | libextractor v0.5.1 released.",
-     "This release moves the Java and Python bindings into seperate packages.  The new version improves the build system and contains some code cleanups.");
-DTDD("Sun May 21 13:58:52 CET 2005 | libextractor v0.5.0 released.",
-     "This release adds support for Python. Also, plugins can now be supplied with user-provided options.");
-DTDD("Thu Feb 24 01:23:31 EST 2005 | libextractor v0.4.2 released.",
-     "This release fixes some bugs in the ID3, PDF, PNG and REAL extractors.  The REAL extractor now also handles the new Helix formats.  libextractor can now also be used to extract thumbnails from images (using ImageMagick).");
-DTDD("Wed Jan 26 19:51:44 EST 2005 | libextractor v0.4.1 released.",
-     "This release fixes a security issue (inherited from xpdf).  It also extracts more meta-data from files of TAR or QuickTime format.");
 echo "</dl>";
 P();
 W("%s",

Modified: Extractor-docs/WWW/oldnews.php3
===================================================================
--- Extractor-docs/WWW/oldnews.php3     2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor-docs/WWW/oldnews.php3     2006-03-25 20:06:01 UTC (rev 2575)
@@ -6,6 +6,31 @@
 $page="oldnews";
 include("html_header.php3");
 echo "<dl>";
+DTDD("Fri Dec 23 12:58:18 PST 2005 | libextractor v0.5.9 released.",
+     "This release fixes a rare crash in the MIME-extractor. The TAR extractor is now more robust and supports additional TAR variants. The split extractor now uses SPLIT for the keyword type, instead of UNKNOWN.");
+DTDD("Tue Dec  6 13:25:56 PST 2005 | libextractor v0.5.8 released.",
+     "This release fixes a %s in the PDF extractor.",
+     extlink_("http://www.idefense.com/application/poi/display?id=344&type=vulnerabilities","security problem"));
+DTDD("Sat Nov 12 10:50:46 PST 2005 | libextractor v0.5.7 released.",
+     "This release features an updated German translation and improves support for the TAR and PDF formats.  Mime-type detection for OLE2 streams was improved.  The extract tool now returns an error code if files passed as arguments could not be accessed.  A double-free problem under BSD was fixed.");
+DTDD("Sun Sep 18 19:39:42 PDT 2005 | libextractor v0.5.6 released.",
+     "This release fixes warnings with gcc 4.0 and various bugs in the decompression code (including making it backwards compatible with zlib 1.1).  Files are now mmaped read-only (possibly helping the VM perform better for very large files). The exiv2 extractor no longer copies the file in memory.  The HTML extractor was completely rewritten and made simpler and more robust.");
+DTDD("Wed Sep  7 21:41:35 PDT 2005 | libextractor v0.5.5 released.",
+     "This release fixes a problem with linkers that caused segmentation faults for Debian unstable users.  The deb extractor no longer uses pthreads.  Dead code was eliminated in the OLE2 and OO extractors.  Minor bugfixes were ported from libgsf to the OLE2 extractor.  Mime-types are now detected for various Microsoft Office formats.  libextractor now automatically decompresses GZ and BZ2 files before extracting keywords, adding support for compressed files to all formats.  Individual extractors do no longer perform full-file decompression, avoiding some redundant computation.");
+DTDD("Fri Aug 26 22:47:07 PDT 2005 | libextractor v0.5.4 released.",
+     "This release fixes a memory leak in the thumbnail extractor, character set conversion in the OLE2 extractors and the build on OS X.  Quotations now follow GNU standards.   A workaround for a bug in libstdc++ that could cause segmentation fauls was added.  A new version of the python binding has also been released; this revision fixes various problems with the build process.");
+DTDD("Sat Aug 13 19:08:46 PDT 2005 | libextractor v0.5.3 released.",
+     "This release fixes various bugs in the EXIV2, OO and OLE2 plugins.  A static, relocatable version of glib is no longer required.");
+DTDD("Thu Jul 14 22:31:28 CEST 2005 | libextractor v0.5.2 released.",
+     "This release adds support for exiv2.  The API was extended to support in-memory metadata extraction (no file required).  Also new are functions to encode and decode the binary metadata of a thumbnail.  Various plugins were changed to allow for the in-memory metadata extraction.  A minor compile error was fixed.");
+DTDD("Mon Jul  4 18:24:18 CEST 2005 | libextractor v0.5.1 released.",
+     "This release moves the Java and Python bindings into seperate packages.  The new version improves the build system and contains some code cleanups.");
+DTDD("Sun May 21 13:58:52 CET 2005 | libextractor v0.5.0 released.",
+     "This release adds support for Python. Also, plugins can now be supplied with user-provided options.");
+DTDD("Thu Feb 24 01:23:31 EST 2005 | libextractor v0.4.2 released.",
+     "This release fixes some bugs in the ID3, PDF, PNG and REAL extractors.  The REAL extractor now also handles the new Helix formats.  libextractor can now also be used to extract thumbnails from images (using ImageMagick).");
+DTDD("Wed Jan 26 19:51:44 EST 2005 | libextractor v0.4.1 released.",
+     "This release fixes a security issue (inherited from xpdf).  It also extracts more meta-data from files of TAR or QuickTime format.");
 DTDD("Sat Dec 25 21:42:26 CET 2004 | libextractor v0.4.0 released.",
      "This release improves support for character sets (plugins are now expected to convert to UTF-8).  It also improves support for mp3 (adding genres) and png (handling of compressed comments).");
 DTDD("Sat Nov 13 13:23:23 EST 2004 | libextractor v0.3.11 released.",

Modified: GNUnet/src/util/logging.c
===================================================================
--- GNUnet/src/util/logging.c   2006-03-25 18:44:05 UTC (rev 2574)
+++ GNUnet/src/util/logging.c   2006-03-25 20:06:01 UTC (rev 2575)
@@ -193,7 +193,7 @@
       struct logfiledef def;
       char datestr[80];
       time_t curtime;
-      char *datefmt;
+      const char *datefmt;
       char c;
 
 #if ENABLE_NLS
[Prev in Thread]
Current Thread
[Next in Thread]
[GNUnet-SVN] r2575 - Extractor Extractor/doc Extractor/src/plugins Extractor-docs/WWW GNUnet/src/util, grothoff <=
Prev by Date: [GNUnet-SVN] r2574 - GNUnet-docs/WWW
Next by Date: [GNUnet-SVN] r2576 - in GNUnet: po src/applications/fs/fsui
Previous by thread: [GNUnet-SVN] r2574 - GNUnet-docs/WWW
Next by thread: [GNUnet-SVN] r2576 - in GNUnet: po src/applications/fs/fsui
Index(es):
- Date
- Thread