[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r2575 - Extractor Extractor/doc Extractor/src/plugins Extra
From: |
grothoff |
Subject: |
[GNUnet-SVN] r2575 - Extractor Extractor/doc Extractor/src/plugins Extractor-docs/WWW GNUnet/src/util |
Date: |
Sat, 25 Mar 2006 12:06:07 -0800 (PST) |
Author: grothoff
Date: 2006-03-25 12:06:01 -0800 (Sat, 25 Mar 2006)
New Revision: 2575
Modified:
Extractor-docs/WWW/index.php
Extractor-docs/WWW/oldnews.php3
Extractor/ChangeLog
Extractor/TODO
Extractor/configure.ac
Extractor/doc/extract.1
Extractor/doc/libextractor.3
Extractor/src/plugins/Makefile.am
Extractor/src/plugins/pdfextractor.c
Extractor/src/plugins/pngextractor.c
GNUnet/src/util/logging.c
Log:
sync
Modified: Extractor/ChangeLog
===================================================================
--- Extractor/ChangeLog 2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/ChangeLog 2006-03-25 20:06:01 UTC (rev 2575)
@@ -1,3 +1,8 @@
+Fri Mar 24 21:43:43 PST 2006
+ Started re-implementation of PDF support from scratch
+ (incomplete but working). Improvements to the build
+ system.
+
Thu Mar 9 17:46:39 PST 2006
Added support for wordleaker (additional meta-data for
OLE2 streams). Releasing libextractor 0.5.11.
Modified: Extractor/TODO
===================================================================
--- Extractor/TODO 2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/TODO 2006-03-25 20:06:01 UTC (rev 2575)
@@ -1,5 +1,4 @@
FIX:
-* HTML-extractor now broken (!) Also crappy code. FIX?!
* check exiv2 memory consumption on very large files;
also investigate 500kb (!) allocation/leak in exiv2 on test/test.html
(reported by valgrind)
Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac 2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/configure.ac 2006-03-25 20:06:01 UTC (rev 2575)
@@ -261,6 +261,25 @@
printable=1])
AM_CONDITIONAL(HAVE_PRINTABLE, test x$printable != x0)
+
+xpdf=0
+AC_MSG_CHECKING([whether to enable xpdf-based extractor])
+AC_ARG_ENABLE(xpdf,
+ [AC_HELP_STRING([--enable-xpdf],[Enable xpdf-based extractor])
+ AC_HELP_STRING([--disable-xpdf],[Disable xpdf-based extractor])],
+ [case "$enableval" in
+ no) AC_MSG_RESULT(no)
+ xpdf=0
+ ;;
+ *) AC_MSG_RESULT(yes)
+ xpdf=1
+ ;;
+ esac],
+ [ AC_MSG_RESULT(no)
+ xpdf=0])
+AM_CONDITIONAL(HAVE_XPDF, test x$xpdf != x0)
+
+
exiv2=1
AC_MSG_CHECKING([whether to enable exiv2 extractor])
AC_ARG_ENABLE(exiv2,
@@ -303,3 +322,48 @@
])
AC_OUTPUT
+
+if test "x$xpdf" = "x1"
+then
+ AC_MSG_NOTICE([NOTICE: xpdf enabled (xpdf has a bad security record)])
+else
+ AC_MSG_NOTICE([NOTICE: xpdf disabled (result: limited PDF support)])
+fi
+
+if test "x$exiv2" = "x0"
+then
+ AC_MSG_NOTICE([NOTICE: exiv2 disabled])
+fi
+
+
+if test "x$printable" = "x0"
+then
+ AC_MSG_NOTICE([NOTICE: printable plugins disabled])
+else
+ AC_MSG_NOTICE([NOTICE: printable plugins enabled (will need 150 MB memory to compile)])
+fi
+
+if test "x$without_glib" = "xtrue"
+then
+ AC_MSG_NOTICE([NOTICE: glib not used, no OLE2 (MS Office) support])
+fi
+
+if test "x$without_gtk" = "xtrue"
+then
+ AC_MSG_NOTICE([NOTICE: gtk not found, no thumbnail support])
+fi
+
+if test "x$HAVE_VORBISFILE_TRUE" = "x#"
+then
+ AC_MSG_NOTICE([NOTICE: vorbis support disabled])
+fi
+
+if test "x$HAVE_BZ2_TRUE" = "x#"
+then
+ AC_MSG_NOTICE([NOTICE: bzip2 support disabled])
+fi
+
+if test "x$HAVE_ZLIB_TRUE" = "x#"
+then
+ AC_MSG_ERROR([FATAL: zlib not found (headers not installed?)])
+fi
Modified: Extractor/doc/extract.1
===================================================================
--- Extractor/doc/extract.1 2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/doc/extract.1 2006-03-25 20:06:01 UTC (rev 2575)
@@ -1,4 +1,4 @@
-.TH EXTRACT 1 "April 28, 2005" "libextractor 0.4.2"
+.TH EXTRACT 1 "April 28, 2005" "libextractor 0.5.11"
.\" $Id
.SH NAME
extract
@@ -32,7 +32,7 @@
\&...
.br
.SH DESCRIPTION
-This manual page documents version 0.4.0 of the
+This manual page documents version 0.5.11 of the
.B extract
command.
.PP
Modified: Extractor/doc/libextractor.3
===================================================================
--- Extractor/doc/libextractor.3 2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/doc/libextractor.3 2006-03-25 20:06:01 UTC (rev 2575)
@@ -1,6 +1,6 @@
.TH LIBEXTRACTOR 3 "Jul 14, 2005"
.SH NAME
-libextractor \- meta\-information extraction library 0.5.2
+libextractor \- meta\-information extraction library 0.5.11
.SH SYNOPSIS
\fB#include <extractor.h>
Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am 2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/src/plugins/Makefile.am 2006-03-25 20:06:01 UTC (rev 2575)
@@ -15,8 +15,17 @@
exiv2dir=exiv2
endif
-SUBDIRS = . $(oodir) $(printdir) hash $(oledir) rpm pdf $(thumbdir) $(exiv2dir) wordleaker
+if HAVE_XPDF
+ xpdfdir=pdf
+else
+ pdfplugin=libextractor_pdf.la
+endif
+# toggle for development
+# SUBDIRS = .
+SUBDIRS = . $(oodir) $(printdir) hash $(oledir) rpm $(xpdfdir) $(thumbdir) $(exiv2dir) wordleaker
+
+
if HAVE_VORBISFILE
extraogg = libextractor_ogg.la
endif
@@ -30,7 +39,7 @@
oodir = oo
endif
-plugin_LTLIBRARIES = \
+plugin_LTLIBRARIES = $(pdfplugin) \
libextractor_asf.la \
libextractor_deb.la \
libextractor_dvi.la \
@@ -92,6 +101,13 @@
libextractor_wav_la_LDFLAGS = \
$(PLUGINFLAGS) $(retaincommand)
+libextractor_pdf_la_SOURCES = \
+ pdfextractor.c
+libextractor_pdf_la_LDFLAGS = \
+ $(PLUGINFLAGS) $(retaincommand)
+libextractor_pdf_la_LIBADD = \
+ libconvert.la
+
libextractor_mp3_la_SOURCES = \
mp3extractor.c
libextractor_mp3_la_LDFLAGS = \
Modified: Extractor/src/plugins/pdfextractor.c
===================================================================
--- Extractor/src/plugins/pdfextractor.c 2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/src/plugins/pdfextractor.c 2006-03-25 20:06:01 UTC (rev 2575)
@@ -18,9 +18,25 @@
Boston, MA 02111-1307, USA.
*/
+/**
+ * TODO:
+ * - code clean up (factor out some parsing aspects?)
+ * - proper string decoding (escape sequences)
+ * - proper dictionary support
+ * - filters (compression!)
+ * - page count (and other document catalog information,
+ * such as language, viewer preferences, page layout,
+ * Metadatastreams (10.2.2), legal and permissions info)
+ * - pdf 1.5 support ((compressed) cross reference streams)
+ */
+
#include "platform.h"
#include "extractor.h"
#include <zlib.h>
+#ifndef _XOPEN_SOURCE
+#define _XOPEN_SOURCE 1
+#endif
+#include <time.h>
#include "convert.h"
static char * stndup(const char * str,
@@ -32,23 +48,9 @@
return tmp;
}
-/**
- * strnlen is GNU specific, let's redo it here to be
- * POSIX compliant.
- */
-static size_t stnlen(const char * str,
- size_t maxlen) {
- size_t ret;
- ret = 0;
- while ( (ret < maxlen) &&
- (str[ret] != '\0') )
- ret++;
- return ret;
-}
-
static struct EXTRACTOR_Keywords *
addKeyword(EXTRACTOR_KeywordType type,
- const char * keyword,
+ char * keyword,
struct EXTRACTOR_Keywords * next) {
EXTRACTOR_KeywordList * result;
@@ -56,26 +58,108 @@
return next;
result = malloc(sizeof(EXTRACTOR_KeywordList));
result->next = next;
- result->keyword = strdup(keyword);
+ result->keyword = keyword;
result->keywordType = type;
return result;
}
+
+
+static char *
+dateDecode(const char * pdfString) {
+ unsigned char * ret;
+
+ if (pdfString == NULL)
+ return NULL;
+ if (strlen(pdfString) < 4)
+ return NULL;
+ return stndup(&pdfString[3], strlen(pdfString) - 4);
+}
+
+static unsigned char *
+stringDecode(const char * pdfString,
+ size_t * size) {
+ size_t slen;
+ unsigned char * ret;
+ char hex[3];
+ int i;
+ int val;
+
+ slen = strlen(pdfString);
+ if (slen < 2)
+ return NULL;
+ switch (pdfString[0]) {
+ case '(':
+ if (pdfString[slen-1] != ')')
+ return NULL;
+ /* todo: recode escape sequences! */
+ *size = slen - 2;
+ return stndup(&pdfString[1], slen-2);
+ case '<':
+ if (pdfString[slen-1] != '>')
+ return NULL;
+ hex[2] = '\0';
+ ret = malloc(1 + ((slen - 1) / 2));
+ for (i=0;i<slen-2;i+=2) {
+ hex[0] = pdfString[i+1];
+ hex[1] = '0';
+ if (i + 1 < slen)
+ hex[1] = pdfString[i+2];
+ if ( (1 != sscanf(hex, "%x", &val)) &&
+ (1 != sscanf(hex, "%X", &val)) ) {
+ free(ret);
+ return NULL;
+ }
+ ret[i/2] = val;
+ }
+ ret[(slen-1)/2] = '\0';
+ *size = (slen-1) / 2;
+ return ret;
+ }
+ return NULL;
+}
+
+static char *
+charsetDecode(const unsigned char * in,
+ size_t size) {
+ if (in == NULL)
+ return NULL;
+ if ( (size < 2) ||
+ (in[0] != 0xfe) ||
+ (in[1] != 0xff) ) {
+ /* TODO: extend glibc with
+ character set that corresponds to
+ Adobe's extended ISOLATIN1 encoding! */
+ return convertToUtf8(in,
+ size,
+ "CSISOLATIN1");
+ } else {
+ return convertToUtf8(&in[2],
+ size - 2,
+ "UNICODEBIG");
+ }
+
+}
+
static struct {
char * name;
EXTRACTOR_KeywordType type;
} tagmap[] = {
- { "Author" , EXTRACTOR_AUTHOR},
- { "Description" , EXTRACTOR_DESCRIPTION},
- { "Comment", EXTRACTOR_COMMENT},
- { "Copyright", EXTRACTOR_COPYRIGHT},
- { "Source", EXTRACTOR_SOURCE},
- { "Creation Time", EXTRACTOR_DATE},
- { "Title", EXTRACTOR_TITLE},
- { "Software", EXTRACTOR_SOFTWARE},
- { "Disclaimer", EXTRACTOR_DISCLAIMER},
- { "Warning", EXTRACTOR_WARNING},
- { "Signature", EXTRACTOR_RESOURCE_IDENTIFIER},
+ { "/CreationDate", EXTRACTOR_CREATION_DATE},
+ { "/Author" , EXTRACTOR_AUTHOR},
+ { "/Description" , EXTRACTOR_DESCRIPTION},
+ { "/Title" , EXTRACTOR_TITLE},
+ { "/Comment", EXTRACTOR_COMMENT},
+ { "/Copyright", EXTRACTOR_COPYRIGHT},
+ { "/Subject", EXTRACTOR_SUBJECT},
+ { "/PTEX.Fullbanner", EXTRACTOR_SOFTWARE},
+ { "/Creator", EXTRACTOR_CREATOR},
+ { "/ModDate", EXTRACTOR_MODIFICATION_DATE},
+ { "/Producer", EXTRACTOR_PRODUCER},
+ { "/Software", EXTRACTOR_SOFTWARE},
+ { "/Keywords", EXTRACTOR_KEYWORDS},
+ { "/Warning", EXTRACTOR_WARNING},
+ { "/Signature", EXTRACTOR_RESOURCE_IDENTIFIER},
{ NULL, EXTRACTOR_UNKNOWN},
};
@@ -97,7 +181,9 @@
size_t size,
struct EXTRACTOR_Keywords * prev) {
size_t pos;
+ size_t spos;
size_t steps;
+ size_t mlen;
unsigned int xstart;
unsigned int xcount;
unsigned int xinfo;
@@ -107,6 +193,10 @@
unsigned long long info_offset;
char buf[MAX_STEPS+1];
int i;
+ char * meta;
+ unsigned char * dmeta;
+ char pcnt[20];
+ float version;
while ( (size > 0) && (IS_NL(data[size-1])) )
size--;
@@ -116,6 +206,20 @@
return prev;
if (0 != memcmp(&data[size - strlen(PDF_EOF)], PDF_EOF, strlen(PDF_EOF)))
return prev;
+ /* PDF format is pretty much sure by now */
+ memcpy(buf,
+ data,
+ 8);
+ buf[8] = '\0';
+ if (1 != sscanf(buf, "%%PDF-%f", &version)) {
+ return prev;
+ }
+ sprintf(pcnt, "PDF %.1f", version);
+ prev = addKeyword(EXTRACTOR_FORMAT,
+ strdup(pcnt),
+ prev);
+
+
pos = size - strlen(PDF_EOF) - strlen(PDF_SXR);
steps = 0;
@@ -123,21 +227,20 @@
(pos > 0) &&
(0 != memcmp(&data[pos], PDF_SXR, strlen(PDF_SXR))) )
pos--;
- printf("pos: %u\n", pos);
- if (0 != memcmp(&data[pos], PDF_SXR, strlen(PDF_SXR)))
+ if (0 != memcmp(&data[pos], PDF_SXR, strlen(PDF_SXR))) {
+ /* cross reference streams not yet supported! */
return prev;
+ }
memcpy(buf, &data[pos + strlen(PDF_SXR)], steps);
buf[steps] = '\0';
if (1 != sscanf(buf, "%llu", &startxref))
return prev;
- printf("startxref: %llu\n", startxref);
if (startxref >= size - strlen(PDF_XREF))
return prev;
if (0 != memcmp(&data[startxref], PDF_XREF, strlen(PDF_XREF)))
return prev;
haveValidXref = 0;
xrefpos = startxref + strlen(PDF_XREF);
-
while (1) {
pos = xrefpos;
while ( (pos < size) && (IS_NL(data[pos])) )
@@ -146,10 +249,6 @@
buf[MIN(MAX_STEPS,size-pos)] = '\0';
if (2 != sscanf(buf, "%u %u", &xstart, &xcount))
break;
- printf("xstart: %u - xcount: %u - pos %u\n",
- xstart,
- xcount,
- pos);
while ( (pos < size) && (! IS_NL(data[pos])) )
pos++;
if ( (pos < size) && IS_NL(data[pos]))
@@ -158,8 +257,6 @@
if ( (xrefpos >= size) || (xrefpos < pos) )
return prev; /* invalid xref size */
haveValidXref = 1;
- printf("xref portion ends at %llu\n",
- xrefpos);
}
if (! haveValidXref)
return prev;
@@ -170,6 +267,7 @@
strlen(PDF_TRAILER)))
return prev;
pos += strlen(PDF_TRAILER);
+
SKIP("<< \n\r", pos, data, size);
while ( (pos < size) &&
(pos + strlen(PDF_INFO) < size) &&
@@ -186,8 +284,7 @@
}
while ( (pos < size) &&
(IS_NL(data[pos]) || isspace(data[pos]) ) )
- pos++;
- }
+ pos++; }
if ( ! ( (pos < size) &&
(pos + strlen(PDF_INFO) < size) &&
(0 == memcmp(&data[pos],
@@ -207,7 +304,6 @@
}
if (1 != sscanf(buf, "%u", &xinfo))
return prev;
- printf("xinfo: %u\n", xinfo);
haveValidXref = 0;
/* now go find xinfo in xref table */
@@ -220,10 +316,6 @@
buf[MIN(MAX_STEPS,size-pos)] = '\0';
if (2 != sscanf(buf, "%u %u", &xstart, &xcount))
break;
- printf("xstart: %u - xcount: %u - pos %u\n",
- xstart,
- xcount,
- pos);
while ( (pos < size) && (! IS_NL(data[pos])) )
pos++;
if ( (pos < size) && IS_NL(data[pos]))
@@ -234,9 +326,7 @@
pos += 20 * xinfo - xstart;
memcpy(buf, &data[pos], 20);
buf[20] = '\0';
- sscanf(buf, "%10llu %*5u %*c", &info_offset);
-
-
+ sscanf(buf, "%10llu %*5u %*c", &info_offset);
break;
}
xrefpos = 20 * xcount + pos;
@@ -245,12 +335,73 @@
}
if (! haveValidXref)
return prev;
+ pos = info_offset;
+
+ while ( (pos < size - 4) &&
+ (! ( (data[pos] == '<') &&
+ (data[pos+1] == '<') ) ) )
+ pos++;
+ pos++;
+ if (pos >= size - 4)
+ return prev;
+ if ( (data[pos] == ' ') ||
+ (data[pos] == 10) ||
+ (data[pos] == 13) )
+ pos++;
+
+ while ( (pos < size - 2) &&
+ ( ! ( (data[pos] == '>') &&
+ (data[pos+1] == '>') ) ) ) {
+ i = 0;
+ while (tagmap[i].name != NULL) {
+ if ( (pos + strlen(tagmap[i].name) > pos) &&
+ (pos + strlen(tagmap[i].name) + 1 < size) &&
+ (0 == memcmp(&data[pos],
+ tagmap[i].name,
+ strlen(tagmap[i].name))) ) {
+ pos += strlen(tagmap[i].name);
+ if (isspace(data[pos]))
+ pos++;
+ spos = pos;
+ while ( (pos < size + 2) &&
+ (! IS_NL(data[pos])) &&
+ (data[pos] != '/') &&
+ (! ( (data[pos] == '>') &&
+ (data[pos+1] == '>') ) ) )
+ pos++;
+ meta = stndup(&data[spos],
+ pos - spos);
+ if (i == 0) {
+ dmeta = dateDecode(meta);
+ mlen = strlen(dmeta);
+ } else {
+ dmeta = stringDecode(meta,
+ &mlen);
+ }
+ if (meta != NULL)
+ free(meta);
+ meta = charsetDecode(dmeta, mlen);
+ if (dmeta != NULL)
+ free(dmeta);
+ if (meta != NULL) {
+ prev = addKeyword(tagmap[i].type,
+ meta,
+ prev);
+ }
+ break;
+ }
+ i++;
+ }
+ if (tagmap[i].name == NULL) {
+ while ( (pos < size) &&
+ (! IS_NL(data[pos])) )
+ pos++;
+ }
+ while ( (pos < size) &&
+ (IS_NL(data[pos])) )
+ pos++;
+ }
- /* read size of xref */
- /* parse xref */
- /* find info index */
- /* parse info */
-
return prev;
}
Modified: Extractor/src/plugins/pngextractor.c
===================================================================
--- Extractor/src/plugins/pngextractor.c 2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor/src/plugins/pngextractor.c 2006-03-25 20:06:01 UTC (rev 2575)
@@ -282,7 +282,7 @@
-struct EXTRACTOR_Keywords * libextractor_png_extract(char * filename,
+struct EXTRACTOR_Keywords * libextractor_png_extract(const char * filename,
const char * data,
size_t size,
struct EXTRACTOR_Keywords * prev) {
Modified: Extractor-docs/WWW/index.php
===================================================================
--- Extractor-docs/WWW/index.php 2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor-docs/WWW/index.php 2006-03-25 20:06:01 UTC (rev 2575)
@@ -41,31 +41,6 @@
"This release adds support for extracting additional metadata from MS
Word (OLE2) streams, including language, document statistics and editing
history.");
DTDD("Sat Feb 18 17:42:24 PST 2006 | libextractor v0.5.10 released.",
"This release fixes some minor security problems in the PDF extractor.
The OLE2 extractor supports additional mime types. The TAR extractor is now
extracting date, format long filenames and supports more checksum variants.");
-DTDD("Fri Dec 23 12:58:18 PST 2005 | libextractor v0.5.9 released.",
- "This release fixes a rare crash in the MIME-extractor. The TAR extractor
is now more robust and supports additional TAR variants. The split extractor
now uses SPLIT for the keyword type, instead of UNKNOWN.");
-DTDD("Tue Dec 6 13:25:56 PST 2005 | libextractor v0.5.8 released.",
- "This release fixes a %s in the PDF extractor.",
-
extlink_("http://www.idefense.com/application/poi/display?id=344&type=vulnerabilities","security
problem"));
-DTDD("Sat Nov 12 10:50:46 PST 2005 | libextractor v0.5.7 released.",
- "This release features an updated German translation and improves support
for the TAR and PDF formats. Mime-type detection for OLE2 streams was
improved. The extract tool now returns an error code if files passed as
arguments could not be accessed. A double-free problem under BSD was fixed.");
-DTDD("Sun Sep 18 19:39:42 PDT 2005 | libextractor v0.5.6 released.",
- "This release fixes warnings with gcc 4.0 and various bugs in the
decompression code (including making it backwards compatible with zlib 1.1).
Files are now mmaped read-only (possibly helping the VM perform better for very
large files). The exiv2 extractor no longer copies the file in memory. The
HTML extractor was completely rewritten and made simpler and more robust.");
-DTDD("Wed Sep 7 21:41:35 PDT 2005 | libextractor v0.5.5 released.",
- "This release fixes a problem with linkers that caused segmentation
faults for Debian unstable users. The deb extractor no longer uses pthreads.
Dead code was eliminated in the OLE2 and OO extractors. Minor bugfixes were
ported from libgsf to the OLE2 extractor. Mime-types are now detected for
various Microsoft Office formats. libextractor now automatically decompresses
GZ and BZ2 files before extracting keywords, adding support for compressed
files to all formats. Individual extractors do no longer perform full-file
decompression, avoiding some redundant computation.");
-DTDD("Fri Aug 26 22:47:07 PDT 2005 | libextractor v0.5.4 released.",
- "This release fixes a memory leak in the thumbnail extractor, character
set conversion in the OLE2 extractors and the build on OS X. Quotations now
follow GNU standards. A workaround for a bug in libstdc++ that could cause
segmentation fauls was added. A new version of the python binding has also
been released; this revision fixes various problems with the build process.");
-DTDD("Sat Aug 13 19:08:46 PDT 2005 | libextractor v0.5.3 released.",
- "This release fixes various bugs in the EXIV2, OO and OLE2 plugins. A
static, relocatable version of glib is no longer required.");
-DTDD("Thu Jul 14 22:31:28 CEST 2005 | libextractor v0.5.2 released.",
- "This release adds support for exiv2. The API was extended to support
in-memory metadata extraction (no file required). Also new are functions to
encode and decode the binary metadata of a thumbnail. Various plugins were
changed to allow for the in-memory metadata extraction. A minor compile error
was fixed.");
-DTDD("Mon Jul 4 18:24:18 CEST 2005 | libextractor v0.5.1 released.",
- "This release moves the Java and Python bindings into seperate packages.
The new version improves the build system and contains some code cleanups.");
-DTDD("Sun May 21 13:58:52 CET 2005 | libextractor v0.5.0 released.",
- "This release adds support for Python. Also, plugins can now be supplied
with user-provided options.");
-DTDD("Thu Feb 24 01:23:31 EST 2005 | libextractor v0.4.2 released.",
- "This release fixes some bugs in the ID3, PDF, PNG and REAL extractors.
The REAL extractor now also handles the new Helix formats. libextractor can
now also be used to extract thumbnails from images (using ImageMagick).");
-DTDD("Wed Jan 26 19:51:44 EST 2005 | libextractor v0.4.1 released.",
- "This release fixes a security issue (inherited from xpdf). It also
extracts more meta-data from files of TAR or QuickTime format.");
echo "</dl>";
P();
W("%s",
Modified: Extractor-docs/WWW/oldnews.php3
===================================================================
--- Extractor-docs/WWW/oldnews.php3 2006-03-25 18:44:05 UTC (rev 2574)
+++ Extractor-docs/WWW/oldnews.php3 2006-03-25 20:06:01 UTC (rev 2575)
@@ -6,6 +6,31 @@
$page="oldnews";
include("html_header.php3");
echo "<dl>";
+DTDD("Fri Dec 23 12:58:18 PST 2005 | libextractor v0.5.9 released.",
+ "This release fixes a rare crash in the MIME-extractor. The TAR extractor
is now more robust and supports additional TAR variants. The split extractor
now uses SPLIT for the keyword type, instead of UNKNOWN.");
+DTDD("Tue Dec 6 13:25:56 PST 2005 | libextractor v0.5.8 released.",
+ "This release fixes a %s in the PDF extractor.",
+
extlink_("http://www.idefense.com/application/poi/display?id=344&type=vulnerabilities","security
problem"));
+DTDD("Sat Nov 12 10:50:46 PST 2005 | libextractor v0.5.7 released.",
+ "This release features an updated German translation and improves support
for the TAR and PDF formats. Mime-type detection for OLE2 streams was
improved. The extract tool now returns an error code if files passed as
arguments could not be accessed. A double-free problem under BSD was fixed.");
+DTDD("Sun Sep 18 19:39:42 PDT 2005 | libextractor v0.5.6 released.",
+ "This release fixes warnings with gcc 4.0 and various bugs in the
decompression code (including making it backwards compatible with zlib 1.1).
Files are now mmaped read-only (possibly helping the VM perform better for very
large files). The exiv2 extractor no longer copies the file in memory. The
HTML extractor was completely rewritten and made simpler and more robust.");
+DTDD("Wed Sep 7 21:41:35 PDT 2005 | libextractor v0.5.5 released.",
+ "This release fixes a problem with linkers that caused segmentation
faults for Debian unstable users. The deb extractor no longer uses pthreads.
Dead code was eliminated in the OLE2 and OO extractors. Minor bugfixes were
ported from libgsf to the OLE2 extractor. Mime-types are now detected for
various Microsoft Office formats. libextractor now automatically decompresses
GZ and BZ2 files before extracting keywords, adding support for compressed
files to all formats. Individual extractors do no longer perform full-file
decompression, avoiding some redundant computation.");
+DTDD("Fri Aug 26 22:47:07 PDT 2005 | libextractor v0.5.4 released.",
+ "This release fixes a memory leak in the thumbnail extractor, character
set conversion in the OLE2 extractors and the build on OS X. Quotations now
follow GNU standards. A workaround for a bug in libstdc++ that could cause
segmentation fauls was added. A new version of the python binding has also
been released; this revision fixes various problems with the build process.");
+DTDD("Sat Aug 13 19:08:46 PDT 2005 | libextractor v0.5.3 released.",
+ "This release fixes various bugs in the EXIV2, OO and OLE2 plugins. A
static, relocatable version of glib is no longer required.");
+DTDD("Thu Jul 14 22:31:28 CEST 2005 | libextractor v0.5.2 released.",
+ "This release adds support for exiv2. The API was extended to support
in-memory metadata extraction (no file required). Also new are functions to
encode and decode the binary metadata of a thumbnail. Various plugins were
changed to allow for the in-memory metadata extraction. A minor compile error
was fixed.");
+DTDD("Mon Jul 4 18:24:18 CEST 2005 | libextractor v0.5.1 released.",
+ "This release moves the Java and Python bindings into seperate packages.
The new version improves the build system and contains some code cleanups.");
+DTDD("Sun May 21 13:58:52 CET 2005 | libextractor v0.5.0 released.",
+ "This release adds support for Python. Also, plugins can now be supplied
with user-provided options.");
+DTDD("Thu Feb 24 01:23:31 EST 2005 | libextractor v0.4.2 released.",
+ "This release fixes some bugs in the ID3, PDF, PNG and REAL extractors.
The REAL extractor now also handles the new Helix formats. libextractor can
now also be used to extract thumbnails from images (using ImageMagick).");
+DTDD("Wed Jan 26 19:51:44 EST 2005 | libextractor v0.4.1 released.",
+ "This release fixes a security issue (inherited from xpdf). It also
extracts more meta-data from files of TAR or QuickTime format.");
DTDD("Sat Dec 25 21:42:26 CET 2004 | libextractor v0.4.0 released.",
"This release improves support for character sets (plugins are now
expected to convert to UTF-8). It also improves support for mp3 (adding
genres) and png (handling of compressed comments).");
DTDD("Sat Nov 13 13:23:23 EST 2004 | libextractor v0.3.11 released.",
Modified: GNUnet/src/util/logging.c
===================================================================
--- GNUnet/src/util/logging.c 2006-03-25 18:44:05 UTC (rev 2574)
+++ GNUnet/src/util/logging.c 2006-03-25 20:06:01 UTC (rev 2575)
@@ -193,7 +193,7 @@
struct logfiledef def;
char datestr[80];
time_t curtime;
- char *datefmt;
+ const char *datefmt;
char c;
#if ENABLE_NLS
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r2575 - Extractor Extractor/doc Extractor/src/plugins Extractor-docs/WWW GNUnet/src/util,
grothoff <=