[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r9752 - in Extractor/src: include plugins
From: |
gnunet |
Subject: |
[GNUnet-SVN] r9752 - in Extractor/src: include plugins |
Date: |
Mon, 14 Dec 2009 19:48:48 +0100 |
Author: grothoff
Date: 2009-12-14 19:48:48 +0100 (Mon, 14 Dec 2009)
New Revision: 9752
Added:
Extractor/src/plugins/dvi_extractor.c
Removed:
Extractor/src/plugins/dviextractor.c
Modified:
Extractor/src/include/extractor.h
Extractor/src/plugins/Makefile.am
Log:
dvi
Modified: Extractor/src/include/extractor.h
===================================================================
--- Extractor/src/include/extractor.h 2009-12-14 18:27:19 UTC (rev 9751)
+++ Extractor/src/include/extractor.h 2009-12-14 18:48:48 UTC (rev 9752)
@@ -277,7 +277,7 @@
EXTRACTOR_METATYPE_GENERATOR = 103,
EXTRACTOR_METATYPE_ENCODED_BY = 121,
- EXTRACTOR_METATYPE_PRODUCTVERSION = 90,
+ EXTRACTOR_METATYPE_PROUCUCTVERSION = 90,
EXTRACTOR_METATYPE_DISCLAIMER = 27,
EXTRACTOR_METATYPE_FULL_DATA = 137,
Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am 2009-12-14 18:27:19 UTC (rev 9751)
+++ Extractor/src/plugins/Makefile.am 2009-12-14 18:48:48 UTC (rev 9752)
@@ -89,6 +89,7 @@
libextractor_applefile.la \
libextractor_asf.la \
libextractor_deb.la \
+ libextractor_dvi.la \
libextractor_html.la \
libextractor_it.la \
libextractor_mime.la
@@ -113,6 +114,11 @@
libextractor_deb_la_LIBADD = \
-lz
+libextractor_dvi_la_SOURCES = \
+ dvi_extractor.c
+libextractor_dvi_la_LDFLAGS = \
+ $(PLUGINFLAGS)
+
libextractor_html_la_SOURCES = \
html_extractor.c
libextractor_html_la_LDFLAGS = \
@@ -135,7 +141,6 @@
OLD_LIBS = \
$(pdfplugin) \
- libextractor_dvi.la \
libextractor_elf.la \
$(extraflac) \
libextractor_flv.la \
@@ -244,11 +249,6 @@
libextractor_id3v24_la_LIBADD = \
$(top_builddir)/src/common/libextractor_common.la
-libextractor_dvi_la_SOURCES = \
- dviextractor.c
-libextractor_dvi_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
if HAVE_ZLIB
libextractor_tar_la_SOURCES = \
tarextractor.c
Copied: Extractor/src/plugins/dvi_extractor.c (from rev 9749, Extractor/src/plugins/dviextractor.c)
===================================================================
--- Extractor/src/plugins/dvi_extractor.c (rev 0)
+++ Extractor/src/plugins/dvi_extractor.c 2009-12-14 18:48:48 UTC (rev 9752)
@@ -0,0 +1,246 @@
+/*
+ This file is part of libextractor.
+ (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff
+
+ libextractor is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 2, or (at your
+ option) any later version.
+
+ libextractor is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libextractor; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+ */
+
+#include "platform.h"
+#include "extractor.h"
+
+/**
+ * Pairs a key, as it is spelled inside a "ps:SDict begin [" special,
+ * with the meta type under which the key's value is reported.
+ */
+typedef struct
+{
+  /* key text to search for, including the opening parenthesis */
+  const char *text;
+  /* meta type handed to the processor for the value after the key */
+  enum EXTRACTOR_MetaType type;
+} Matches;
+
+/**
+ * Keys looked for by parseZZZ.  The table is read-only; the entry whose
+ * text is NULL terminates it.
+ */
+static const Matches tmap[] = {
+  {"/Title (", EXTRACTOR_METATYPE_TITLE},
+  {"/Subject (", EXTRACTOR_METATYPE_SUBJECT},
+  {"/Author (", EXTRACTOR_METATYPE_AUTHOR_NAME},
+  {"/Keywords (", EXTRACTOR_METATYPE_KEYWORDS},
+  {"/Creator (", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
+  {"/Producer (", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
+  {NULL, 0},
+};
+
+/**
+ * Scan the payload of one special for the keys listed in tmap and hand
+ * every value found to the processor.
+ *
+ * Only payloads beginning with "ps:SDict begin [" are examined.  A value
+ * starts right after the key (which ends in an opening parenthesis) and
+ * runs up to the next ')' or the end of the payload, whichever comes
+ * first.
+ *
+ * @param data start of the buffer holding the file
+ * @param pos offset of the payload inside data
+ * @param len number of payload bytes; the caller must make sure that
+ *        pos + len does not exceed the buffer
+ * @param proc function called once per value found
+ * @param proc_cls closure passed through to proc
+ * @return 1 if proc returned non-zero (abort requested), 0 otherwise
+ */
+static int
+parseZZZ (const char *data,
+          size_t pos, size_t len,
+          EXTRACTOR_MetaDataProcessor proc,
+          void *proc_cls)
+{
+  static const char prefix[] = "ps:SDict begin [";
+  size_t slen;
+  size_t vlen;
+  size_t end;
+  int i;
+  int ret;
+  char *value;
+
+  end = pos + len;
+  slen = strlen (prefix);
+  if (len <= slen)
+    return 0;
+  if (0 != strncmp (prefix, &data[pos], slen))
+    return 0;
+  pos += slen;
+  while (pos < end)
+    {
+      for (i = 0; NULL != tmap[i].text; i++)
+        {
+          slen = strlen (tmap[i].text);
+          if (pos + slen >= end)
+            continue;           /* key cannot fit in what is left */
+          if (0 != strncmp (&data[pos], tmap[i].text, slen))
+            continue;
+          pos += slen;
+          /* measure the value: up to ')' or the end of the payload */
+          vlen = 0;
+          while ((pos + vlen < end) && (data[pos + vlen] != ')'))
+            vlen++;
+          value = malloc (vlen + 1);
+          if (NULL == value)
+            return 0;           /* out of memory: give up on this special */
+          memcpy (value, &data[pos], vlen);
+          value[vlen] = '\0';
+          ret = proc (proc_cls,
+                      "dvi",
+                      tmap[i].type,
+                      EXTRACTOR_METAFORMAT_C_STRING,
+                      "text/plain",
+                      value,
+                      strlen (value) + 1);
+          /* The temporary copy is ours to release once proc has returned
+             (EXTRACTOR_dvi_extract treats its comment buffer the same
+             way); previously it was leaked on every match. */
+          free (value);
+          if (0 != ret)
+            return 1;
+          pos += vlen + 1;      /* step over the value and the ')' */
+        }
+      pos++;
+    }
+  return 0;
+}
+
+/**
+ * Read an unsigned int from an address that need not be aligned.
+ *
+ * The bytes are copied into a local of the right type.  Going through a
+ * char buffer and casting its address, as was done before, guarantees
+ * neither alignment nor conformance with the aliasing rules.
+ *
+ * @param data address of the first byte to read; sizeof (unsigned int)
+ *        bytes must be readable there
+ * @return the bytes reinterpreted as an unsigned int; no byte swapping
+ *         is performed
+ */
+static unsigned int
+getIntAt (const void *data)
+{
+  unsigned int value;
+
+  memcpy (&value, data, sizeof (value));
+  return value;
+}
+
+/**
+ * Read an unsigned short from an address that need not be aligned.
+ *
+ * The bytes are copied into a local of the right type.  Going through a
+ * char buffer and casting its address, as was done before, guarantees
+ * neither alignment nor conformance with the aliasing rules.
+ *
+ * @param data address of the first byte to read; sizeof (unsigned short)
+ *        bytes must be readable there
+ * @return the bytes reinterpreted as an unsigned short, widened to
+ *         unsigned int; no byte swapping is performed
+ */
+static unsigned int
+getShortAt (const void *data)
+{
+  unsigned short value;
+
+  memcpy (&value, data, sizeof (value));
+  return value;
+}
+
+
+/**
+ * Extract meta data from a DVI file held completely in memory.
+ *
+ * The file is accepted only if it starts with the bytes 247, 2, ends
+ * with a 'post_post' trailer that leads to a 'post' command, and the
+ * chain of 'bop' back pointers starting there is consistent.  For an
+ * accepted file the mime type "application/x-dvi", the preamble comment
+ * and the page count are reported.  Afterwards the specials
+ * (zzz1..zzz4) that follow the first page's 'bop' are handed to
+ * parseZZZ; the scan stops at the first opcode that is neither 'bop'
+ * nor a special.
+ *
+ * All offsets and lengths read from the file are validated with
+ * subtractions on the known-good side of the comparison, so that
+ * values near UINT_MAX cannot wrap around and slip past a check.
+ *
+ * @param data the file contents
+ * @param size number of bytes in data
+ * @param proc function called for every item found
+ * @param proc_cls closure passed through to proc
+ * @param options not used
+ * @return 1 if proc returned non-zero (abort requested), 0 otherwise,
+ *         including when the data is not recognised as DVI
+ */
+int
+EXTRACTOR_dvi_extract (const unsigned char *data,
+                       size_t size,
+                       EXTRACTOR_MetaDataProcessor proc,
+                       void *proc_cls,
+                       const char *options)
+{
+  unsigned int klen;
+  char *comment;
+  unsigned int pos;
+  unsigned int opos;
+  unsigned int len;
+  unsigned int hdr;
+  unsigned int pageCount;
+  char pages[16];
+  int ret;
+
+  if (size < 40)
+    return 0;
+  /* offsets inside the file are 32 bit wide; anything larger cannot be
+     addressed by them (nor by the unsigned int cursor used below) */
+  if (size != (size_t) (unsigned int) size)
+    return 0;
+  if ((data[0] != 247) || (data[1] != 2))
+    return 0;                   /* cannot be dvi or unsupported version */
+  klen = data[14];
+  if (15 + (size_t) klen > size)
+    return 0;                   /* preamble comment would end past the file */
+
+  /* walk backwards over the padding bytes (223) at the end of the file */
+  pos = size - 1;
+  while ((data[pos] == 223) && (pos > 0))
+    pos--;
+  if ((data[pos] != 2) || (pos < 40))
+    return 0;
+  pos--;
+  pos -= 4;
+  /* assert pos at 'post_post tag' */
+  if (data[pos] != 249)
+    return 0;
+  opos = pos;
+  pos = ntohl (getIntAt (&data[opos + 1]));
+  /* size > 40 holds here, so "size - 25" cannot underflow */
+  if (pos > size - 25)
+    return 0;
+  /* assert pos at 'post' command */
+  if (data[pos] != 248)
+    return 0;
+  pageCount = 0;
+  opos = pos;
+  pos = ntohl (getIntAt (&data[opos + 1]));
+  /* follow the back pointers from the last page to the first one;
+     a pointer of -1 terminates the chain */
+  while (1)
+    {
+      if (pos == (unsigned int) -1)
+        break;
+      if ((size < 45) || (pos > size - 45))
+        return 0;
+      if (data[pos] != 139)     /* expect 'bop' */
+        return 0;
+      pageCount++;
+      opos = pos;
+      pos = ntohl (getIntAt (&data[opos + 41]));
+      if (pos == (unsigned int) -1)
+        break;
+      if (pos >= opos)
+        return 0;               /* invalid! */
+    }
+  /* ok, now we believe it's a dvi... */
+  snprintf (pages, sizeof (pages), "%u", pageCount);
+  if (0 != proc (proc_cls,
+                 "dvi",
+                 EXTRACTOR_METATYPE_MIMETYPE,
+                 EXTRACTOR_METAFORMAT_UTF8,
+                 "text/plain",
+                 "application/x-dvi",
+                 strlen ("application/x-dvi") + 1))
+    return 1;
+  comment = malloc (klen + 1);
+  if (NULL == comment)
+    return 0;                   /* out of memory: report nothing further */
+  memcpy (comment, &data[15], klen);
+  comment[klen] = '\0';
+  ret = proc (proc_cls,
+              "dvi",
+              EXTRACTOR_METATYPE_COMMENT,
+              EXTRACTOR_METAFORMAT_UTF8,
+              "text/plain",
+              comment,
+              strlen (comment) + 1);
+  free (comment);
+  if (0 != ret)
+    return 1;
+  if (0 != proc (proc_cls,
+                 "dvi",
+                 EXTRACTOR_METATYPE_PAGE_COUNT,
+                 EXTRACTOR_METAFORMAT_UTF8,
+                 "text/plain",
+                 pages,
+                 strlen (pages) + 1))
+    return 1;
+  /* try to find PDF/ps special */
+  if (size < 100)
+    return 0;                   /* "size - 100" below would underflow */
+  pos = opos;
+  while (pos < size - 100)
+    {
+      /* pos < size - 100 makes the few header bytes read below safe */
+      switch (data[pos])
+        {
+        case 139:               /* begin page 'bop', we typically have to
+                                   skip that one to find the zzz's */
+          pos += 45;            /* skip bop */
+          continue;
+        case 239:               /* zzz1: 1 byte length */
+          hdr = 2;
+          len = data[pos + 1];
+          break;
+        case 240:               /* zzz2: 2 byte length */
+          hdr = 3;
+          len = ntohs (getShortAt (&data[pos + 1]));
+          break;
+        case 241:               /* zzz3: 3 byte length, most significant
+                                   byte first like the other lengths */
+          hdr = 4;
+          len = (((unsigned int) data[pos + 1]) << 16)
+            | (((unsigned int) data[pos + 2]) << 8)
+            | ((unsigned int) data[pos + 3]);
+          break;
+        case 242:               /* zzz4: 4 byte length */
+          hdr = 5;
+          len = ntohl (getIntAt (&data[pos + 1]));
+          break;
+        default:                /* unsupported opcode, abort scan */
+          return 0;
+        }
+      /* Same condition as "pos + hdr + len < size", written so that a
+         huge len cannot wrap.  A special that does not fit ends the
+         scan: advancing past it would leave the scanned range anyway. */
+      if (len >= size - pos - hdr)
+        return 0;
+      if (0 != parseZZZ ((const char *) data, pos + hdr, len, proc,
+                         proc_cls))
+        return 1;
+      pos += len + hdr;
+    }
+  return 0;
+}
Deleted: Extractor/src/plugins/dviextractor.c
===================================================================
--- Extractor/src/plugins/dviextractor.c 2009-12-14 18:27:19 UTC (rev 9751)
+++ Extractor/src/plugins/dviextractor.c 2009-12-14 18:48:48 UTC (rev 9752)
@@ -1,221 +0,0 @@
-/*
- This file is part of libextractor.
- (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff
-
- libextractor is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 2, or (at your
- option) any later version.
-
- libextractor is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with libextractor; see the file COPYING. If not, write to the
- Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA.
- */
-
-#include "platform.h"
-#include "extractor.h"
-
-static EXTRACTOR_KeywordList *
-addKeyword (EXTRACTOR_KeywordType type,
- char *keyword, EXTRACTOR_KeywordList * next)
-{
- EXTRACTOR_KeywordList *result;
-
- if (keyword == NULL)
- return next;
- result = malloc (sizeof (EXTRACTOR_KeywordList));
- result->next = next;
- result->keyword = keyword;
- result->keywordType = type;
- return result;
-}
-
-typedef struct
-{
- char *text;
- EXTRACTOR_KeywordType type;
-} Matches;
-
-static Matches tmap[] = {
- {"/Title (", EXTRACTOR_TITLE},
- {"/Subject (", EXTRACTOR_SUBJECT},
- {"/Author (", EXTRACTOR_AUTHOR},
- {"/Keywords (", EXTRACTOR_KEYWORDS},
- {"/Creator (", EXTRACTOR_CREATOR},
- {"/Producer (", EXTRACTOR_PRODUCER},
- {NULL, 0},
-};
-
-static struct EXTRACTOR_Keywords *
-parseZZZ (const char *data,
- size_t pos, size_t len, struct EXTRACTOR_Keywords *prev)
-{
- size_t slen;
- size_t end;
- int i;
- char *value;
-
- end = pos + len;
- slen = strlen ("ps:SDict begin [");
- if (len <= slen)
- return prev;
- if (0 != strncmp ("ps:SDict begin [ ", &data[pos], slen))
- return prev;
- pos += slen;
- while (pos < end)
- {
- i = 0;
- while (tmap[i].text != NULL)
- {
- slen = strlen (tmap[i].text);
- if (pos + slen < end)
- {
- if (0 == strncmp (&data[pos], tmap[i].text, slen))
- {
- pos += slen;
- slen = pos;
- while ((slen < end) && (data[slen] != ')'))
- slen++;
- slen = slen - pos;
- value = malloc (slen + 1);
- value[slen] = '\0';
- memcpy (value, &data[pos], slen);
- prev = addKeyword (tmap[i].type, value, prev);
- pos += slen + 1;
- }
- }
- i++;
- }
- pos++;
- }
- return prev;
-}
-
-static unsigned int
-getIntAt (const void *data)
-{
- char p[4];
-
- memcpy (p, data, 4); /* ensure alignment! */
- return *(unsigned int *) &p[0];
-}
-
-static unsigned int
-getShortAt (const void *data)
-{
- char p[2];
-
- memcpy (p, data, 2); /* ensure alignment! */
- return *(unsigned short *) &p[0];
-}
-
-struct EXTRACTOR_Keywords *
-libextractor_dvi_extract (const char *filename,
- const unsigned char *data,
- size_t size, struct EXTRACTOR_Keywords *prev)
-{
- unsigned int klen;
- char *comment;
- unsigned int pos;
- unsigned int opos;
- unsigned int len;
- unsigned int pageCount;
- char *pages;
-
- if (size < 40)
- return prev;
- if ((data[0] != 247) || (data[1] != 2))
- return prev; /* cannot be dvi or unsupported version */
- klen = data[14];
-
- pos = size - 1;
- while ((data[pos] == 223) && (pos > 0))
- pos--;
- if ((data[pos] != 2) || (pos < 40))
- return prev;
- pos--;
- pos -= 4;
- /* assert pos at 'post_post tag' */
- if (data[pos] != 249)
- return prev;
- opos = pos;
- pos = ntohl (getIntAt (&data[opos + 1]));
- if (pos + 25 > size)
- return prev;
- /* assert pos at 'post' command */
- if (data[pos] != 248)
- return prev;
- pageCount = 0;
- opos = pos;
- pos = ntohl (getIntAt (&data[opos + 1]));
- while (1)
- {
- if (pos == (unsigned int) -1)
- break;
- if (pos + 45 > size)
- return prev;
- if (data[pos] != 139) /* expect 'bop' */
- return prev;
- pageCount++;
- opos = pos;
- pos = ntohl (getIntAt (&data[opos + 41]));
- if (pos == (unsigned int) -1)
- break;
- if (pos >= opos)
- return prev; /* invalid! */
- }
- /* ok, now we believe it's a dvi... */
- pages = malloc (16);
- snprintf (pages, 16, "%u", pageCount);
- comment = malloc (klen + 1);
- comment[klen] = '\0';
- memcpy (comment, &data[15], klen);
- prev = addKeyword (EXTRACTOR_MIMETYPE, strdup ("application/x-dvi"), prev);
- prev = addKeyword (EXTRACTOR_COMMENT, comment, prev);
- prev = addKeyword (EXTRACTOR_PAGE_COUNT, pages, prev);
- /* try to find PDF/ps special */
- pos = opos;
- while (pos < size - 100)
- {
- switch (data[pos])
- {
- case 139: /* begin page 'bop', we typically have to skip that one to
- find the zzz's */
- pos += 45; /* skip bop */
- break;
- case 239: /* zzz1 */
- len = data[pos + 1];
- if (pos + 2 + len < size)
- prev = parseZZZ ((const char *) data, pos + 2, len, prev);
- pos += len + 2;
- break;
- case 240: /* zzz2 */
- len = ntohs (getShortAt (&data[pos + 1]));
- if (pos + 3 + len < size)
- prev = parseZZZ ((const char *) data, pos + 3, len, prev);
- pos += len + 3;
- break;
- case 241: /* zzz3, who uses that? */
- len = (ntohs (getShortAt (&data[pos + 1]))) + 65536 * data[pos + 3];
- if (pos + 4 + len < size)
- prev = parseZZZ ((const char *) data, pos + 4, len, prev);
- pos += len + 4;
- break;
- case 242: /* zzz4, hurray! */
- len = ntohl (getIntAt (&data[pos + 1]));
- if (pos + 1 + len < size)
- prev = parseZZZ ((const char *) data, pos + 5, len, prev);
- pos += len + 5;
- break;
- default: /* unsupported opcode, abort scan */
- return prev;
- }
- }
- return prev;
-}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r9752 - in Extractor/src: include plugins,
gnunet <=