gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r23243 - in Extractor/src/plugins: . old


From: gnunet
Subject: [GNUnet-SVN] r23243 - in Extractor/src/plugins: . old
Date: Wed, 15 Aug 2012 01:10:13 +0200

Author: grothoff
Date: 2012-08-15 01:10:13 +0200 (Wed, 15 Aug 2012)
New Revision: 23243

Added:
   Extractor/src/plugins/old/pdf_extractor.cc
Removed:
   Extractor/src/plugins/pdf_extractor.cc
Log:
moving plugins that currently do not compile to 'old' directory

Copied: Extractor/src/plugins/old/pdf_extractor.cc (from rev 23239, Extractor/src/plugins/pdf_extractor.cc)
===================================================================
--- Extractor/src/plugins/old/pdf_extractor.cc                          (rev 0)
+++ Extractor/src/plugins/old/pdf_extractor.cc  2012-08-14 23:10:13 UTC (rev 23243)
@@ -0,0 +1,235 @@
+/*
+     This file is part of libextractor.
+     (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff
+
+     libextractor is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 2, or (at your
+     option) any later version.
+
+     libextractor is distributed in the hope that it will be useful, but
+     WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with libextractor; see the file COPYING.  If not, write to the
+     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+     Boston, MA 02111-1307, USA.
+
+     This code was inspired by pdfinfo and depends heavily
+     on the xpdf code that pdfinfo is a part of. See also
+     the INFO file in this directory.
+ */
+
+#include "platform.h"
+#include "extractor.h"
+#include "convert.h"
+#include <math.h>
+
+#include <poppler/goo/gmem.h>
+#include <poppler/Object.h>
+#include <poppler/Stream.h>
+#include <poppler/Array.h>
+#include <poppler/Dict.h>
+#include <poppler/XRef.h>
+#include <poppler/Catalog.h>
+#include <poppler/Page.h>
+#include <poppler/PDFDoc.h>
+#include <poppler/Error.h>
+#include <poppler/GlobalParams.h>
+#include <poppler/goo/GooString.h>
+
+/**
+ * Hand the NUL-terminated string 's' to the metadata processor 'proc'
+ * as plugin "pdf" metadata of the given 'type', declared as UTF-8
+ * "text/plain".  The reported length includes the terminating NUL.
+ *
+ * Expects 'proc', 'proc_cls', an int 'err' and a label 'EXIT' to exist
+ * in the enclosing function: if the processor returns non-zero, 'err'
+ * is set to 1 and control jumps to EXIT.  's' is evaluated twice, so
+ * it must not have side effects.
+ */
+#define ADD(s, type) \
+  do { \
+    if (0 != proc (proc_cls, "pdf", (type), EXTRACTOR_METAFORMAT_UTF8, \
+                   "text/plain", (s), strlen (s) + 1)) \
+      { \
+        err = 1; \
+        goto EXIT; \
+      } \
+  } while (0)
+
+/**
+ * Look up the string entry 'key' in the PDF info dictionary and report
+ * it to the metadata processor as UTF-8.
+ *
+ * A value starting with the bytes 0xFE 0xFF is converted from UTF-16BE
+ * (everything after those two bytes).  Any other value is treated as
+ * ISO-8859-1; trailing whitespace (including NBSP, 0xA0) is dropped
+ * first and a value that ends up empty is not reported.
+ *
+ * @param infoDict dictionary to search
+ * @param key name of the entry
+ * @param type metadata type to report the value under
+ * @param proc metadata processor callback
+ * @param proc_cls closure for 'proc'
+ * @return 0 to continue (also if the key could not be duplicated, the
+ *         entry is not a string, or conversion failed),
+ *         1 if 'proc' returned non-zero
+ */
+static int
+printInfoString (Dict *infoDict,
+                 const char *key,
+                 enum EXTRACTOR_MetaType type,
+                 EXTRACTOR_MetaDataProcessor proc,
+                 void *proc_cls)
+{
+  Object entry;
+  GooString *value;
+  const char *raw;
+  char *key_copy;
+  char *utf8 = NULL;
+  size_t keep;
+  int err = 0;
+
+  /* lookup() is handed a mutable copy of the key */
+  key_copy = strdup (key);
+  if (NULL == key_copy)
+    return 0;
+  if (! infoDict->lookup (key_copy, &entry)->isString ())
+    goto EXIT;
+  value = entry.getString ();
+  raw = value->getCString ();
+  if ( (0xfe == (((unsigned char) raw[0]) & 0xff)) &&
+       (0xff == (((unsigned char) raw[1]) & 0xff)) )
+    {
+      /* leading 0xFE 0xFF: the remainder is UTF-16BE */
+      utf8 = EXTRACTOR_common_convert_to_utf8 (&raw[2],
+                                               value->getLength () - 2,
+                                               "UTF-16BE");
+      if (NULL != utf8)
+        ADD (utf8, type);
+      goto EXIT;
+    }
+
+  /*
+   * Avoid outputting trailing spaces: shrink 'keep' until the last
+   * retained byte is neither ASCII whitespace nor an ISO-8859
+   * non-breaking space (0xA0).  isspace() is deliberately not used,
+   * as its treatment of NBSP may be locale-dependent.
+   */
+  for (keep = strlen (raw); 0 < keep; keep--)
+    {
+      const char last = raw[keep - 1];
+
+      if (! ( (' ' == last) || (((char) 0xA0) == last) ||
+              ('\r' == last) || ('\n' == last) ||
+              ('\t' == last) || ('\v' == last) ||
+              ('\f' == last) ) )
+        break;
+    }
+
+  /* there should be a check to truncate preposterously long values. */
+
+  if (0 == keep)
+    goto EXIT;
+  utf8 = EXTRACTOR_common_convert_to_utf8 (raw, keep, "ISO-8859-1");
+  if (NULL != utf8)
+    ADD (utf8, type);
+ EXIT:
+  entry.free ();
+  free (utf8);
+  free (key_copy);
+  return err;
+}
+
+/**
+ * Look up the date entry 'key' in the PDF info dictionary and report
+ * it to the metadata processor as UTF-8.
+ *
+ * A value starting with the bytes 0xFE 0xFF is converted from UTF-16BE
+ * (everything after those two bytes).  Any other value has a leading
+ * "D:" prefix removed and is then converted from ISO-8859-1, the same
+ * way printInfoString() treats such strings, so that bytes above 0x7F
+ * are never passed on as (invalid) UTF-8.  Empty values are not
+ * reported.
+ *
+ * @param infoDict dictionary to search
+ * @param key name of the entry
+ * @param type metadata type to report the value under
+ * @param proc metadata processor callback
+ * @param proc_cls closure for 'proc'
+ * @return 0 to continue (also if the key could not be duplicated, the
+ *         entry is not a string, or conversion failed),
+ *         1 if 'proc' returned non-zero
+ */
+static int
+printInfoDate (Dict *infoDict,
+               const char *key,
+               enum EXTRACTOR_MetaType type,
+               EXTRACTOR_MetaDataProcessor proc,
+               void *proc_cls)
+{
+  Object obj;
+  const char *s;
+  GooString *s1;
+  char *gkey;
+  char *result;
+  int err;
+
+  err = 0;
+  result = NULL;
+  /* lookup() is handed a mutable copy of the key */
+  gkey = strdup (key);
+  if (NULL == gkey)
+    return 0;
+  if (infoDict->lookup (gkey, &obj)->isString ())
+    {
+      s1 = obj.getString ();
+      s = s1->getCString ();
+      if ( (0xfe == (s1->getChar (0) & 0xff)) &&
+           (0xff == (s1->getChar (1) & 0xff)) )
+        {
+          /* leading 0xFE 0xFF: the remainder is UTF-16BE */
+          result = EXTRACTOR_common_convert_to_utf8 (&s[2],
+                                                     s1->getLength () - 2,
+                                                     "UTF-16BE");
+        }
+      else
+        {
+          if ( ('D' == s[0]) && (':' == s[1]) )
+            s += 2;
+          /* the value is declared as UTF-8 by ADD, so convert it
+             instead of passing the raw bytes through */
+          if ('\0' != s[0])
+            result = EXTRACTOR_common_convert_to_utf8 (s,
+                                                       strlen (s),
+                                                       "ISO-8859-1");
+        }
+      if (NULL != result)
+        ADD (result, type);
+    }
+ EXIT:
+  obj.free ();
+  free (result);
+  free (gkey);
+  return err;
+}
+
+/**
+ * Report the info-dictionary string entry named 's' as metadata of
+ * type 't' via printInfoString().  Expects 'info', 'proc', 'proc_cls',
+ * an int 'err' and a label 'EXIT' in the enclosing function; stores
+ * the result in 'err' and jumps to EXIT if it is non-zero.
+ */
+#define PIS(s,t) \
+  do { \
+    if (0 != (err = printInfoString (info.getDict (), (s), (t), \
+                                     proc, proc_cls))) \
+      goto EXIT; \
+  } while (0)
+
+/**
+ * Report the info-dictionary date entry named 's' as metadata of
+ * type 't' via printInfoDate().  Same requirements on the enclosing
+ * function and same error handling as PIS.
+ */
+#define PID(s,t) \
+  do { \
+    if (0 != (err = printInfoDate (info.getDict (), (s), (t), \
+                                   proc, proc_cls))) \
+      goto EXIT; \
+  } while (0)
+
+extern "C" {
+
+  /**
+   * Entry point of the PDF plugin: parse the in-memory buffer with
+   * poppler and report the mime type, the entries of the document
+   * info dictionary (if there is one), the page count and the PDF
+   * version to the metadata processor.
+   *
+   * Page count and PDF version are reported for every document that
+   * poppler accepts; only the Title/Subject/Keywords/Author/Creator/
+   * Producer and date entries depend on the info dictionary.
+   *
+   * @param data buffer handed to poppler as the document
+   * @param size number of bytes in 'data'
+   * @param proc metadata processor callback
+   * @param proc_cls closure for 'proc'
+   * @param options not used by this plugin
+   * @return 0 to continue (also if poppler rejects the document),
+   *         non-zero once 'proc' returned non-zero
+   */
+  int 
+  EXTRACTOR_pdf_extract (const char *data,
+                        size_t size,
+                        EXTRACTOR_MetaDataProcessor proc,
+                        void *proc_cls,
+                        const char *options)
+  {
+    PDFDoc * doc;
+    Object info;
+    Object obj;
+    BaseStream * stream;
+    char buf[64];
+    int have_info;
+    int err;
+
+    /* create the poppler global parameters on first use and
+       silence poppler's error output */
+    if (globalParams == NULL)
+      {
+       globalParams = new GlobalParams();
+       globalParams->setErrQuiet (gTrue);
+      }
+    obj.initNull();
+    err = 0;
+    /* the stream is handed to the PDFDoc; only 'doc' is deleted below */
+    stream = new MemStream( (char*) data, 0, size, &obj);
+    doc = new PDFDoc(stream, NULL, NULL);
+    if (! doc->isOk()) {
+      delete doc;
+      return 0;
+    }
+
+    ADD ("application/pdf",
+        EXTRACTOR_METATYPE_MIMETYPE);
+    /* 'have_info' is assigned, not initialised, at this point so that
+       the 'goto EXIT' inside ADD above does not skip an initialisation */
+    have_info = ( (NULL != doc->getDocInfo(&info)) &&
+                 (info.isDict()) );
+    if (have_info) {
+      PIS ("Title", EXTRACTOR_METATYPE_TITLE);
+      PIS ("Subject", EXTRACTOR_METATYPE_SUBJECT);
+      PIS ("Keywords", EXTRACTOR_METATYPE_KEYWORDS);
+      PIS ("Author", EXTRACTOR_METATYPE_AUTHOR_NAME);
+      /*
+       * we now believe that Adobe's Creator is not a person nor an
+       * organisation, but just a piece of software.
+       */
+      PIS ("Creator", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE);
+      PIS ("Producer", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE);
+    }
+
+    /* properties of the document itself; bounded formatting */
+    snprintf(buf, sizeof (buf), "%d", doc->getNumPages());
+    ADD (buf, EXTRACTOR_METATYPE_PAGE_COUNT);
+#if HAVE_POPPLER_GETPDFMAJORVERSION
+    snprintf(buf, sizeof (buf), "PDF %d.%d", 
+            doc->getPDFMajorVersion(),
+            doc->getPDFMinorVersion());
+#else
+    snprintf(buf, sizeof (buf), "PDF %.1f", 
+            doc->getPDFVersion());
+#endif
+    ADD (buf, EXTRACTOR_METATYPE_FORMAT);
+
+    if (have_info) {
+      PID ("CreationDate", EXTRACTOR_METATYPE_CREATION_DATE);
+      PID ("ModDate", EXTRACTOR_METATYPE_MODIFICATION_DATE);
+    }
+  EXIT:
+    info.free();
+    delete doc;
+
+    return err;
+  }
+}
+

Deleted: Extractor/src/plugins/pdf_extractor.cc
===================================================================
--- Extractor/src/plugins/pdf_extractor.cc      2012-08-14 23:04:27 UTC (rev 23242)
+++ Extractor/src/plugins/pdf_extractor.cc      2012-08-14 23:10:13 UTC (rev 23243)
@@ -1,235 +0,0 @@
-/*
-     This file is part of libextractor.
-     (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff
-
-     libextractor is free software; you can redistribute it and/or modify
-     it under the terms of the GNU General Public License as published
-     by the Free Software Foundation; either version 2, or (at your
-     option) any later version.
-
-     libextractor is distributed in the hope that it will be useful, but
-     WITHOUT ANY WARRANTY; without even the implied warranty of
-     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-     General Public License for more details.
-
-     You should have received a copy of the GNU General Public License
-     along with libextractor; see the file COPYING.  If not, write to the
-     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-     Boston, MA 02111-1307, USA.
-
-     This code was inspired by pdfinfo and depends heavily
-     on the xpdf code that pdfinfo is a part of. See also
-     the INFO file in this directory.
- */
-
-#include "platform.h"
-#include "extractor.h"
-#include "convert.h"
-#include <math.h>
-
-#include <poppler/goo/gmem.h>
-#include <poppler/Object.h>
-#include <poppler/Stream.h>
-#include <poppler/Array.h>
-#include <poppler/Dict.h>
-#include <poppler/XRef.h>
-#include <poppler/Catalog.h>
-#include <poppler/Page.h>
-#include <poppler/PDFDoc.h>
-#include <poppler/Error.h>
-#include <poppler/GlobalParams.h>
-#include <poppler/goo/GooString.h>
-
-#define ADD(s, type) do { if (0!=proc(proc_cls, "pdf", type, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) { err = 1; goto EXIT; }} while (0)
-
-static int 
-printInfoString(Dict *infoDict,
-               const char *key,
-               enum EXTRACTOR_MetaType type,
-               EXTRACTOR_MetaDataProcessor proc,
-               void *proc_cls)
-{
-  Object obj;
-  GooString *s1;
-  const char * s;
-  char *ckey = strdup (key);
-  int err = 0;
-  char * result;
-      
-  if (ckey == NULL)
-    return 0;
-  result = NULL;
-  if (infoDict->lookup(ckey, &obj)->isString()) {
-    s1 = obj.getString();
-    s = s1->getCString();
-    if ((((unsigned char)s[0]) & 0xff) == 0xfe &&
-       (((unsigned char)s[1]) & 0xff) == 0xff) {
-      result = EXTRACTOR_common_convert_to_utf8(&s[2], s1->getLength() - 2, "UTF-16BE");
-      if (result != NULL)
-       ADD (result, type);
-    } else {
-      size_t len = strlen(s);
-      
-      while(0 < len) 
-       {
-         /*
-          * Avoid outputting trailing spaces.
-          *
-          * The following expression might be rewritten as
-          * (! isspace(s[len - 1]) && 0xA0 != s[len - 1]).
-          * There seem to exist isspace() implementations
-          * which do return non-zero from NBSP (maybe locale-dependent).
-          * Remove ISO-8859 non-breaking space (NBSP, hex value 0xA0) from
-          * the expression if it looks suspicious (locale issues for instance).
-          *
-          * Squeezing out all non-printable characters might also be useful.
-          */
-         if ( (' '  != s[len - 1]) && (((char)0xA0) != s[len - 1]) &&
-               ('\r' != s[len - 1]) && ('\n' != s[len - 1]) &&
-               ('\t' != s[len - 1]) && ('\v' != s[len - 1]) &&
-               ('\f' != s[len - 1]) )
-           break;        
-          else
-            len --;
-        }
-
-        /* there should be a check to truncate preposterously long values. */
-      
-      if (0 < len) {
-       result = EXTRACTOR_common_convert_to_utf8(s, len,
-                                                 "ISO-8859-1");
-       if (result != NULL)
-         ADD (result, type);
-      }
-    }
-  }
- EXIT:
-  obj.free();
-  if (result != NULL)
-    free (result);
-  free (ckey);
-  return err;
-}
-
-static int 
-printInfoDate(Dict *infoDict,
-             const char *key,
-             enum EXTRACTOR_MetaType type,
-             EXTRACTOR_MetaDataProcessor proc,
-             void *proc_cls)
-{
-  Object obj;
-  const char *s;
-  GooString *s1;  
-  char *gkey;
-  char * result;
-  int err;
-  
-  err = 0;
-  result = NULL;
-  gkey = strdup (key);
-  if (gkey == NULL)
-    return 0;
-  if (infoDict->lookup(gkey, &obj)->isString()) {
-    s1 = obj.getString();
-    s = s1->getCString();
-    
-    if ((s1->getChar(0) & 0xff) == 0xfe &&
-       (s1->getChar(1) & 0xff) == 0xff) {
-      /* isUnicode */
-      
-      result = EXTRACTOR_common_convert_to_utf8((const char*)&s[2], s1->getLength() - 2, "UTF-16BE");
-      if (result != NULL)
-       ADD (result, type);
-    } else {
-      if (s[0] == 'D' && s[1] == ':') 
-       s += 2;
-      
-      ADD (s, type);
-    }
-    /* printf(fmt, s);*/
-  }
- EXIT:
-  obj.free();
-  if (result != NULL)
-    free (result);
-  free (gkey);
-  return err;
-}
-
-#define PIS(s,t) do { if (0 != (err = printInfoString (info.getDict(), s, t, proc, proc_cls))) goto EXIT; } while (0)
-
-#define PID(s,t) do { if (0 != (err = printInfoDate (info.getDict(), s, t, proc, proc_cls))) goto EXIT; } while (0)
-
-extern "C" {
- 
-
-  int 
-  EXTRACTOR_pdf_extract (const char *data,
-                        size_t size,
-                        EXTRACTOR_MetaDataProcessor proc,
-                        void *proc_cls,
-                        const char *options)
-  {
-    PDFDoc * doc;
-    Object info;
-    Object obj;
-    BaseStream * stream;
-    int err;
-
-    if (globalParams == NULL)
-      {
-       globalParams = new GlobalParams();
-       globalParams->setErrQuiet (gTrue);
-      }
-    obj.initNull();
-    err = 0;
-    stream = new MemStream( (char*) data, 0, size, &obj);
-    doc = new PDFDoc(stream, NULL, NULL);
-    if (! doc->isOk()) {
-      delete doc;
-      return 0;
-    }
-
-    ADD ("application/pdf",
-        EXTRACTOR_METATYPE_MIMETYPE);
-    if ( (NULL != doc->getDocInfo(&info)) &&
-        (info.isDict()) ) {
-      PIS ("Title", EXTRACTOR_METATYPE_TITLE);
-      PIS ("Subject", EXTRACTOR_METATYPE_SUBJECT);
-      PIS ("Keywords", EXTRACTOR_METATYPE_KEYWORDS);
-      PIS ("Author", EXTRACTOR_METATYPE_AUTHOR_NAME);
-      /*
-       * we now believe that Adobe's Creator is not a person nor an
-       * organisation, but just a piece of software.
-       */
-      PIS ("Creator", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE);
-      PIS ("Producer", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE);
-      {
-       char pcnt[20];
-       sprintf(pcnt, "%d", doc->getNumPages());
-       ADD (pcnt, EXTRACTOR_METATYPE_PAGE_COUNT);
-      }
-      {
-       char pcnt[64];
-#if HAVE_POPPLER_GETPDFMAJORVERSION
-       sprintf(pcnt, "PDF %d.%d", 
-               doc->getPDFMajorVersion(),
-               doc->getPDFMinorVersion());
-#else
-       sprintf(pcnt, "PDF %.1f", 
-               doc->getPDFVersion());
-#endif
-       ADD (pcnt, EXTRACTOR_METATYPE_FORMAT);
-      }
-      PID ("CreationDate", EXTRACTOR_METATYPE_CREATION_DATE);
-      PID ("ModDate", EXTRACTOR_METATYPE_MODIFICATION_DATE);
-    }
-  EXIT:
-    info.free();
-    delete doc;
-
-    return err;
-  }
-}
-




reply via email to

[Prev in Thread] Current Thread [Next in Thread]