[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r1242 - in Extractor-python: . m4
From: |
grothoff |
Subject: |
[GNUnet-SVN] r1242 - in Extractor-python: . m4 |
Date: |
Mon, 4 Jul 2005 09:01:43 -0700 (PDT) |
Author: grothoff
Date: 2005-07-04 09:01:35 -0700 (Mon, 04 Jul 2005)
New Revision: 1242
Added:
Extractor-python/extractor.h
Extractor-python/m4/
Extractor-python/m4/ac_python_devel.m4
Modified:
Extractor-python/bootstrap
Extractor-python/configure.ac
Extractor-python/libextractor_python.c
Extractor-python/libextractor_python_setup.py
Log:
py
Modified: Extractor-python/bootstrap
===================================================================
--- Extractor-python/bootstrap 2005-07-04 15:17:20 UTC (rev 1241)
+++ Extractor-python/bootstrap 2005-07-04 16:01:35 UTC (rev 1242)
@@ -1,5 +1,2 @@
#!/bin/sh
autoreconf -f -i
-cd libltdl
-autoreconf -f -i
-cd ..
Modified: Extractor-python/configure.ac
===================================================================
--- Extractor-python/configure.ac 2005-07-04 15:17:20 UTC (rev 1241)
+++ Extractor-python/configure.ac 2005-07-04 16:01:35 UTC (rev 1242)
@@ -20,7 +20,7 @@
# test for libextractor
extractor=0
-AC_MSG_CHECKING(for libextractor)
+AC_MSG_CHECKING([for libextractor])
AC_ARG_WITH(extractor,
[ --with-extractor=PFX Base of libextractor installation],
[AC_MSG_RESULT([$with_extractor])
@@ -33,8 +33,8 @@
extractor=1))
;;
*)
- LDFLAGS="-L$with_extractor/lib $LDFLAGS"
- CPPFLAGS="-I$with_extractor/include $CPPFLAGS"
+ LIBDIR="-L$with_extractor/lib $LDFLAGS"
+ INCLUDEDIR="$with_extractor/include $CPPFLAGS"
AC_CHECK_HEADERS(extractor.h,
AC_CHECK_LIB([extractor], [EXTRACTOR_loadDefaultLibraries],
EXT_LIB_PATH="-L$with_extractor/lib $EXT_LIB_PATH"
@@ -51,6 +51,9 @@
AC_MSG_ERROR([libextractor-python requires libextractor])
fi
+AC_SUBST(INCLUDEDIR)
+AC_SUBST(LDFLAGS)
+AC_SUBST(LIBDIR)
AC_CONFIG_FILES([Makefile])
AC_OUTPUT
Added: Extractor-python/extractor.h
===================================================================
--- Extractor-python/extractor.h 2005-07-04 15:17:20 UTC (rev 1241)
+++ Extractor-python/extractor.h 2005-07-04 16:01:35 UTC (rev 1242)
@@ -0,0 +1,347 @@
+/*
+ This file is part of libextractor.
+ (C) 2002, 2003, 2004, 2005 Vidyut Samanta and Christian Grothoff
+
+ libextractor is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 2, or (at your
+ option) any later version.
+
+ libextractor is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libextractor; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+ */
+
+#ifndef EXTRACTOR_H
+#define EXTRACTOR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * 0.2.6-1 => 0x00020601
+ * 4.5.2-0 => 0x04050200
+ */
+#define EXTRACTOR_VERSION 0x00050002
+
+#include <stdio.h>
+
+/* ignore the 'type' of the keyword when eliminating duplicates */
+#define EXTRACTOR_DUPLICATES_TYPELESS 1
+/* remove type 'UNKNOWN' if there is a duplicate keyword of
+ known type, even if usually different types should be
+ preserved */
+#define EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN 2
+
+#define EXTRACTOR_DEFAULT_LIBRARIES EXTRACTOR_getDefaultLibraries()
+
+const char * EXTRACTOR_getDefaultLibraries(void);
+
+/**
+ * Enumeration defining various sources of keywords.
+ * See also
+ * http://dublincore.org/documents/1998/09/dces/
+ */
+typedef enum {
+ EXTRACTOR_UNKNOWN = 0,
+ EXTRACTOR_FILENAME = 1,
+ EXTRACTOR_MIMETYPE = 2,
+ EXTRACTOR_TITLE = 3,
+ EXTRACTOR_AUTHOR = 4,
+ EXTRACTOR_ARTIST = 5,
+ EXTRACTOR_DESCRIPTION = 6,
+ EXTRACTOR_COMMENT = 7,
+ EXTRACTOR_DATE = 8,
+ EXTRACTOR_PUBLISHER = 9,
+ EXTRACTOR_LANGUAGE = 10,
+ EXTRACTOR_ALBUM = 11,
+ EXTRACTOR_GENRE = 12,
+ EXTRACTOR_LOCATION = 13,
+ EXTRACTOR_VERSIONNUMBER = 14,
+ EXTRACTOR_ORGANIZATION = 15,
+ EXTRACTOR_COPYRIGHT = 16,
+ EXTRACTOR_SUBJECT = 17,
+ EXTRACTOR_KEYWORDS = 18,
+ EXTRACTOR_CONTRIBUTOR = 19,
+ EXTRACTOR_RESOURCE_TYPE = 20,
+ EXTRACTOR_FORMAT = 21,
+ EXTRACTOR_RESOURCE_IDENTIFIER = 22,
+ EXTRACTOR_SOURCE = 23,
+ EXTRACTOR_RELATION = 24,
+ EXTRACTOR_COVERAGE = 25,
+ EXTRACTOR_SOFTWARE = 26,
+ EXTRACTOR_DISCLAIMER = 27,
+ EXTRACTOR_WARNING = 28,
+ EXTRACTOR_TRANSLATED = 29,
+ EXTRACTOR_CREATION_DATE = 30,
+ EXTRACTOR_MODIFICATION_DATE = 31,
+ EXTRACTOR_CREATOR = 32,
+ EXTRACTOR_PRODUCER = 33,
+ EXTRACTOR_PAGE_COUNT = 34,
+ EXTRACTOR_PAGE_ORIENTATION = 35,
+ EXTRACTOR_PAPER_SIZE = 36,
+ EXTRACTOR_USED_FONTS = 37,
+ EXTRACTOR_PAGE_ORDER = 38,
+ EXTRACTOR_CREATED_FOR = 39,
+ EXTRACTOR_MAGNIFICATION = 40,
+ EXTRACTOR_RELEASE = 41,
+ EXTRACTOR_GROUP = 42,
+ EXTRACTOR_SIZE = 43,
+ EXTRACTOR_SUMMARY = 44,
+ EXTRACTOR_PACKAGER = 45,
+ EXTRACTOR_VENDOR = 46,
+ EXTRACTOR_LICENSE = 47,
+ EXTRACTOR_DISTRIBUTION = 48,
+ EXTRACTOR_BUILDHOST = 49,
+ EXTRACTOR_OS = 50,
+ EXTRACTOR_DEPENDENCY = 51,
+ EXTRACTOR_HASH_MD4 = 52,
+ EXTRACTOR_HASH_MD5 = 53,
+ EXTRACTOR_HASH_SHA0 = 54,
+ EXTRACTOR_HASH_SHA1 = 55,
+ EXTRACTOR_HASH_RMD160 = 56,
+ EXTRACTOR_RESOLUTION = 57,
+ EXTRACTOR_CATEGORY = 58,
+ EXTRACTOR_BOOKTITLE = 59,
+ EXTRACTOR_PRIORITY = 60,
+ EXTRACTOR_CONFLICTS = 61,
+ EXTRACTOR_REPLACES = 62,
+ EXTRACTOR_PROVIDES = 63,
+ EXTRACTOR_CONDUCTOR = 64,
+ EXTRACTOR_INTERPRET = 65,
+ EXTRACTOR_OWNER = 66,
+ EXTRACTOR_LYRICS = 67,
+ EXTRACTOR_MEDIA_TYPE = 68,
+ EXTRACTOR_CONTACT = 69,
+ EXTRACTOR_THUMBNAIL_DATA = 70,
+ EXTRACTOR_PUBLICATION_DATE = 71,
+ EXTRACTOR_CAMERA_MAKE = 72,
+ EXTRACTOR_CAMERA_MODEL = 73,
+ EXTRACTOR_EXPOSURE = 74,
+ EXTRACTOR_APERTURE = 75,
+ EXTRACTOR_EXPOSURE_BIAS = 76,
+ EXTRACTOR_FLASH = 77,
+ EXTRACTOR_FLASH_BIAS = 78,
+ EXTRACTOR_FOCAL_LENGTH = 79,
+ EXTRACTOR_FOCAL_LENGTH_35MM = 80,
+ EXTRACTOR_ISO_SPEED = 81,
+ EXTRACTOR_EXPOSURE_MODE = 82,
+ EXTRACTOR_METERING_MODE = 83,
+ EXTRACTOR_MACRO_MODE = 84,
+ EXTRACTOR_IMAGE_QUALITY = 85,
+ EXTRACTOR_WHITE_BALANCE = 86,
+ EXTRACTOR_FILESIZE = 87,
+ EXTRACTOR_ORIENTATION = 88,
+} EXTRACTOR_KeywordType;
+
+/**
+ * A linked list of keywords. This structure is passed around
+ * in libExtractor and is typically the result of any keyword
+ * extraction operation.
+ * <p>
+ * Each entry in the keyword list consists of a string (the
+ * keyword) and the keyword type (of type KeywordType)
+ * describing how/from where the keyword was obtained.
+ */
+typedef struct EXTRACTOR_Keywords {
+ /* the keyword that was found */
+ char * keyword;
+ /* the type of the keyword (classification) */
+ EXTRACTOR_KeywordType keywordType;
+ /* the next entry in the list */
+ struct EXTRACTOR_Keywords * next;
+} EXTRACTOR_KeywordList;
+
+/**
+ * Signature of the extract method that each plugin
+ * must provide.
+ */
+typedef EXTRACTOR_KeywordList *
+(*ExtractMethod)(const char * filename,
+ char * data,
+ size_t filesize,
+ EXTRACTOR_KeywordList * next,
+ const char * options);
+
+/**
+ * Linked list of extractor helper-libraries. An application
+ * builds this list by telling libextractor to load various
+ * keyword-extraction libraries. Libraries can also be unloaded
+ * (removed from this list, see removeLibrary).
+ * <p>
+ * Client code should never be concerned with the internals of
+ * this struct.
+ */
+typedef struct EXTRACTOR_Extractor {
+ void * libraryHandle;
+ char * libname;
+ ExtractMethod extractMethod;
+ struct EXTRACTOR_Extractor * next;
+ char * options;
+} EXTRACTOR_ExtractorList;
+
+/**
+ * Load the default set of libraries.
+ * @return the default set of libraries.
+ */
+EXTRACTOR_ExtractorList * EXTRACTOR_loadDefaultLibraries(void);
+
+/**
+ * Get the textual name of the keyword.
+ * @return NULL if the type is not known
+ */
+const char *
+EXTRACTOR_getKeywordTypeAsString(const EXTRACTOR_KeywordType type);
+
+/**
+ * Return the highest type number, exclusive as in [0,highest).
+ */
+EXTRACTOR_KeywordType
+EXTRACTOR_getHighestKeywordTypeNumber(void);
+
+/**
+ * Load multiple libraries as specified by the user.
+ * @param config a string given by the user that defines which
+ * libraries should be loaded. Has the format
+ * "[[-]LIBRARYNAME[(options)][:[-]LIBRARYNAME[(options)]]]*".
+ * For example,
+ * libextractor_mp3.so:libextractor_ogg.so loads the
+ * mp3 and the ogg library. The '-' before the LIBRARYNAME
+ * indicates that the library should be added to the end
+ * of the library list (addLibraryLast).
+ * @param prev the previous list of libraries, may be NULL
+ * @return the new list of libraries, equal to prev iff an error occurred
+ * or if config was empty (or NULL).
+ */
+EXTRACTOR_ExtractorList *
+EXTRACTOR_loadConfigLibraries(EXTRACTOR_ExtractorList * prev,
+ const char * config);
+
+/**
+ * Add a library for keyword extraction.
+ * @param prev the previous list of libraries, may be NULL
+ * @param library the name of the library
+ * @return the new list of libraries, equal to prev iff an error occurred
+ */
+EXTRACTOR_ExtractorList *
+EXTRACTOR_addLibrary(EXTRACTOR_ExtractorList * prev,
+ const char * library);
+
+/**
+ * Add a library for keyword extraction at the END of the list.
+ * @param prev the previous list of libraries, may be NULL
+ * @param library the name of the library
+ * @return the new list of libraries, always equal to prev
+ * except if prev was NULL and no error occurs
+ */
+EXTRACTOR_ExtractorList *
+EXTRACTOR_addLibraryLast(EXTRACTOR_ExtractorList * prev,
+ const char * library);
+
+/**
+ * Remove a library for keyword extraction.
+ * @param prev the current list of libraries
+ * @param library the name of the library to remove
+ * @return the reduced list, unchanged if the library was not loaded
+ */
+EXTRACTOR_ExtractorList *
+EXTRACTOR_removeLibrary(EXTRACTOR_ExtractorList * prev,
+ const char * library);
+
+/**
+ * Remove all extractors.
+ * @param libraries the list of extractors
+ */
+void EXTRACTOR_removeAll(EXTRACTOR_ExtractorList * libraries);
+
+/**
+ * Extract keywords from a file using the available extractors.
+ * @param extractor the list of extractor libraries
+ * @param filename the name of the file
+ * @return the list of keywords found in the file, NULL if none
+ * were found (or other errors)
+ */
+EXTRACTOR_KeywordList *
+EXTRACTOR_getKeywords(EXTRACTOR_ExtractorList * extractor,
+ const char * filename);
+
+
+/**
+ * Remove duplicate keywords from the list.
+ * @param list the original keyword list (destroyed in the process!)
+ * @param options a set of options (DUPLICATES_XXXX)
+ * @return a list of keywords without duplicates
+ */
+EXTRACTOR_KeywordList *
+EXTRACTOR_removeDuplicateKeywords(EXTRACTOR_KeywordList * list,
+ const unsigned int options);
+
+
+/**
+ * Remove empty (all-whitespace) keywords from the list.
+ * @param list the original keyword list (destroyed in the process!)
+ * @return a list of keywords without empty (all-whitespace) entries
+ */
+EXTRACTOR_KeywordList *
+EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList * list);
+
+/**
+ * Print a keyword list to a file.
+ * For debugging.
+ * @param handle the file to write to (stdout, stderr), must NOT be NULL
+ * @param keywords the list of keywords to print, may be NULL
+ */
+void EXTRACTOR_printKeywords(FILE * handle,
+ EXTRACTOR_KeywordList * keywords);
+
+/**
+ * Free the memory occupied by the keyword list (and the
+ * keyword strings in it!)
+ * @param keywords the list to free
+ */
+void EXTRACTOR_freeKeywords(EXTRACTOR_KeywordList * keywords);
+
+/**
+ * Extract the last keyword of the given type from the keyword list.
+ * @param type the type of the keyword
+ * @param keywords the keyword list
+ * @return the last matching keyword, or NULL if none matches;
+ * the string returned is aliased in the keywords list and must
+ * not be freed or manipulated by the client. It will become
+ * invalid once the keyword list is freed.
+ */
+const char * EXTRACTOR_extractLast(const EXTRACTOR_KeywordType type,
+ EXTRACTOR_KeywordList * keywords);
+
+/**
+ * Extract the last keyword of the type named by the given string from the keyword list.
+ * @param type the string describing the type of the keyword
+ * @param keywords the keyword list
+ * @return the last matching keyword, or NULL if none matches;
+ * the string returned is aliased in the keywords list and must
+ * not be freed or manipulated by the client. It will become
+ * invalid once the keyword list is freed.
+ */
+const char * EXTRACTOR_extractLastByString(const char * type,
+ EXTRACTOR_KeywordList * keywords);
+
+/**
+ * Count the number of keywords in the keyword list.
+ * @param keywords the keyword list
+ * @return the number of keywords in the list
+ */
+unsigned int EXTRACTOR_countKeywords(EXTRACTOR_KeywordList * keywords);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
Modified: Extractor-python/libextractor_python.c
===================================================================
--- Extractor-python/libextractor_python.c 2005-07-04 15:17:20 UTC (rev 1241)
+++ Extractor-python/libextractor_python.c 2005-07-04 16:01:35 UTC (rev 1242)
@@ -297,16 +297,19 @@
/* Module type. */
-static EXTRACTOR_KeywordList *Module_extractMethod(const char *filename,
- char *data, size_t filesize,
- EXTRACTOR_KeywordList *next,
- const char *options)
-{
+static EXTRACTOR_KeywordList *
+Module_extractMethod(const char *filename,
+ char *data,
+ size_t filesize,
+ EXTRACTOR_KeywordList * next,
+ const char *options) {
Module *self = NULL;
self = (Module*)atoi(options); /* convert back from string repr of self. */
-
- printf("In the extractor with object %i.",(int)self);
+#if 0
+ printf("In the extractor with object %p.",
+ self);
+#endif
return next;
}
@@ -337,8 +340,8 @@
self->module->libraryHandle = NULL;
self->module->extractMethod = (ExtractMethod)&Module_extractMethod;
self->module->libname = strdup(name);
- self->module->options = malloc(12); /* store self as string in options. */
- sprintf(self->module->options,"%i",(int)self);
+ self->module->options = malloc(24); /* store self as string in options. */
+ snprintf(self->module->options, 24, "%p", self);
self->module->next = NULL;
goto finish;
@@ -439,7 +442,10 @@
static int Module_clear(Module *self)
{
- printf("Removing module in clear: %s.\n",self->module->libname);
+#if 0
+ printf("Removing module in clear: %s.\n",
+ self->module->libname);
+#endif
#ifdef Py_CLEAR
Py_CLEAR(self->mlist);
#endif
@@ -449,7 +455,10 @@
static void Module_dealloc(Module *self)
{
Module_clear(self);
- printf("Removing module: %s.\n",self->module->libname);
+#if 0
+ printf("Removing module: %s.\n",
+ self->module->libname);
+#endif
self->module->next = NULL;
EXTRACTOR_removeAll(self->module);
self->ob_type->tp_free((PyObject*)self);
Modified: Extractor-python/libextractor_python_setup.py
===================================================================
--- Extractor-python/libextractor_python_setup.py 2005-07-04 15:17:20 UTC (rev 1241)
+++ Extractor-python/libextractor_python_setup.py 2005-07-04 16:01:35 UTC (rev 1242)
@@ -3,14 +3,14 @@
path=sys.argv[0]
sys.argv = sys.argv[1:]
-
+
cmod = Extension("extractor",["libextractor_python.c"],
libraries=["extractor"],
- include_dirs=["../include"],
+ include_dirs=["."],
library_dirs=[path])
setup(name="Extractor",
- version="0.5.0",
+ version="0.5.1",
ext_modules=[cmod],
author="Christian Grothoff, Heiko Wundram",
author_email="address@hidden")
Added: Extractor-python/m4/ac_python_devel.m4
===================================================================
--- Extractor-python/m4/ac_python_devel.m4 2005-07-04 15:17:20 UTC (rev 1241)
+++ Extractor-python/m4/ac_python_devel.m4 2005-07-04 16:01:35 UTC (rev 1242)
@@ -0,0 +1,54 @@
+dnl Available from the GNU Autoconf Macro Archive at:
+dnl http://www.gnu.org/software/ac-archive/htmldoc/ac_python_devel.html
+dnl
+AC_DEFUN([AC_PYTHON_DEVEL],[
+ # Locate the Python headers and library below the prefix of the interpreter
+ # and substitute PYTHON_CPPFLAGS, PYTHON_LDFLAGS, PYTHON_SITE_PKG and
+ # PYTHON_EXTRA_LIBS. Should allow for checking of python version here...
+ AC_REQUIRE([AM_PATH_PYTHON])
+
+ # Check for Python include path
+ AC_MSG_CHECKING([for Python include path])
+ python_path=`echo $PYTHON | sed "s,/bin.*$,,"`
+ for i in "$python_path/include/python$PYTHON_VERSION/" "$python_path/include/python/" "$python_path/" ; do
+ python_path=`find $i -type f -name Python.h -print | sed "1q"`
+ if test -n "$python_path" ; then
+ break
+ fi
+ done
+ python_path=`echo $python_path | sed "s,/Python.h$,,"`
+ AC_MSG_RESULT([$python_path])
+ if test -z "$python_path" ; then
+ AC_MSG_WARN([cannot find Python include path])
+ else
+ AC_SUBST([PYTHON_CPPFLAGS],[-I$python_path])
+
+ # Check for Python library path
+ AC_MSG_CHECKING([for Python library path])
+ python_path=`echo $PYTHON | sed "s,/bin.*$,,"`
+ for i in "$python_path/lib/python$PYTHON_VERSION/config/" "$python_path/lib/python$PYTHON_VERSION/" "$python_path/lib/python/config/" "$python_path/lib/python/" "$python_path/" ; do
+ python_path=`find $i -type f -name libpython$PYTHON_VERSION.* -print | sed "1q"`
+ if test -n "$python_path" ; then
+ break
+ fi
+ done
+ python_path=`echo $python_path | sed "s,/libpython.*$,,"`
+ AC_MSG_RESULT([$python_path])
+ if test -z "$python_path" ; then
+ AC_MSG_ERROR([cannot find Python library path])
+ fi
+ AC_SUBST([PYTHON_LDFLAGS],["-L$python_path -lpython$PYTHON_VERSION"])
+ # Derive the site-packages directory from the library directory found above.
+ python_site=`echo $python_path | sed "s/config/site-packages/"`
+ AC_SUBST([PYTHON_SITE_PKG],[$python_site])
+ #
+ # Libraries which must be linked in when embedding. The command
+ # substitution is closed before the result is reported so that the
+ # message shows the value that was just computed.
+ AC_MSG_CHECKING([python extra libraries])
+ PYTHON_EXTRA_LIBS=`$PYTHON -c "import distutils.sysconfig; \
+ conf = distutils.sysconfig.get_config_var; \
+ print conf('LOCALMODLIBS')+' '+conf('LIBS')"`
+ AC_MSG_RESULT([$PYTHON_EXTRA_LIBS])
+ AC_SUBST(PYTHON_EXTRA_LIBS)
+ fi
+])
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r1242 - in Extractor-python: . m4,
grothoff <=