[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r20777 - Extractor/src/main
From: gnunet
Subject: [GNUnet-SVN] r20777 - Extractor/src/main
Date: Mon, 26 Mar 2012 18:20:17 +0200
Author: grothoff
Date: 2012-03-26 18:20:17 +0200 (Mon, 26 Mar 2012)
New Revision: 20777
Modified:
Extractor/src/main/Makefile.am
Extractor/src/main/extractor.c
Log:
-LRN: Divide-extractor.c.patch was the first thing i did, once i
realized that extractor.c is just too long.
Modified: Extractor/src/main/Makefile.am
===================================================================
--- Extractor/src/main/Makefile.am 2012-03-26 16:18:18 UTC (rev 20776)
+++ Extractor/src/main/Makefile.am 2012-03-26 16:20:17 UTC (rev 20777)
@@ -36,6 +36,8 @@
libextractor_la_SOURCES = \
extractor.c \
+ extractor_plugpath.c \
+ extractor_plugins.c \
extractor_metatypes.c \
extractor_print.c
Modified: Extractor/src/main/extractor.c
===================================================================
--- Extractor/src/main/extractor.c 2012-03-26 16:18:18 UTC (rev 20776)
+++ Extractor/src/main/extractor.c 2012-03-26 16:20:17 UTC (rev 20777)
@@ -38,6 +38,8 @@
#include <zlib.h>
#endif
+#include "extractor_plugpath.h"
+#include "extractor_plugins.h"
/**
@@ -65,912 +67,6 @@
/**
- * Linked list of extractor plugins. An application builds this list
- * by telling libextractor to load various keyword-extraction
- * plugins. Libraries can also be unloaded (removed from this list,
- * see EXTRACTOR_plugin_remove).
- */
-struct EXTRACTOR_PluginList
-{
- /**
- * This is a linked list.
- */
- struct EXTRACTOR_PluginList *next;
-
- /**
- * Pointer to the plugin (as returned by lt_dlopen).
- */
- void * libraryHandle;
-
- /**
- * Name of the library (i.e., 'libextractor_foo.so')
- */
- char *libname;
-
- /**
- * Name of the library (i.e., 'libextractor_foo.so')
- */
- char *short_libname;
-
- /**
- * Pointer to the function used for meta data extraction.
- */
- EXTRACTOR_ExtractMethod extractMethod;
-
- /**
- * Options for the plugin.
- */
- char * plugin_options;
-
- /**
- * Special options for the plugin
- * (as returned by the plugin's "options" method;
- * typically NULL).
- */
- const char *specials;
-
- /**
- * Flags to control how the plugin is executed.
- */
- enum EXTRACTOR_Options flags;
-
- /**
- * Process ID of the child process for this plugin. 0 for
- * none.
- */
-#ifndef WINDOWS
- int cpid;
-#else
- HANDLE hProcess;
-#endif
-
- /**
- * Pipe used to send information about shared memory segments to
- * the child process. NULL if not initialized.
- */
- FILE *cpipe_in;
-
- /**
- * Pipe used to read information about extracted meta data from
- * the child process. -1 if not initialized.
- */
- int cpipe_out;
-};
-
-
-/**
- * Remove a trailing '/bin' from in (if present).
- */
-static char *
-cut_bin(char * in) {
- size_t p;
-
- if (in == NULL)
- return NULL;
- p = strlen(in);
- if (p > 4) {
- if ( (in[p-1] == '/') ||
- (in[p-1] == '\\') )
- in[--p] = '\0';
- if (0 == strcmp(&in[p-3],
- "bin")) {
- in[p-3] = '\0';
- p -= 3;
- }
- }
- return in;
-}
-
-#if LINUX
-/**
- * Try to determine path by reading /proc/PID/exe or
- * /proc/PID/maps.
- *
- * Note that this may fail if LE is installed in one directory
- * and the binary linking against it sits elsewhere.
- */
-static char *
-get_path_from_proc_exe() {
- char fn[64];
- char line[1024];
- char dir[1024];
- char * lnk;
- char * ret;
- char * lestr;
- ssize_t size;
- FILE * f;
-
- snprintf(fn,
- sizeof (fn),
- "/proc/%u/maps",
- getpid());
- f = FOPEN(fn, "r");
- if (f != NULL) {
- while (NULL != fgets(line, 1024, f)) {
- if ( (1 == sscanf(line,
- "%*x-%*x %*c%*c%*c%*c %*x %*2x:%*2x %*u%*[ ]%s",
- dir)) &&
- (NULL != (lestr = strstr(dir,
- "libextractor")) ) ) {
- lestr[0] = '\0';
- fclose(f);
- return strdup(dir);
- }
- }
- fclose(f);
- }
- snprintf(fn,
- sizeof (fn),
- "/proc/%u/exe",
- getpid());
- lnk = malloc(1029); /* 1024 + 5 for "lib/" catenation */
- if (lnk == NULL)
- return NULL;
- size = readlink(fn, lnk, 1023);
- if ( (size <= 0) || (size >= 1024) ) {
- free(lnk);
- return NULL;
- }
- lnk[size] = '\0';
- while ( (lnk[size] != '/') &&
- (size > 0) )
- size--;
- if ( (size < 4) ||
- (lnk[size-4] != '/') ) {
- /* not installed in "/bin/" -- binary path probably useless */
- free(lnk);
- return NULL;
- }
- lnk[size] = '\0';
- lnk = cut_bin(lnk);
- ret = realloc(lnk, strlen(lnk) + 5);
- if (ret == NULL)
- {
- free (lnk);
- return NULL;
- }
- strcat(ret, "lib/"); /* guess "lib/" as the library dir */
- return ret;
-}
-#endif
-
-#if WINDOWS
-/**
- * Try to determine path with win32-specific function
- */
-static char *
-get_path_from_module_filename() {
- char * path;
- char * ret;
- char * idx;
-
- path = malloc(4103); /* 4096+nil+6 for "/lib/" catenation */
- if (path == NULL)
- return NULL;
- GetModuleFileName(NULL, path, 4096);
- idx = path + strlen(path);
- while ( (idx > path) &&
- (*idx != '\\') &&
- (*idx != '/') )
- idx--;
- *idx = '\0';
- path = cut_bin(path);
- ret = realloc(path, strlen(path) + 6);
- if (ret == NULL)
- {
- free (path);
- return NULL;
- }
- strcat(ret, "/lib/"); /* guess "lib/" as the library dir */
- return ret;
-}
-#endif
-
-#if DARWIN
-static char * get_path_from_dyld_image() {
- const char * path;
- char * p, * s;
- int i;
- int c;
-
- p = NULL;
- c = _dyld_image_count();
- for (i = 0; i < c; i++) {
- if (_dyld_get_image_header(i) == &_mh_dylib_header) {
- path = _dyld_get_image_name(i);
- if (path != NULL && strlen(path) > 0) {
- p = strdup(path);
- if (p == NULL)
- return NULL;
- s = p + strlen(p);
- while ( (s > p) && (*s != '/') )
- s--;
- s++;
- *s = '\0';
- }
- break;
- }
- }
- return p;
-}
-#endif
-
-/**
- * This may also fail -- for example, if extract
- * is not also installed.
- */
-static char *
-get_path_from_PATH() {
- struct stat sbuf;
- char * path;
- char * pos;
- char * end;
- char * buf;
- char * ret;
- const char * p;
-
- p = getenv("PATH");
- if (p == NULL)
- return NULL;
- path = strdup(p); /* because we write on it */
- if (path == NULL)
- return NULL;
- buf = malloc(strlen(path) + 20);
- if (buf == NULL)
- {
- free (path);
- return NULL;
- }
- pos = path;
-
- while (NULL != (end = strchr(pos, ':'))) {
- *end = '\0';
- sprintf(buf, "%s/%s", pos, "extract");
- if (0 == stat(buf, &sbuf)) {
- pos = strdup(pos);
- free(buf);
- free(path);
- if (pos == NULL)
- return NULL;
- pos = cut_bin(pos);
- ret = realloc(pos, strlen(pos) + 5);
- if (ret == NULL)
- {
- free (pos);
- return NULL;
- }
- strcat(ret, "lib/");
- return ret;
- }
- pos = end + 1;
- }
- sprintf(buf, "%s/%s", pos, "extract");
- if (0 == stat(buf, &sbuf)) {
- pos = strdup(pos);
- free(buf);
- free(path);
- if (pos == NULL)
- return NULL;
- pos = cut_bin(pos);
- ret = realloc(pos, strlen(pos) + 5);
- if (ret == NULL)
- {
- free (pos);
- return NULL;
- }
- strcat(ret, "lib/");
- return ret;
- }
- free(buf);
- free(path);
- return NULL;
-}
-
-
-/**
- * Function to call on paths.
- *
- * @param cls closure
- * @param path a directory path
- */
-typedef void (*PathProcessor)(void *cls,
- const char *path);
-
-
-/**
- * Create a filename by appending 'fname' to 'path'.
- *
- * @param path the base path
- * @param fname the filename to append
- * @return '$path/$fname'
- */
-static char *
-append_to_dir (const char *path,
- const char *fname)
-{
- char *ret;
- size_t slen;
-
- slen = strlen (path);
- if (slen == 0)
- return NULL;
- if (fname[0] == DIR_SEPARATOR)
- fname++;
- ret = malloc (slen + strlen(fname) + 2);
- if (ret == NULL)
- return NULL;
-#ifdef MINGW
- if (path[slen-1] == '\\')
- sprintf (ret,
- "%s%s",
- path,
- fname);
- else
- sprintf (ret,
- "%s\\%s",
- path,
- fname);
-#else
- if (path[slen-1] == '/')
- sprintf (ret,
- "%s%s",
- path,
- fname);
- else
- sprintf (ret,
- "%s/%s",
- path,
- fname);
-#endif
- return ret;
-}
-
-
-/**
- * Iterate over all paths where we expect to find GNU libextractor
- * plugins.
- *
- * @param pp function to call for each path
- * @param pp_cls cls argument for pp.
- */
-static void
-get_installation_paths (PathProcessor pp,
- void *pp_cls)
-{
- const char *p;
- char * path;
- char * prefix;
- char * d;
-
- prefix = NULL;
- p = getenv("LIBEXTRACTOR_PREFIX");
- if (p != NULL)
- {
- d = strdup (p);
- if (d == NULL)
- return;
- prefix = strtok (d, PATH_SEPARATOR_STR);
- while (NULL != prefix)
- {
- pp (pp_cls, prefix);
- prefix = strtok (NULL, PATH_SEPARATOR_STR);
- }
- free (d);
- return;
- }
-#if LINUX
- if (prefix == NULL)
- prefix = get_path_from_proc_exe();
-#endif
-#if WINDOWS
- if (prefix == NULL)
- prefix = get_path_from_module_filename();
-#endif
-#if DARWIN
- if (prefix == NULL)
- prefix = get_path_from_dyld_image();
-#endif
- if (prefix == NULL)
- prefix = get_path_from_PATH();
- pp (pp_cls, PLUGININSTDIR);
- if (prefix == NULL)
- return;
- path = append_to_dir (prefix, PLUGINDIR);
- if (path != NULL)
- {
- if (0 != strcmp (path,
- PLUGININSTDIR))
- pp (pp_cls, path);
- free (path);
- }
- free (prefix);
-}
-
-
-struct SearchContext
-{
- const char *short_name;
- char *path;
-};
-
-
-/**
- * Load all plugins from the given directory.
- *
- * @param cls pointer to the "struct EXTRACTOR_PluginList*" to extend
- * @param path path to a directory with plugins
- */
-static void
-find_plugin_in_path (void *cls,
- const char *path)
-{
- struct SearchContext *sc = cls;
- DIR *dir;
- struct dirent *ent;
- const char *la;
- const char *sym_name;
- char *sym;
- char *dot;
-
- if (sc->path != NULL)
- return;
- dir = OPENDIR (path);
- if (NULL == dir)
- return;
- while (NULL != (ent = READDIR (dir)))
- {
- if (ent->d_name[0] == '.')
- continue;
- if ( (NULL != (la = strstr (ent->d_name, ".la"))) &&
- (la[3] == '\0') )
- continue; /* only load '.so' and '.dll' */
- sym_name = strstr (ent->d_name, "_");
- if (sym_name == NULL)
- continue;
- sym_name++;
- sym = strdup (sym_name);
- if (sym == NULL)
- {
- CLOSEDIR (dir);
- return;
- }
- dot = strstr (sym, ".");
- if (dot != NULL)
- *dot = '\0';
- if (0 == strcmp (sym, sc->short_name))
- {
- sc->path = append_to_dir (path, ent->d_name);
- free (sym);
- break;
- }
- free (sym);
- }
-#if DEBUG
- if (sc->path == NULL)
- fprintf (stderr,
- "Failed to find plugin `%s' in `%s'\n",
- sc->short_name,
- path);
-#endif
- CLOSEDIR (dir);
-}
-
-
-
-/**
- * Given a short name of a library (i.e. "mime"), find
- * the full path of the respective plugin.
- */
-static char *
-find_plugin (const char *short_name)
-{
- struct SearchContext sc;
-
- sc.path = NULL;
- sc.short_name = short_name;
- get_installation_paths (&find_plugin_in_path,
- &sc);
- return sc.path;
-}
-
-
-
-struct DefaultLoaderContext
-{
- struct EXTRACTOR_PluginList *res;
- enum EXTRACTOR_Options flags;
-};
-
-
-/**
- * Load all plugins from the given directory.
- *
- * @param cls pointer to the "struct EXTRACTOR_PluginList*" to extend
- * @param path path to a directory with plugins
- */
-static void
-load_plugins_from_dir (void *cls,
- const char *path)
-{
- struct DefaultLoaderContext *dlc = cls;
- DIR *dir;
- struct dirent *ent;
- const char *la;
- const char *sym_name;
- char *sym;
- char *dot;
-
- dir = opendir (path);
- if (NULL == dir)
- return;
- while (NULL != (ent = readdir (dir)))
- {
- if (ent->d_name[0] == '.')
- continue;
- if ( ( (NULL != (la = strstr (ent->d_name, ".la"))) &&
- (la[3] == '\0') ) ||
- ( (NULL != (la = strstr (ent->d_name, ".a"))) &&
- (la[2] == '\0')) )
- continue; /* only load '.so' and '.dll' */
-
- sym_name = strstr (ent->d_name, "_");
- if (sym_name == NULL)
- continue;
- sym_name++;
- sym = strdup (sym_name);
- if (NULL == sym)
- {
- closedir (dir);
- return;
- }
- dot = strstr (sym, ".");
- if (dot != NULL)
- *dot = '\0';
-#if DEBUG > 1
- fprintf (stderr,
- "Adding default plugin `%s'\n",
- sym);
-#endif
- dlc->res = EXTRACTOR_plugin_add (dlc->res,
- sym,
- NULL,
- dlc->flags);
- free (sym);
- }
- closedir (dir);
-}
-
-
-/**
- * Load the default set of plugins. The default can be changed
- * by setting the LIBEXTRACTOR_LIBRARIES environment variable.
- * If it is set to "env", then this function will return
- * EXTRACTOR_plugin_add_config (NULL, env, flags). Otherwise,
- * it will load all of the installed plugins and return them.
- *
- * @param flags options for all of the plugins loaded
- * @return the default set of plugins, NULL if no plugins were found
- */
-struct EXTRACTOR_PluginList *
-EXTRACTOR_plugin_add_defaults(enum EXTRACTOR_Options flags)
-{
- struct DefaultLoaderContext dlc;
- char *env;
-
- env = getenv ("LIBEXTRACTOR_LIBRARIES");
- if (env != NULL)
- return EXTRACTOR_plugin_add_config (NULL, env, flags);
- dlc.res = NULL;
- dlc.flags = flags;
- get_installation_paths (&load_plugins_from_dir,
- &dlc);
- return dlc.res;
-}
-
-
-/**
- * Try to resolve a plugin function.
- *
- * @param lib_handle library to search for the symbol
- * @param prefix prefix to add
- * @param sym_name base name for the symbol
- * @param options set to special options requested by the plugin
- * @return NULL on error, otherwise pointer to the symbol
- */
-static void *
-get_symbol_with_prefix(void *lib_handle,
- const char *template,
- const char *prefix,
- const char **options)
-{
- char *name;
- void *symbol;
- const char *sym_name;
- char *sym;
- char *dot;
- const char *(*opt_fun)(void);
-
- if (NULL != options) *options = NULL;
- sym_name = strstr (prefix, "_");
- if (sym_name == NULL)
- return NULL;
- sym_name++;
- sym = strdup (sym_name);
- if (sym == NULL)
- return NULL;
- dot = strstr (sym, ".");
- if (dot != NULL)
- *dot = '\0';
- name = malloc(strlen(sym) + strlen(template) + 1);
- if (name == NULL)
- {
- free (sym);
- return NULL;
- }
- sprintf(name,
- template,
- sym);
- /* try without '_' first */
- symbol = lt_dlsym(lib_handle, name + 1);
- if (symbol==NULL)
- {
- /* now try with the '_' */
-#if DEBUG
- char *first_error = strdup (lt_dlerror());
-#endif
- symbol = lt_dlsym(lib_handle, name);
-#if DEBUG
- if (NULL == symbol)
- {
- fprintf(stderr,
- "Resolving symbol `%s' failed, "
- "so I tried `%s', but that failed also. Errors are: "
- "`%s' and `%s'.\n",
- name+1,
- name,
- first_error == NULL ? "out of memory" : first_error,
- lt_dlerror());
- }
- if (first_error != NULL)
- free(first_error);
-#endif
- }
-
- if ( (symbol != NULL) &&
- (NULL != options) )
- {
- /* get special options */
- sprintf(name,
- "_EXTRACTOR_%s_options",
- sym);
- /* try without '_' first */
- opt_fun = lt_dlsym(lib_handle, name + 1);
- if (opt_fun == NULL)
- opt_fun = lt_dlsym(lib_handle, name);
- if (opt_fun != NULL)
- *options = opt_fun ();
- }
- free (sym);
- free(name);
-
- return symbol;
-}
-
-
-/**
- * Load a plugin.
- *
- * @param plugin plugin to load
- * @return 0 on success, -1 on error
- */
-static int
-plugin_load (struct EXTRACTOR_PluginList *plugin)
-{
-#if WINDOWS
- wchar_t wlibname[4097];
- char llibname[4097];
-#endif
- lt_dladvise advise;
-
- if (plugin->libname == NULL)
- plugin->libname = find_plugin (plugin->short_libname);
- if (plugin->libname == NULL)
- {
-#if DEBUG
- fprintf (stderr,
- "Failed to find plugin `%s'\n",
- plugin->short_libname);
-#endif
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return -1;
- }
- lt_dladvise_init (&advise);
- lt_dladvise_ext (&advise);
- lt_dladvise_local (&advise);
-#if WINDOWS
- wlibname[0] = L'\0';
- llibname[0] = '\0';
- if (MultiByteToWideChar (CP_UTF8, 0, plugin->libname, -1, wlibname, 4097) <= 0
- || WideCharToMultiByte (CP_ACP, 0, wlibname, -1, llibname, 4097, NULL, NULL) < 0)
- {
-#if DEBUG
- fprintf (stderr,
- "Loading `%s' plugin failed: %s\n",
- plugin->short_libname,
- "can't convert plugin name to local encoding");
- free (plugin->libname);
- plugin->libname = NULL;
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return -1;
-#endif
- }
- plugin->libraryHandle = lt_dlopenadvise (llibname,
- advise);
-#else
- plugin->libraryHandle = lt_dlopenadvise (plugin->libname,
- advise);
-#endif
- lt_dladvise_destroy(&advise);
- if (plugin->libraryHandle == NULL)
- {
-#if DEBUG
- fprintf (stderr,
- "Loading `%s' plugin failed: %s\n",
- plugin->short_libname,
- lt_dlerror ());
-#endif
- free (plugin->libname);
- plugin->libname = NULL;
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return -1;
- }
- plugin->extractMethod = get_symbol_with_prefix (plugin->libraryHandle,
- "_EXTRACTOR_%s_extract",
- plugin->libname,
- &plugin->specials);
- if (plugin->extractMethod == NULL)
- {
-#if DEBUG
- fprintf (stderr,
- "Resolving `extract' method of plugin `%s' failed: %s\n",
- plugin->short_libname,
- lt_dlerror ());
-#endif
- lt_dlclose (plugin->libraryHandle);
- free (plugin->libname);
- plugin->libname = NULL;
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return -1;
- }
- return 0;
-}
-
-
-
-
-/**
- * Add a library for keyword extraction.
- *
- * @param prev the previous list of libraries, may be NULL
- * @param library the name of the library
- * @param flags options to use
- * @return the new list of libraries, equal to prev iff an error occured
- */
-struct EXTRACTOR_PluginList *
-EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList * prev,
- const char *library,
- const char *options,
- enum EXTRACTOR_Options flags)
-{
- struct EXTRACTOR_PluginList *result;
- char *libname;
-
- libname = find_plugin (library);
- if (libname == NULL)
- {
- fprintf (stderr,
- "Could not load `%s'\n",
- library);
- return prev;
- }
- result = calloc (1, sizeof (struct EXTRACTOR_PluginList));
- if (result == NULL)
- return prev;
- result->next = prev;
- result->short_libname = strdup (library);
- if (result->short_libname == NULL)
- {
- free (result);
- return NULL;
- }
- result->libname = libname;
- result->flags = flags;
- if (NULL != options)
- result->plugin_options = strdup (options);
- else
- result->plugin_options = NULL;
- return result;
-}
-
-
-/**
- * Load multiple libraries as specified by the user.
- *
- * @param config a string given by the user that defines which
- * libraries should be loaded. Has the format
- * "[[-]LIBRARYNAME[(options)][:[-]LIBRARYNAME[(options)]]]*".
- * For example, 'mp3:ogg.so' loads the
- * mp3 and the ogg library. The '-' before the LIBRARYNAME
- * indicates that the library should be removed from
- * the library list.
- * @param prev the previous list of libraries, may be NULL
- * @param flags options to use
- * @return the new list of libraries, equal to prev iff an error occured
- * or if config was empty (or NULL).
- */
-struct EXTRACTOR_PluginList *
-EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList * prev,
- const char *config,
- enum EXTRACTOR_Options flags)
-{
- char *cpy;
- size_t pos;
- size_t last;
- ssize_t lastconf;
- size_t len;
-
- if (config == NULL)
- return prev;
- len = strlen(config);
- cpy = strdup(config);
- if (cpy == NULL)
- return prev;
- pos = 0;
- last = 0;
- lastconf = 0;
- while (pos < len)
- {
- while ((cpy[pos] != ':') && (cpy[pos] != '\0') &&
- (cpy[pos] != '('))
- pos++;
- if( cpy[pos] == '(' ) {
- cpy[pos++] = '\0'; /* replace '(' by termination */
- lastconf = pos; /* start config from here, after (. */
- while ((cpy[pos] != '\0') && (cpy[pos] != ')'))
- pos++; /* config until ) or EOS. */
- if( cpy[pos] == ')' ) {
- cpy[pos++] = '\0'; /* write end of config here. */
- while ((cpy[pos] != ':') && (cpy[pos] != '\0'))
- pos++; /* forward until real end of string found. */
- cpy[pos++] = '\0';
- } else {
- cpy[pos++] = '\0'; /* end of string. */
- }
- } else {
- lastconf = -1; /* NULL config when no (). */
- cpy[pos++] = '\0'; /* replace ':' by termination */
- }
- if (cpy[last] == '-')
- {
- last++;
- prev = EXTRACTOR_plugin_remove (prev,
- &cpy[last]);
- }
- else
- {
- prev = EXTRACTOR_plugin_add (prev,
- &cpy[last],
- (lastconf != -1) ? &cpy[lastconf] : NULL,
- flags);
- }
- last = pos;
- }
- free (cpy);
- return prev;
-}
-
-
-/**
* Stop the child process of this plugin.
*/
static void
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r20777 - Extractor/src/main,
gnunet <=