[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r4082 - in Extractor: . doc src/main src/plugins src/plugins/thumbnail
From: |
grothoff |
Subject: |
[GNUnet-SVN] r4082 - in Extractor: . doc src/main src/plugins src/plugins/thumbnail |
Date: |
Thu, 28 Dec 2006 19:23:42 -0800 (PST) |
Author: grothoff
Date: 2006-12-28 19:23:38 -0800 (Thu, 28 Dec 2006)
New Revision: 4082
Modified:
Extractor/ChangeLog
Extractor/doc/extract.1
Extractor/src/main/extract.c
Extractor/src/plugins/splitextractor.c
Extractor/src/plugins/thumbnail/thumbnailextractor.c
Log:
fixing Mantis #1125 and bug in splitextractor
Modified: Extractor/ChangeLog
===================================================================
--- Extractor/ChangeLog 2006-12-29 00:44:34 UTC (rev 4081)
+++ Extractor/ChangeLog 2006-12-29 03:23:38 UTC (rev 4082)
@@ -1,3 +1,9 @@
+Thu Dec 28 20:22:20 MST 2006
+ Fixed bug in splitextractor, addressing also Mantis #1125.
+
+Thu Dec 28 18:12:15 MST 2006
+ Added -g (greppable output, Mantis #1157) option to extract.
+
Mon Nov 20 22:08:55 EET 2006
Added an SID (C64 music file) plugin
Modified: Extractor/doc/extract.1
===================================================================
--- Extractor/doc/extract.1 2006-12-29 00:44:34 UTC (rev 4081)
+++ Extractor/doc/extract.1 2006-12-29 03:23:38 UTC (rev 4082)
@@ -1,4 +1,4 @@
-.TH EXTRACT 1 "April 28, 2005" "libextractor 0.5.11"
+.TH EXTRACT 1 "Dec 29, 2006" "libextractor 0.5.17"
.\" $Id
.SH NAME
extract
@@ -6,7 +6,7 @@
.SH SYNOPSIS
.B extract
[
-.B \-abdfhLnrsvV
+.B \-abdfghLnrsvV
]
[
.B \-B
@@ -32,7 +32,7 @@
\&...
.br
.SH DESCRIPTION
-This manual page documents version 0.5.11 of the
+This manual page documents version 0.5.17 of the
.B extract
command.
.PP
@@ -63,6 +63,9 @@
.B \-f
add the filename(s) (without directory) to the list of keywords.
.TP 8
+.B \-g
+Use grep-friendly output (all keywords on a single line for each file). Use the verbose option to print the filename first, followed by the keywords. This option will not print keyword types or non-textual metadata.
+.TP 8
.B \-h
Print a brief summary of the options.
.TP 8
Modified: Extractor/src/main/extract.c
===================================================================
--- Extractor/src/main/extract.c 2006-12-29 00:44:34 UTC (rev 4081)
+++ Extractor/src/main/extract.c 2006-12-29 03:23:38 UTC (rev 4082)
@@ -132,6 +132,8 @@
gettext_noop("remove duplicates only if types match") },
{ 'f', "filename", NULL,
gettext_noop("use the filename as a keyword (loads filename-extractor plugin)") },
+ { 'g', "grep-friendly", NULL,
+ gettext_noop("produce grep-friendly output (all results on one line per file)") },
{ 'h', "help", NULL,
gettext_noop("print this help") },
{ 'H', "hash", "ALGORITHM",
@@ -167,7 +169,7 @@
/**
* Print a keyword list to a file.
- * For debugging.
+ *
* @param handle the file to write to (stdout, stderr), may NOT be NULL
* @param keywords the list of keywords to print, may be NULL
* @param print array indicating which types to print
@@ -180,24 +182,19 @@
{
char * keyword;
iconv_t cd;
- char * buf;
- cd = iconv_open(
- nl_langinfo(CODESET)
- , "UTF-8");
+ cd = iconv_open(nl_langinfo(CODESET), "UTF-8");
while (keywords != NULL) {
- buf = NULL;
- if (cd != (iconv_t) -1)
- keyword = iconvHelper(cd,
- keywords->keyword);
- else
- keyword = strdup(keywords->keyword);
-
- if (keywords->keywordType == EXTRACTOR_THUMBNAIL_DATA) {
+ if (EXTRACTOR_isBinaryType(keywords->keywordType)) {
fprintf (handle,
_("%s - (binary)\n"),
_(EXTRACTOR_getKeywordTypeAsString(keywords->keywordType)));
} else {
+ if (cd != (iconv_t) -1)
+ keyword = iconvHelper(cd,
+ keywords->keyword);
+ else
+ keyword = strdup(keywords->keyword);
if (NULL == EXTRACTOR_getKeywordTypeAsString(keywords->keywordType)) {
if (verbose == YES) {
fprintf(handle,
@@ -209,8 +206,8 @@
"%s - %s\n",
_(EXTRACTOR_getKeywordTypeAsString(keywords->keywordType)),
keyword);
+ free(keyword);
}
- free(keyword);
keywords = keywords->next;
}
if (cd != (iconv_t) -1)
@@ -218,6 +215,42 @@
}
/**
+ * Print a keyword list to a file in a grep-friendly manner.
+ *
+ * @param handle the file to write to (stdout, stderr), may NOT be NULL
+ * @param keywords the list of keywords to print, may be NULL
+ * @param print array indicating which types to print
+ */
+static void
+printSelectedKeywordsGrepFriendly(FILE * handle,
+ EXTRACTOR_KeywordList * keywords,
+ const int * print,
+ const int verbose)
+{
+ char * keyword;
+ iconv_t cd;
+
+ cd = iconv_open(nl_langinfo(CODESET), "UTF-8");
+ while (keywords != NULL) {
+ if ( (EXTRACTOR_isBinaryType(EXTRACTOR_THUMBNAIL_DATA)) &&
+ (print[keywords->keywordType] == YES) ) {
+ if (cd != (iconv_t) -1)
+ keyword = iconvHelper(cd,
+ keywords->keyword);
+ else
+ keyword = strdup(keywords->keyword);
+ fprintf (handle,
+ (keywords->next == NULL) ? "%s" : "%s ",
+ keyword);
+ free(keyword);
+ }
+ keywords = keywords->next;
+ }
+ if (cd != (iconv_t) -1)
+ iconv_close(cd);
+}
+
+/**
* Take title, auth, year and return a string
*/
static char *
@@ -390,6 +423,7 @@
int defaultAll = YES;
int duplicates = EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN;
int bibtex = NO;
+ int grepfriendly = NO;
char * binary = NULL;
int ret = 0;
@@ -413,6 +447,7 @@
{"bibtex", 0, 0, 'b'},
{"duplicates", 0, 0, 'd'},
{"filename", 0, 0, 'f'},
+ {"grep-friendly", 0, 0, 'g'},
{"help", 0, 0, 'h'},
{"hash", 1, 0, 'H'},
{"list", 0, 0, 'L'},
@@ -451,6 +486,9 @@
case 'f':
useFilename = YES;
break;
+ case 'g':
+ grepfriendly = YES;
+ break;
case 'h':
printHelp();
return 0;
@@ -612,10 +650,18 @@
}
if ( (duplicates != -1) || (bibtex == YES))
keywords = EXTRACTOR_removeDuplicateKeywords (keywords, duplicates);
- if (verbose == YES && bibtex == NO)
- printf (_("Keywords for file %s:\n"), argv[i]);
+ if ( ( (verbose == YES) || (grepfriendly == YES) )
+ && (bibtex == NO) ) {
+ if (grepfriendly == YES)
+ printf ("%s", argv[i]);
+ else
+ printf (_("Keywords for file %s:"),
+ argv[i]);
+ }
if (bibtex == YES)
printSelectedKeywordsBibtex (stdout, keywords, print, argv[i]);
+ else if (grepfriendly == YES)
+ printSelectedKeywordsGrepFriendly(stdout, keywords, print, verbose);
else
printSelectedKeywords (stdout, keywords, print, verbose);
if (verbose == YES && bibtex == NO)
Modified: Extractor/src/plugins/splitextractor.c
===================================================================
--- Extractor/src/plugins/splitextractor.c 2006-12-29 00:44:34 UTC (rev 4081)
+++ Extractor/src/plugins/splitextractor.c 2006-12-29 03:23:38 UTC (rev 4082)
@@ -21,53 +21,63 @@
#include "platform.h"
#include "extractor.h"
-static char * TOKENIZERS = "._ ,address@hidden(){}";
+/**
+ * Default split characters.
+ */
+static const char * TOKENIZERS = "._ ,address@hidden(){}";
+
+/**
+ * Do not use keywords shorter than this minimum
+ * length.
+ */
static int MINIMUM_KEYWORD_LENGTH = 4;
static void addKeyword(struct EXTRACTOR_Keywords ** list,
- const char * keyword,
- EXTRACTOR_KeywordType type) {
+ const char * keyword) {
EXTRACTOR_KeywordList * next;
next = malloc(sizeof(EXTRACTOR_KeywordList));
next->next = *list;
next->keyword = strdup(keyword);
- next->keywordType = type;
+ next->keywordType = EXTRACTOR_SPLIT;
*list = next;
}
static int token(char letter,
const char * options) {
- int i;
-
- if (options == NULL)
- options = TOKENIZERS;
- for (i=0;i<strlen(TOKENIZERS);i++)
- if (letter == TOKENIZERS[i])
+ size_t i;
+
+ i = 0;
+ while (options[i] != '\0') {
+ if (letter == options[i])
return 1;
+ i++;
+ }
return 0;
}
static void splitKeywords(const char * keyword,
- EXTRACTOR_KeywordType type,
struct EXTRACTOR_Keywords ** list,
const char * options) {
char * dp;
- int pos;
- int last;
- int len;
+ size_t pos;
+ size_t last;
+ size_t len;
dp = strdup(keyword);
len = strlen(dp);
pos = 0;
last = 0;
while (pos < len) {
- while ((!token(dp[pos],
- options)) && (pos < len))
+ while ( (0 == token(dp[pos], options)) &&
+ (pos < len) )
pos++;
- dp[pos++] = 0;
- if (strlen(&dp[last]) >= MINIMUM_KEYWORD_LENGTH) {
- addKeyword(list, &dp[last], type);
- }
+ dp[pos++] = '\0';
+ if (pos - last > MINIMUM_KEYWORD_LENGTH)
+ addKeyword(list,
+ &dp[last]);
+ while ( (1 == token(dp[pos], options)) &&
+ (pos < len) )
+ pos++;
last = pos;
}
free(dp);
@@ -82,13 +92,16 @@
const char * options) {
struct EXTRACTOR_Keywords * pos;
+ if (options == NULL)
+ options = TOKENIZERS;
pos = prev;
while (pos != NULL) {
- splitKeywords(pos->keyword,
- EXTRACTOR_SPLIT,
+ splitKeywords(pos->keyword,
&prev,
options);
pos = pos->next;
}
return prev;
}
+
+/* end of splitextractor.c */
Modified: Extractor/src/plugins/thumbnail/thumbnailextractor.c
===================================================================
--- Extractor/src/plugins/thumbnail/thumbnailextractor.c 2006-12-29 00:44:34 UTC (rev 4081)
+++ Extractor/src/plugins/thumbnail/thumbnailextractor.c 2006-12-29 03:23:38 UTC (rev 4082)
@@ -75,10 +75,11 @@
NULL,
};
-struct EXTRACTOR_Keywords * libextractor_thumbnail_extract(const char * filename,
- const unsigned char * data,
- size_t size,
- struct EXTRACTOR_Keywords * prev) {
+struct EXTRACTOR_Keywords *
+libextractor_thumbnail_extract(const char * filename,
+ const unsigned char * data,
+ size_t size,
+ struct EXTRACTOR_Keywords * prev) {
GdkPixbufLoader * loader;
GdkPixbuf * in;
GdkPixbuf * out;
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r4082 - in Extractor: . doc src/main src/plugins src/plugins/thumbnail,
grothoff <=