[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r9183 - in Extractor-mono/LibExtractor: . src
From: |
gnunet |
Subject: |
[GNUnet-SVN] r9183 - in Extractor-mono/LibExtractor: . src |
Date: |
Sun, 18 Oct 2009 15:12:18 -0600 |
Author: patrick
Date: 2009-10-18 15:12:18 -0600 (Sun, 18 Oct 2009)
New Revision: 9183
Added:
Extractor-mono/LibExtractor/src/DuplicateOptions.cs
Modified:
Extractor-mono/LibExtractor/LibExtractor.mdp
Extractor-mono/LibExtractor/src/Extractor.cs
Extractor-mono/LibExtractor/src/Keyword.cs
Extractor-mono/LibExtractor/src/KeywordType.cs
Log:
* LibExtractor/src/Keyword.cs: fixed comments
* LibExtractor/LibExtractor.mdp: added DuplicateOptions.cs
* LibExtractor/src/Extractor.cs: added new extractor functions, renamed
variables, fixed comments
* LibExtractor/src/KeywordType.cs: fixed comments
* LibExtractor/src/DuplicateOptions.cs: new DuplicateOptions enum
Modified: Extractor-mono/LibExtractor/LibExtractor.mdp
===================================================================
--- Extractor-mono/LibExtractor/LibExtractor.mdp 2009-10-18 20:47:42 UTC (rev 9182)
+++ Extractor-mono/LibExtractor/LibExtractor.mdp 2009-10-18 21:12:18 UTC (rev 9183)
@@ -1,4 +1,4 @@
-<Project name="LibExtractor" fileversion="2.0" language="C#"
clr-version="Net_2_0" ctype="DotNetProject">
+<Project name="LibExtractor" fileversion="2.0" language="C#"
clr-version="Net_2_0" targetFramework="2.0" ctype="DotNetProject">
<Configurations active="Debug">
<Configuration name="Debug" ctype="DotNetProjectConfiguration">
<Output directory="bin/Debug" assembly="LibExtractor" />
@@ -20,6 +20,7 @@
<File name="src/Extractor.cs" subtype="Code" buildaction="Compile" />
<File name="src/Keyword.cs" subtype="Code" buildaction="Compile" />
<File name="src/KeywordType.cs" subtype="Code" buildaction="Compile" />
+ <File name="src/DuplicateOptions.cs" subtype="Code" buildaction="Compile"
/>
</Contents>
<References>
<ProjectReference type="Gac" localcopy="True" refto="System,
Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" />
Added: Extractor-mono/LibExtractor/src/DuplicateOptions.cs
===================================================================
--- Extractor-mono/LibExtractor/src/DuplicateOptions.cs (rev 0)
+++ Extractor-mono/LibExtractor/src/DuplicateOptions.cs 2009-10-18 21:12:18 UTC (rev 9183)
@@ -0,0 +1,32 @@
+// DuplicateOptions.cs
+//
+// Copyright (C) 2009 Patrick Ulbrich, address@hidden
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+using System;
+
+namespace LibExtractor
+{
+ public enum DuplicateOptions
+ {
+ NONE = 0,
+ /* ignore the 'type' of the keyword when eliminating duplicates */
+ DUPLICATES_TYPELESS = 1,
+ /* remove type 'UNKNOWN' if there is a duplicate keyword of
+ known type, even if usually different types should be
+ preserved */
+ DUPLICATES_REMOVE_UNKNOWN = 2
+ }
+}
Modified: Extractor-mono/LibExtractor/src/Extractor.cs
===================================================================
--- Extractor-mono/LibExtractor/src/Extractor.cs 2009-10-18 20:47:42 UTC (rev 9182)
+++ Extractor-mono/LibExtractor/src/Extractor.cs 2009-10-18 21:12:18 UTC (rev 9183)
@@ -1,6 +1,6 @@
// Extractor.cs
//
-// Copyright (C) 2008 Patrick Ulbrich, address@hidden
+// Copyright (C) 2008, 2009 Patrick Ulbrich, address@hidden
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -16,6 +16,20 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
+// NOTE:
+//
+// The following functions have been implemented directly (based on the libextractor original code)
+// as a pinvoke call into the native library would involve a complicated conversion
+// of the managed Keyword[] array into an unmanaged linked list.
+// On top of that the native library would also try to free that list :-(.
+// The code of those functions is so simple that it isn't worth it anyway...
+//
+// EXTRACTOR_KeywordList * EXTRACTOR_removeDuplicateKeywords(EXTRACTOR_KeywordList * list, unsigned int options);
+// EXTRACTOR_KeywordList * EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList * list);
+// EXTRACTOR_KeywordList * EXTRACTOR_removeKeywordsOfType(EXTRACTOR_KeywordList * list, EXTRACTOR_KeywordType type);
+// const char * EXTRACTOR_extractLast(EXTRACTOR_KeywordType type, EXTRACTOR_KeywordList * keywords);
+// const char * EXTRACTOR_extractLastByString(const char * type, EXTRACTOR_KeywordList * keywords);
+
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
@@ -29,7 +43,7 @@
public Extractor() {
disposed = false;
- pExtractors = IntPtr.Zero;
+ pExtractors = IntPtr.Zero;
}
~Extractor() {
@@ -50,28 +64,28 @@
public void LoadConfigLibraries(string config) {
EnsureNotDisposed();
EnsureValidStringParam(config, "config");
- // prev parameter may be null, so don't test for loaded
extractors
+ // prev parameter may be null, so don't test for loaded
extractors.
pExtractors =
EXTRACTOR_loadConfigLibraries(pExtractors, config);
}
public void AddLibrary(string library) {
EnsureNotDisposed();
EnsureValidStringParam(library, "library");
- // prev parameter may be null, so don't test for loaded
extractors
+ // prev parameter may be null, so don't test for loaded
extractors.
pExtractors = EXTRACTOR_addLibrary(pExtractors,
library);
}
public void AddLibraryLast(string library) {
EnsureNotDisposed();
EnsureValidStringParam(library, "library");
- // prev parameter may be null, so don't test for loaded
extractors
+ // prev parameter may be null, so don't test for loaded
extractors.
pExtractors = EXTRACTOR_addLibraryLast(pExtractors,
library);
}
public void RemoveLibrary(string library) {
EnsureNotDisposed();
EnsureValidStringParam(library, "library");
- // prev parameter may be null, so don't test for loaded
extractors
+ // prev parameter may be null, so don't test for loaded
extractors.
pExtractors = EXTRACTOR_removeLibrary(pExtractors,
library);
}
@@ -142,7 +156,7 @@
/// Static members
///
- // returns an Extractor instance with the default library set
loaded
+ // Returns an Extractor instance with the default library set
loaded.
public static Extractor GetDefault() {
Extractor e = new Extractor();
e.LoadDefaultLibraries();
@@ -150,7 +164,7 @@
}
public static string GetKeywordTypeAsString(KeywordType type) {
- // NOTE : string does NOT need to be freed
+ // NOTE : string does NOT need to be freed.
IntPtr pStr = EXTRACTOR_getKeywordTypeAsString(type);
string str = Marshal.PtrToStringAnsi(pStr);
return str;
@@ -160,6 +174,104 @@
return EXTRACTOR_getHighestKeywordTypeNumber();
}
+ public static Keyword[] RemoveDuplicateKeywords(Keyword[]
keywords, DuplicateOptions options) {
+ List<Keyword> lst = new List<Keyword>();
+
+ for (int i = 0; i < keywords.Length; i++) {
+ Keyword pos = keywords[i];
+ bool remove = false;
+
+ for (int j = 0; j < lst.Count; j++) {
+ KeywordType type =
lst[j].keywordType;
+ string keyword =
lst[j].keyword;
+
+ if ( (pos.keyword == keyword) &&
+ ( (pos.keywordType == type) ||
+ ( ((options &
DuplicateOptions.DUPLICATES_TYPELESS) > 0) &&
+ ( (pos.keywordType ==
KeywordType.EXTRACTOR_SPLIT) ||
+ (type !=
KeywordType.EXTRACTOR_SPLIT)) ) ||
+ ( ((options &
DuplicateOptions.DUPLICATES_REMOVE_UNKNOWN) > 0) &&
+ (pos.keywordType ==
KeywordType.EXTRACTOR_UNKNOWN)) ) ) {
+ remove = true;
+ break; // break inner for
+ }
+ }
+
+ if (!remove) {
+ lst.Add(pos);
+ }
+ }
+
+ if (lst.Count == keywords.Length)
+ return keywords;
+ else
+ return lst.ToArray();
+ }
+
+ public static Keyword[] RemoveEmptyKeywords(Keyword[] keywords)
{
+ List<Keyword> lst = new List<Keyword>();
+
+ for (int i = 0; i < keywords.Length; i++) {
+ Keyword pos = keywords[i];
+ string keyword = pos.keyword;
+ bool allWhite = true;
+
+ for (int j = 0; j < keyword.Length; j++) {
+ if (!char.IsWhiteSpace(keyword[j])) {
+ allWhite = false;
+ break;
+ }
+ }
+
+ if (!allWhite)
+ lst.Add(pos);
+ }
+
+ if (lst.Count == keywords.Length)
+ return keywords;
+ else
+ return lst.ToArray();
+ }
+
+ public static Keyword[] RemoveKeywordsOfType(Keyword[]
keywords, KeywordType type) {
+ List<Keyword> lst = new List<Keyword>();
+
+ for (int i = 0; i < keywords.Length; i++) {
+ Keyword pos = keywords[i];
+ if (pos.keywordType != type) {
+ lst.Add(pos);
+ }
+ }
+
+ if (lst.Count == keywords.Length)
+ return keywords;
+ else
+ return lst.ToArray();
+ }
+
+ public static string ExtractLast(KeywordType type, Keyword[]
keywords) {
+ string result = null;
+ for (int i = 0; i < keywords.Length; i++) {
+ Keyword pos = keywords[i];
+ if (pos.keywordType == type) {
+ result = pos.keyword;
+ }
+ }
+ return result;
+ }
+
+ // NOTE : does not work with translated strings.
+ public static string ExtractLastByString(string type, Keyword[]
keywords) {
+ string result = null;
+ for (int i = 0; i < keywords.Length; i++) {
+ Keyword pos = keywords[i];
+ if (GetKeywordTypeAsString(pos.keywordType) ==
type) {
+ result = pos.keyword;
+ }
+ }
+ return result;
+ }
+
///
/// Cleanup stuff
///
@@ -230,7 +342,7 @@
private static extern IntPtr EXTRACTOR_getKeywords(IntPtr
extractors, string filename);
[DllImport("libextractor")]
- private static extern IntPtr EXTRACTOR_getKeywords2(IntPtr
extractor, IntPtr data, int size);
+ private static extern IntPtr EXTRACTOR_getKeywords2(IntPtr
extractors, IntPtr data, int size);
[DllImport("libextractor")]
private static extern void EXTRACTOR_freeKeywords(IntPtr
keywords);
Modified: Extractor-mono/LibExtractor/src/Keyword.cs
===================================================================
--- Extractor-mono/LibExtractor/src/Keyword.cs 2009-10-18 20:47:42 UTC (rev 9182)
+++ Extractor-mono/LibExtractor/src/Keyword.cs 2009-10-18 21:12:18 UTC (rev 9183)
@@ -1,6 +1,6 @@
// Keyword.cs
//
-// Copyright (C) 2008 Patrick Ulbrich, address@hidden
+// Copyright (C) 2008, 2009 Patrick Ulbrich, address@hidden
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
Modified: Extractor-mono/LibExtractor/src/KeywordType.cs
===================================================================
--- Extractor-mono/LibExtractor/src/KeywordType.cs 2009-10-18 20:47:42 UTC (rev 9182)
+++ Extractor-mono/LibExtractor/src/KeywordType.cs 2009-10-18 21:12:18 UTC (rev 9183)
@@ -1,6 +1,6 @@
// KeywordType.cs
//
-// Copyright (C) 2008 Patrick Ulbrich, address@hidden
+// Copyright (C) 2008, 2009 Patrick Ulbrich, address@hidden
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r9183 - in Extractor-mono/LibExtractor: . src,
gnunet <=