gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r20975 - Extractor/src/plugins


From: gnunet
Subject: [GNUnet-SVN] r20975 - Extractor/src/plugins
Date: Fri, 13 Apr 2012 09:26:16 +0200

Author: grothoff
Date: 2012-04-13 09:26:16 +0200 (Fri, 13 Apr 2012)
New Revision: 20975

Modified:
   Extractor/src/plugins/Makefile.am
   Extractor/src/plugins/ebml_extractor.c
   Extractor/src/plugins/mp3_extractor.c
   Extractor/src/plugins/s3m_extractor.c
   Extractor/src/plugins/template_extractor.c
Log:
-LRN: misc patches:
/home/grothoff/0001-Rewrite-the-template-more-like-documentation-now.patch  
/home/grothoff/0003-Minimally-ported-s3m-extractor.patch
/home/grothoff/0002-New-header-for-arch-definitions.patch                   
/home/grothoff/0004-Fixed-template-doc-added-architecture-header.patch



Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am   2012-04-12 21:04:51 UTC (rev 20974)
+++ Extractor/src/plugins/Makefile.am   2012-04-13 07:26:16 UTC (rev 20975)
@@ -15,6 +15,7 @@
   libextractor_id3.la \
   libextractor_id3v2.la \
   libextractor_ebml.la \
+  libextractor_s3m.la \
   libextractor_mp3.la
 
 libextractor_mp3_la_SOURCES = \
@@ -49,4 +50,12 @@
   $(top_builddir)/src/main/libextractor.la \
   $(top_builddir)/src/common/libextractor_common.la
 
+libextractor_s3m_la_SOURCES = \
+  s3m_extractor.c
+libextractor_s3m_la_LDFLAGS = \
+  $(PLUGINFLAGS)
+libextractor_s3m_la_LIBADD = \
+  $(top_builddir)/src/main/libextractor.la \
+  $(top_builddir)/src/common/libextractor_common.la
+
 EXTRA_DIST = template_extractor.c 

Modified: Extractor/src/plugins/ebml_extractor.c
===================================================================
--- Extractor/src/plugins/ebml_extractor.c      2012-04-12 21:04:51 UTC (rev 
20974)
+++ Extractor/src/plugins/ebml_extractor.c      2012-04-13 07:26:16 UTC (rev 
20975)
@@ -28,42 +28,13 @@
 #include "extractor.h"
 #include <stdint.h>
 
+#include "le_architecture.h"
+
 #ifndef DEBUG_EBML
 # define DEBUG_EBML 0
 #endif
 
 #if WINDOWS
-#include <sys/param.h>          /* #define BYTE_ORDER */
-#endif
-#ifndef __BYTE_ORDER
-#ifdef _BYTE_ORDER
-#define __BYTE_ORDER _BYTE_ORDER
-#else
-#ifdef BYTE_ORDER
-#define __BYTE_ORDER BYTE_ORDER
-#endif
-#endif
-#endif
-#ifndef __BIG_ENDIAN
-#ifdef _BIG_ENDIAN
-#define __BIG_ENDIAN _BIG_ENDIAN
-#else
-#ifdef BIG_ENDIAN
-#define __BIG_ENDIAN BIG_ENDIAN
-#endif
-#endif
-#endif
-#ifndef __LITTLE_ENDIAN
-#ifdef _LITTLE_ENDIAN
-#define __LITTLE_ENDIAN _LITTLE_ENDIAN
-#else
-#ifdef LITTLE_ENDIAN
-#define __LITTLE_ENDIAN LITTLE_ENDIAN
-#endif
-#endif
-#endif
-
-#if WINDOWS
 /* According to 
http://old.nabble.com/Porting-localtime_r-and-gmtime_r-td15282276.html
  * msvcrt.dll does have thread-safe gmtime implementation,
  * even though the documentation says otherwise.

Modified: Extractor/src/plugins/mp3_extractor.c
===================================================================
--- Extractor/src/plugins/mp3_extractor.c       2012-04-12 21:04:51 UTC (rev 
20974)
+++ Extractor/src/plugins/mp3_extractor.c       2012-04-13 07:26:16 UTC (rev 
20975)
@@ -38,36 +38,7 @@
 
 #include "extractor_plugins.h"
 
-#if WINDOWS
-#include <sys/param.h>          /* #define BYTE_ORDER */
-#endif
-#ifndef __BYTE_ORDER
-#ifdef _BYTE_ORDER
-#define __BYTE_ORDER _BYTE_ORDER
-#else
-#ifdef BYTE_ORDER
-#define __BYTE_ORDER BYTE_ORDER
-#endif
-#endif
-#endif
-#ifndef __BIG_ENDIAN
-#ifdef _BIG_ENDIAN
-#define __BIG_ENDIAN _BIG_ENDIAN
-#else
-#ifdef BIG_ENDIAN
-#define __BIG_ENDIAN BIG_ENDIAN
-#endif
-#endif
-#endif
-#ifndef __LITTLE_ENDIAN
-#ifdef _LITTLE_ENDIAN
-#define __LITTLE_ENDIAN _LITTLE_ENDIAN
-#else
-#ifdef LITTLE_ENDIAN
-#define __LITTLE_ENDIAN LITTLE_ENDIAN
-#endif
-#endif
-#endif
+#include "le_architecture.h"
 
 #define LARGEST_FRAME_SIZE 8065
 

Modified: Extractor/src/plugins/s3m_extractor.c
===================================================================
--- Extractor/src/plugins/s3m_extractor.c       2012-04-12 21:04:51 UTC (rev 
20974)
+++ Extractor/src/plugins/s3m_extractor.c       2012-04-13 07:26:16 UTC (rev 
20975)
@@ -1,68 +1,94 @@
 /*
- * This file is part of libextractor.
- * (C) 2008 Toni Ruottu
- *
- * libextractor is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published
- * by the Free Software Foundation; either version 2, or (at your
- * option) any later version.
- *
- * libextractor is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with libextractor; see the file COPYING.  If not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- *
+     This file is part of libextractor.
+     (C) 2002, 2003, 2004, 2009 Vidyut Samanta and Christian Grothoff
+
+     libextractor is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 2, or (at your
+     option) any later version.
+
+     libextractor is distributed in the hope that it will be useful, but
+     WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with libextractor; see the file COPYING.  If not, write to the
+     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+     Boston, MA 02111-1307, USA.
  */
 
 #include "platform.h"
 #include "extractor.h"
-#include "convert.h"
 
-#define HEADER_SIZE  0x70
+#include "extractor_plugins.h"
+#include "le_architecture.h"
 
-struct header
+/* Based upon ST 3.20 spec at http://16-bits.org/s3m/ */
+/* Looks like the format was defined by the software implementation,
+ * and that implementation was for little-endian platform, which means
+ * that the format is little-endian.
+ */
+
+LE_NETWORK_STRUCT_BEGIN
+struct S3MHeader
 {
-  char title[28];
-  char something[16];
-  char magicid[4];
+  char song_name[28];
+  uint8_t byte_1A;
+  uint8_t file_type; /* 0x10 == ST3 module */
+  uint8_t unknown1[2];
+  uint16_t number_of_orders; /* should be even */
+  uint16_t number_of_instruments;
+  uint16_t number_of_patterns;
+  uint16_t flags;
+  uint16_t created_with_version;
+  uint16_t file_format_info;
+  char SCRM[4];
+  uint8_t global_volume;
+  uint8_t initial_speed;
+  uint8_t initial_tempo;
+  uint8_t master_volume;
+  uint8_t ultra_click_removal;
+  uint8_t default_channel_positions;
+  uint8_t unknown2[8];
+  uint16_t special;
+  uint8_t channel_settings[32];
 };
+LE_NETWORK_STRUCT_END
 
-#define ADD(s,t) do { if (0 != proc (proc_cls, "s3m", t, 
EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0)
+#define ADD(s,t) if (0 != proc (proc_cls, "s3m", t, EXTRACTOR_METAFORMAT_UTF8, 
"text/plain", s, strlen(s) + 1)) return 1
+#define ADDL(s,t,l) if (0 != proc (proc_cls, "s3m", t, 
EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, l)) return 1
 
-
-/* "extract" keyword from a Scream Tracker 3 Module
- *
- * "Scream Tracker 3.01 BETA File Formats And Mixing Info"
- * was used, while this piece of software was originally
- * written.
- *
- */
-int 
-EXTRACTOR_s3m_extract (const unsigned char *data,
-                      size_t size,
-                      EXTRACTOR_MetaDataProcessor proc,
-                      void *proc_cls,
-                      const char *options)
+int
+EXTRACTOR_s3m_extract_method (struct EXTRACTOR_PluginList *plugin,
+    EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 {
-  char title[29];
-  const struct header *head;
+  int64_t offset;
+  unsigned char *data;
+  struct S3MHeader header;
+  char song_name_NT[29];
 
-  /* Check header size */
+  if (plugin == NULL)
+    return 1;
+  if (sizeof (header) != pl_read (plugin, &data, sizeof (header)))
+    return 1;
+  memcpy (&header, data, sizeof (header));
+  if (header.byte_1A != 0x1A || memcmp (header.SCRM, "SCRM", 4) != 0)
+    return 1;
+  header.number_of_orders = LE_le16toh (header.number_of_orders);
+  header.number_of_instruments = LE_le16toh (header.number_of_instruments);
+  header.number_of_patterns = LE_le16toh (header.number_of_patterns);
+  header.flags = LE_le16toh (header.flags);
+  header.created_with_version = LE_le16toh (header.created_with_version);
+  header.file_format_info = LE_le16toh (header.file_format_info);
+  header.special = LE_le16toh (header.special);
+  memcpy (song_name_NT, header.song_name, 28);
+  song_name_NT[28] = '\0';
 
-  if (size < HEADER_SIZE)    
-    return 0;    
-  head = (const struct header *) data;
-  if (memcmp (head->magicid, "SCRM", 4))
-    return 0;
-  ADD ("audio/x-s3m", EXTRACTOR_METATYPE_MIMETYPE);
-
-  memcpy (&title, head->title, 28);
-  title[28] = '\0';
-  ADD (title, EXTRACTOR_METATYPE_TITLE);
-  return 0;
+  ADD("audio/x-s3m", EXTRACTOR_METATYPE_MIMETYPE);
+  ADD(song_name_NT, EXTRACTOR_METATYPE_TITLE);
+  /* TODO: turn other header data into useful metadata (e.g. RESOURCE_TYPE).
+   * Also, disabled instruments can be (and are) used to carry user-defined 
text.
+   */
+  return 1;
 }

Modified: Extractor/src/plugins/template_extractor.c
===================================================================
--- Extractor/src/plugins/template_extractor.c  2012-04-12 21:04:51 UTC (rev 
20974)
+++ Extractor/src/plugins/template_extractor.c  2012-04-13 07:26:16 UTC (rev 
20975)
@@ -22,112 +22,65 @@
 #include "extractor.h"
 
 #include "extractor_plugins.h"
+#include "le_architecture.h"
 
-struct template_state
-{
-  int state;
-
-  /* more state fields here
-   * all variables that should survive more than one atomic read
-   * from the "file" are to be placed here.
-   */
-};
-
-enum TemplateState
-{
-  TEMPLATE_INVALID = -1,
-  TEMPLATE_LOOKING_FOR_FOO = 0,
-  TEMPLATE_READING_FOO,
-  TEMPLATE_READING_BAR,
-  TEMPLATE_SEEKING_TO_ZOOL
-};
-
-void
-EXTRACTOR_template_init_state_method (struct EXTRACTOR_PluginList *plugin)
-{
-  struct template_state *state;
-  state = plugin->state = malloc (sizeof (struct template_state));
-  if (state == NULL)
-    return;
-  state->state = TEMPLATE_LOOKING_FOR_FOO; /* or whatever is the initial one */
-  /* initialize other fields to their "uninitialized" values or defaults */
-}
-
-void
-EXTRACTOR_template_discard_state_method (struct EXTRACTOR_PluginList *plugin)
-{
-  if (plugin->state != NULL)
-  {
-    /* free other state fields that are heap-allocated */
-    free (plugin->state);
-  }
-  plugin->state = NULL;
-}
-
 int
 EXTRACTOR_template_extract_method (struct EXTRACTOR_PluginList *plugin,
     EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 {
-  int64_t file_position;
-  int64_t file_size;
-  size_t offset = 0;
-  size_t size;
+  int64_t offset;
   unsigned char *data;
-  unsigned char *ff;
-  struct mp3_state *state;
 
   /* temporary variables are declared here */
 
-  if (plugin == NULL || plugin->state == NULL)
+  if (plugin == NULL)
     return 1;
 
-  /* for easier access (and conforms better with the old plugins var names) */
-  state = plugin->state;
-  file_position = plugin->position;
-  file_size = plugin->fsize;
-  size = plugin->map_size;
-  data = plugin->shm_ptr;
+  /* initialize state here */
 
-  /* sanity checks */
-  if (plugin->seek_request < 0)
-    return 1;
-  if (file_position - plugin->seek_request > 0)
-  {
-    plugin->seek_request = -1;
-    return 1;
-  }
-  if (plugin->seek_request - file_position < size)
-    offset = plugin->seek_request - file_position;
-
-  while (1)
-  {
-    switch (state->state)
-    {
-    case TEMPLATE_INVALID:
-      plugin->seek_request = -1;
-      return 1;
-    case TEMPLATE_LOOKING_FOR_FOO:
-      /* Find FOO in data buffer.
-       * If found, set offset to its position and set state to 
TEMPLATE_READING_FOO
-       * If not found, set seek_request to file_position + offset and return 1
-       * (but it's better to give up as early as possible, to avoid reading 
the whole
-       * file byte-by-byte).
-       */ 
-      break;
-    case TEMPLATE_READING_FOO:
-      /* See if offset + sizeof(foo) < size, otherwise set seek_request to 
offset and return 1;
-       * If file_position is 0, and size is still to small, give up.
-       * Read FOO, maybe increase offset to reflect that (depends on the 
parser logic).
-       * Either process FOO right here, or jump to another state (see ebml 
plugin for an example of complex
-       * state-jumps).
-       * If FOO says you need to seek somewhere - set offset to seek_target - 
file_position and set the
-       * next state (next state will check that offset < size; all states that 
do reading should do that,
-       * and also check for EOF).
-       */
-      /* ... */
-      break;
-    }
-  }
-  /* Should not reach this */
+  /* Call pl_seek (plugin, POSITION, WHENCE) to seek (if you know where
+   * data starts).
+   */
+  /* Call pl_read (plugin, &data, COUNT) to read COUNT bytes (will be stored
+   * as data[0]..data[COUNT-1], no need to allocate data or free it; but it
+   * "goes away" when you make another read call, so store interesting values
+   * somewhere once you find them).
+   */
+  /* If you need to search for a magic id that is not at the beginning of the
+   * file, do pl_read() calls, reading sizable (1 megabyte or so) chunks,
+   * then use memchr() on them to find first byte of the magic sequence,
+   * then compare the rest of the sequence, if found.
+   * Mind the fact that you need to iterate over COUNT - SEQUENCE_LENGTH chars,
+   * and seek to POS + COUNT - SEQUENCE_LENGTH once you run out of bytes,
+   * otherwise you'd have a chance to skip bytes at chunk boundaries.
+   */
+  /* Do try to make a reasonable assumption about the amount of data you're
+   * going to search through. Iterating over the whole file, byte-by-byte is
+   * NOT a good idea, if the search itself is slow. Try to make the search as
+   * efficient as possible.
+   */
+  /* Avoid making long seeks backwards (for performance reasons)
+   */
+  /* pl_get_pos (plugin) will return current offset from the beginning of
+   * the file (i.e. index of the data[0] in the file, if you call pl_read
+   * at that point). You might need it to calculate forward-searches, if
+   * there are offsets stored within the file.
+   * pl_get_fsize (plugin) will return file size OR -1 if it is not known
+   * yet (file is not decompressed completely). Don't rely on fsize.
+   */
+  /* Seeking forward is safe
+   */
+  /* If you asked to read X bytes, but got less - it's EOF
+   */
+  /* Seeking backward a bit shouldn't hurt performance (i.e. read 4 bytes,
+   * then immediately seek 4 bytes back).
+   */
+  /* Don't read too much (you can't read more than MAX_READ from extractor.c,
+   * which is 32MB at the moment) in one call.
+   */
+  /* Once you find something, call proc(). If it returns non-0 - you're done.
+   */
+  /* Return 1 to indicate that you're done. */
+  /* Don't forget to free anything you've allocated before returning! */
   return 1;
 }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]