[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: gzipped data files
From: John W. Eaton
Subject: Re: gzipped data files
Date: Fri, 29 Apr 2005 16:46:13 -0400
On 29-Apr-2005, David Bateman wrote:
| Dmitri A. Sergatskov wrote:
|
| > Since we now have all this gzip machinery in place,
| > would it be easy to add gzip option to fopen()?
| > That is, I would like to be able to do something like:
| >
| > fid = fopen("data.dat.gz","rz")
| >
| > a = fscanf(fid,"%f")
| >
| > ...
| >
| > Just asking...
| >
| > Dmitri.
|
| Something like that is relatively easy, as gzopen just returns a C file
| descriptor.
? Does it really work to pass a gzFile object to any of the stdio
functions that expect a FILE pointer?
Anyway, try the following patch.
Thanks,
jwe
src/ChangeLog:
2005-04-29 John W. Eaton <address@hidden>
* c-file-ptr-stream.h (c_file_ptr_stream): New template class,
converted from i_c_file_ptr_stream.
(i_c_file_ptr_stream, o_c_file_ptr_stream, io_c_file_ptr_stream):
Now typedefs.
(i_c_zfile_ptr_stream, o_c_zfile_ptr_stream, io_c_zfile_ptr_stream):
New typedefs.
* c-file-ptr-stream.h, c-file-ptr-stream.cc (c_zfile_ptr_buf):
New class.
* oct-stdstrm.h (class octave_tstdiostream): New template class,
converted from octave_stdiostream.
(octave_stdiostream): Now a typedef.
[HAVE_ZLIB] (octave_zstdiostream): New typedef.
* oct-stdstrm.cc: Delete.
* Makefile.in (DIST_SRC): Remove it from the list.
Index: src/Makefile.in
===================================================================
RCS file: /cvs/octave/src/Makefile.in,v
retrieving revision 1.375
diff -u -r1.375 Makefile.in
--- src/Makefile.in 29 Apr 2005 13:04:25 -0000 1.375
+++ src/Makefile.in 29 Apr 2005 20:35:46 -0000
@@ -168,7 +168,7 @@
ls-mat-ascii.cc ls-mat4.cc ls-mat5.cc ls-oct-ascii.cc \
ls-oct-binary.cc ls-utils.cc main.c mappers.cc matherr.c \
oct-fstrm.cc oct-hist.cc oct-iostrm.cc oct-map.cc \
- oct-obj.cc oct-prcstrm.cc oct-procbuf.cc oct-stdstrm.cc \
+ oct-obj.cc oct-prcstrm.cc oct-procbuf.cc \
oct-stream.cc zfstream.cc oct-strstrm.cc oct-lvalue.cc pager.cc \
parse.y pr-output.cc procstream.cc sighandlers.cc \
siglist.c sparse-xdiv.cc sparse-xpow.cc strcasecmp.c \
Index: src/c-file-ptr-stream.cc
===================================================================
RCS file: /cvs/octave/src/c-file-ptr-stream.cc,v
retrieving revision 1.16
diff -u -r1.16 c-file-ptr-stream.cc
--- src/c-file-ptr-stream.cc 26 Apr 2005 19:24:32 -0000 1.16
+++ src/c-file-ptr-stream.cc 29 Apr 2005 20:35:46 -0000
@@ -190,9 +190,158 @@
return retval;
}
+#ifdef HAVE_ZLIB
+
+c_zfile_ptr_buf::~c_zfile_ptr_buf (void)
+{
+ close ();
+}
+
+// XXX FIXME XXX -- I'm sure there is room for improvement here...
+
+c_zfile_ptr_buf::int_type
+c_zfile_ptr_buf::overflow (int_type c)
+{
+#if defined (CXX_ISO_COMPLIANT_LIBRARY)
+ if (f)
+ return (c != traits_type::eof ()) ? gzputc (f, c) : flush ();
+ else
+ return traits_type::not_eof (c);
+#else
+ if (f)
+ return (c != EOF) ? gzputc (f, c) : flush ();
+ else
+ return EOF;
+#endif
+}
+
+c_zfile_ptr_buf::int_type
+c_zfile_ptr_buf::underflow_common (bool bump)
+{
+ if (f)
+ {
+ int_type c = gzgetc (f);
+
+ if (! bump
+#if defined (CXX_ISO_COMPLIANT_LIBRARY)
+ && c != traits_type::eof ())
+#else
+ && c != EOF)
+#endif
+ gzungetc (c, f);
+
+ return c;
+ }
+ else
+#if defined (CXX_ISO_COMPLIANT_LIBRARY)
+ return traits_type::eof ();
+#else
+ return EOF;
+#endif
+}
+
+c_zfile_ptr_buf::int_type
+c_zfile_ptr_buf::pbackfail (int_type c)
+{
+#if defined (CXX_ISO_COMPLIANT_LIBRARY)
+ return (c != traits_type::eof () && f) ? gzungetc (c, f) :
+ traits_type::not_eof (c);
+#else
+ return (c != EOF && f) ? gzungetc (c, f) : EOF;
+#endif
+}
+
+std::streamsize
+c_zfile_ptr_buf::xsputn (const char* s, std::streamsize n)
+{
+ if (f)
+ return gzwrite (f, s, n);
+ else
+ return 0;
+}
+
+std::streamsize
+c_zfile_ptr_buf::xsgetn (char *s, std::streamsize n)
+{
+ if (f)
+ return gzread (f, s, n);
+ else
+ return 0;
+}
+
+std::streampos
+c_zfile_ptr_buf::seekoff (std::streamoff offset, std::ios::seekdir dir,
+ std::ios::openmode)
+{
+ // XXX FIXME XXX
+#if 0
+ if (f)
+ {
+ gzseek (f, offset, seekdir_to_whence (dir));
+
+ return gztell (f);
+ }
+ else
+ return 0;
+#endif
+ return -1;
+}
+
+std::streampos
+c_zfile_ptr_buf::seekpos (std::streampos offset, std::ios::openmode)
+{
+ // XXX FIXME XXX
+#if 0
+ if (f)
+ {
+ gzseek (f, offset, SEEK_SET);
+
+ return gztell (f);
+ }
+ else
+ return 0;
+#endif
+ return -1;
+}
+
+int
+c_zfile_ptr_buf::sync (void)
+{
+ flush ();
+
+ return 0;
+}
+
+int
+c_zfile_ptr_buf::flush (void)
+{
+ // XXX FIXME XXX -- do we need something more complex here, passing
+ // something other than 0 for the second argument to gzflush and
+ // checking the return value, etc.?
+
+ return f ? gzflush (f, 0) : EOF;
+}
+
+int
+c_zfile_ptr_buf::close (void)
+{
+ int retval = -1;
+
+ flush ();
+
+ if (f)
+ {
+ retval = cf (f);
+ f = 0;
+ }
+
+ return retval;
+}
+
+#endif
+
/*
;;; Local Variables: ***
;;; mode: C++ ***
;;; End: ***
*/
-
Index: src/c-file-ptr-stream.h
===================================================================
RCS file: /cvs/octave/src/c-file-ptr-stream.h,v
retrieving revision 1.19
diff -u -r1.19 c-file-ptr-stream.h
--- src/c-file-ptr-stream.h 26 Apr 2005 19:24:32 -0000 1.19
+++ src/c-file-ptr-stream.h 29 Apr 2005 20:35:47 -0000
@@ -94,18 +94,21 @@
int_type underflow_common (bool);
};
+// XXX FIXME XXX -- the following three classes could probably share
+// some code...
+
+template <typename STREAM_T, typename FILE_T, typename BUF_T>
class
-i_c_file_ptr_stream : public std::istream
+c_file_ptr_stream : public STREAM_T
{
public:
- i_c_file_ptr_stream (FILE* f,
- c_file_ptr_buf::close_fcn cf = c_file_ptr_buf::fclose)
- : std::istream (0), buf (new c_file_ptr_buf (f, cf)) { init (buf); }
+ c_file_ptr_stream (FILE_T f, typename BUF_T::close_fcn cf = BUF_T::fclose)
+ : STREAM_T (0), buf (new BUF_T (f, cf)) { init (buf); }
- ~i_c_file_ptr_stream (void) { delete buf; buf = 0; }
+ ~c_file_ptr_stream (void) { delete buf; buf = 0; }
- c_file_ptr_buf *rdbuf (void) { return buf; }
+ BUF_T *rdbuf (void) { return buf; }
void close (void) { if (buf) buf->close (); }
@@ -114,67 +117,96 @@
long tell (void) { return buf ? buf->tell () : -1; }
- void clear (void) { if (buf) buf->clear (); std::istream::clear (); }
+ void clear (void) { if (buf) buf->clear (); STREAM_T::clear (); }
private:
- c_file_ptr_buf *buf;
+ BUF_T *buf;
};
+typedef c_file_ptr_stream<std::istream, FILE *, c_file_ptr_buf>
i_c_file_ptr_stream;
+typedef c_file_ptr_stream<std::ostream, FILE *, c_file_ptr_buf>
o_c_file_ptr_stream;
+typedef c_file_ptr_stream<std::iostream, FILE *, c_file_ptr_buf>
io_c_file_ptr_stream;
+
+#ifdef HAVE_ZLIB
+
+#ifdef HAVE_ZLIB_H
+#include <zlib.h>
+#endif
+
class
-o_c_file_ptr_stream : public std::ostream
+c_zfile_ptr_buf : public std::streambuf
{
public:
- o_c_file_ptr_stream (FILE* f,
- c_file_ptr_buf::close_fcn cf = c_file_ptr_buf::fclose)
- : std::ostream (0), buf (new c_file_ptr_buf (f, cf)) { init (buf); }
+#if !defined (CXX_ISO_COMPLIANT_LIBRARY)
+ typedef int int_type;
+#else
+ typedef std::streambuf::int_type int_type;
+#endif
- ~o_c_file_ptr_stream (void) { delete buf; buf = 0; }
+ typedef int (*close_fcn) (gzFile);
- c_file_ptr_buf *rdbuf (void) { return buf; }
+ gzFile stdiofile (void) { return f; }
- void close (void) { if (buf) buf->close (); }
+ c_zfile_ptr_buf (gzFile f_arg, close_fcn cf_arg = fclose)
+ : std::streambuf (), f (f_arg), cf (cf_arg)
+ { }
- int seek (long offset, int origin)
- { return buf ? buf->seek (offset, origin) : -1; }
+ ~c_zfile_ptr_buf (void);
- long tell (void) { return buf ? buf->tell () : -1; }
+ int_type overflow (int_type);
- void clear (void) { if (buf) buf->clear (); std::ostream::clear (); }
+ int_type underflow (void) { return underflow_common (false); }
-private:
+ int_type uflow (void) { return underflow_common (true); }
- c_file_ptr_buf *buf;
-};
+ int_type pbackfail (int_type);
-class
-io_c_file_ptr_stream : public std::iostream
-{
-public:
+ std::streamsize xsputn (const char*, std::streamsize);
- io_c_file_ptr_stream (FILE* f,
- c_file_ptr_buf::close_fcn cf = c_file_ptr_buf::fclose)
- : std::iostream (0), buf (new c_file_ptr_buf (f, cf)) { init (buf); }
+ std::streamsize xsgetn (char *, std::streamsize);
- ~io_c_file_ptr_stream (void) { delete buf; buf = 0; }
+ std::streampos seekoff (std::streamoff, std::ios::seekdir,
+ std::ios::openmode = std::ios::in | std::ios::out);
+
+ std::streampos seekpos (std::streampos,
+ std::ios::openmode = std::ios::in | std::ios::out);
- c_file_ptr_buf *rdbuf (void) { return buf; }
+ int sync (void);
- void close (void) { if (buf) buf->close (); }
+ int flush (void);
+
+ int close (void);
+
+ int file_number () const { return -1; }
int seek (long offset, int origin)
- { return buf ? buf->seek (offset, origin) : -1; }
+ { return f ? gzseek (f, offset, origin) : -1; }
- long tell (void) { return buf ? buf->tell () : -1; }
+ long tell (void) { return f ? gztell (f) : -1; }
+
+ void clear (void) { if (f) gzclearerr (f); }
- void clear (void) { if (buf) buf->clear (); std::iostream::clear (); }
+ static int fclose (gzFile f) { return ::gzclose (f); }
+
+protected:
+
+ gzFile f;
+
+ close_fcn cf;
private:
- c_file_ptr_buf *buf;
+ int_type underflow_common (bool);
};
+typedef c_file_ptr_stream<std::istream, gzFile, c_zfile_ptr_buf>
i_c_zfile_ptr_stream;
+typedef c_file_ptr_stream<std::ostream, gzFile, c_zfile_ptr_buf>
o_c_zfile_ptr_stream;
+typedef c_file_ptr_stream<std::iostream, gzFile, c_zfile_ptr_buf>
io_c_zfile_ptr_stream;
+
+#endif
+
#endif
/*
Index: src/file-io.cc
===================================================================
RCS file: /cvs/octave/src/file-io.cc,v
retrieving revision 1.166
diff -u -r1.166 file-io.cc
--- src/file-io.cc 29 Apr 2005 04:47:55 -0000 1.166
+++ src/file-io.cc 29 Apr 2005 20:35:47 -0000
@@ -53,6 +53,10 @@
#include <unistd.h>
#endif
+#ifdef HAVE_ZLIB_H
+#include <zlib.h>
+#endif
+
#include "error.h"
#include "file-ops.h"
#include "lo-ieee.h"
@@ -126,42 +130,58 @@
}
static std::ios::openmode
-fopen_mode_to_ios_mode (const std::string& mode)
+fopen_mode_to_ios_mode (const std::string& mode_arg)
{
std::ios::openmode retval = std::ios::in;
- if (! mode.empty ())
+ if (! mode_arg.empty ())
{
// Could probably be faster, but does it really matter?
- if (mode == "rt")
- retval = std::ios::in;
- else if (mode == "wt")
- retval = std::ios::out | std::ios::trunc;
- else if (mode == "at")
- retval = std::ios::out | std::ios::app;
- else if (mode == "r+t")
- retval = std::ios::in | std::ios::out;
- else if (mode == "w+t")
- retval = std::ios::in | std::ios::out | std::ios::trunc;
- else if (mode == "a+t")
- retval = std::ios::in | std::ios::out | std::ios::ate;
- else if (mode == "rb" || mode == "r")
- retval = std::ios::in | std::ios::binary;
- else if (mode == "wb" || mode == "w")
- retval = std::ios::out | std::ios::trunc | std::ios::binary;
- else if (mode == "ab" || mode == "a")
- retval = std::ios::out | std::ios::app | std::ios::binary;
- else if (mode == "r+b" || mode == "r+")
- retval = std::ios::in | std::ios::out | std::ios::binary;
- else if (mode == "w+b" || mode == "w+")
- retval = (std::ios::in | std::ios::out | std::ios::trunc
- | std::ios::binary);
- else if (mode == "a+b" || mode == "a+")
- retval = (std::ios::in | std::ios::out | std::ios::ate
- | std::ios::binary);
- else
- ::error ("invalid mode specified");
+ std::string mode = mode_arg;
+
+ size_t pos = mode.find ('z');
+
+ if (pos != NPOS)
+ {
+#if defined (HAVE_ZLIB)
+ mode.erase (pos, 1);
+#else
+ error ("this version of Octave does not support gzipped files");
+#endif
+ }
+
+ if (! error_state)
+ {
+ if (mode == "rt")
+ retval = std::ios::in;
+ else if (mode == "wt")
+ retval = std::ios::out | std::ios::trunc;
+ else if (mode == "at")
+ retval = std::ios::out | std::ios::app;
+ else if (mode == "r+t")
+ retval = std::ios::in | std::ios::out;
+ else if (mode == "w+t")
+ retval = std::ios::in | std::ios::out | std::ios::trunc;
+ else if (mode == "a+t")
+ retval = std::ios::in | std::ios::out | std::ios::ate;
+ else if (mode == "rb" || mode == "r")
+ retval = std::ios::in | std::ios::binary;
+ else if (mode == "wb" || mode == "w")
+ retval = std::ios::out | std::ios::trunc | std::ios::binary;
+ else if (mode == "ab" || mode == "a")
+ retval = std::ios::out | std::ios::app | std::ios::binary;
+ else if (mode == "r+b" || mode == "r+")
+ retval = std::ios::in | std::ios::out | std::ios::binary;
+ else if (mode == "w+b" || mode == "w+")
+ retval = (std::ios::in | std::ios::out | std::ios::trunc
+ | std::ios::binary);
+ else if (mode == "a+b" || mode == "a+")
+ retval = (std::ios::in | std::ios::out | std::ios::ate
+ | std::ios::binary);
+ else
+ ::error ("invalid mode specified");
+ }
}
return retval;
@@ -386,15 +406,39 @@
if (! error_state)
{
- FILE *fptr = ::fopen (name.c_str (), mode.c_str ());
+#if defined (HAVE_ZLIB)
+ std::string tmode = mode;
- retval = octave_stdiostream::create (name, fptr, md, flt_fmt);
+ size_t pos = tmode.find ('z');
- if (! fptr)
+ if (pos != NPOS)
{
- using namespace std;
- retval.error (::strerror (errno));
+ tmode.erase (pos, 1);
+
+ gzFile fptr = ::gzopen (name.c_str (), tmode.c_str ());
+
+ if (fptr)
+ retval = octave_zstdiostream::create (name, fptr, md, flt_fmt);
+ else
+ {
+ using namespace std;
+ retval.error (::strerror (errno));
+ }
}
+ else
+#endif
+ {
+ FILE *fptr = ::fopen (name.c_str (), mode.c_str ());
+
+ if (fptr)
+ retval = octave_stdiostream::create (name, fptr, md, flt_fmt);
+ else
+ {
+ using namespace std;
+ retval.error (::strerror (errno));
+ }
+ }
+
}
}
Index: src/oct-stdstrm.h
===================================================================
RCS file: /cvs/octave/src/oct-stdstrm.h,v
retrieving revision 1.25
diff -u -r1.25 oct-stdstrm.h
--- src/oct-stdstrm.h 26 Apr 2005 19:24:33 -0000 1.25
+++ src/oct-stdstrm.h 29 Apr 2005 20:35:47 -0000
@@ -27,39 +27,39 @@
#include "oct-stream.h"
#include "c-file-ptr-stream.h"
+template <typename BUF_T, typename STREAM_T, typename FILE_T>
class
-octave_stdiostream : public octave_base_stream
+octave_tstdiostream : public octave_base_stream
{
public:
- octave_stdiostream (const std::string& n, FILE *f = 0,
- std::ios::openmode m = std::ios::in|std::ios::out,
- oct_mach_info::float_format ff
- = oct_mach_info::native_float_format (),
- c_file_ptr_buf::close_fcn cf = c_file_ptr_buf::fclose)
- : octave_base_stream (m, ff), nm (n), md (m), s(0)
- {
- if (f)
- s = new io_c_file_ptr_stream (f, cf);
- }
+ octave_tstdiostream (const std::string& n, FILE_T f = 0,
+ std::ios::openmode m = std::ios::in|std::ios::out,
+ oct_mach_info::float_format ff
+ = oct_mach_info::native_float_format (),
+ typename BUF_T::close_fcn cf = BUF_T::fclose)
+ : octave_base_stream (m, ff), nm (n), md (m),
+ s(f ? new STREAM_T (f, cf) : 0)
+ { }
static octave_stream
- create (const std::string& n, FILE *f = 0,
+ create (const std::string& n, FILE_T f = 0,
std::ios::openmode m = std::ios::in|std::ios::out,
oct_mach_info::float_format ff
= oct_mach_info::native_float_format (),
- c_file_ptr_buf::close_fcn cf = c_file_ptr_buf::fclose)
+ typename BUF_T::close_fcn cf = BUF_T::fclose)
{
- return octave_stream (new octave_stdiostream (n, f, m, ff, cf));
+ return octave_stream (new octave_tstdiostream (n, f, m, ff, cf));
}
// Position a stream at OFFSET relative to ORIGIN.
- int seek (long offset, int origin);
+ int seek (long offset, int origin)
+ { return s ? s->seek (offset, origin) : -1; }
// Return current stream position.
- long tell (void);
+ long tell (void) { return s ? s->tell () : -1; }
// Return non-zero if EOF has been reached on this stream.
@@ -74,8 +74,8 @@
std::ostream *output_stream (void) { return (md & std::ios::out) ? s : 0; }
// XXX FIXME XXX -- should not have to cast away const here.
- c_file_ptr_buf *rdbuf (void) const
- { return s ? (const_cast<io_c_file_ptr_stream *> (s))->rdbuf () : 0; }
+ BUF_T *rdbuf (void) const
+ { return s ? (const_cast<STREAM_T *> (s))->rdbuf () : 0; }
bool bad (void) const { return s ? s->bad () : true; }
@@ -89,19 +89,27 @@
std::ios::openmode md;
- io_c_file_ptr_stream *s;
+ STREAM_T *s;
- ~octave_stdiostream (void) { delete s; }
+ ~octave_tstdiostream (void) { delete s; }
private:
// No copying!
- octave_stdiostream (const octave_stdiostream&);
+ octave_tstdiostream (const octave_tstdiostream&);
- octave_stdiostream& operator = (const octave_stdiostream&);
+ octave_tstdiostream& operator = (const octave_tstdiostream&);
};
+typedef octave_tstdiostream<c_file_ptr_buf, io_c_file_ptr_stream, FILE *>
octave_stdiostream;
+
+#ifdef HAVE_ZLIB
+
+typedef octave_tstdiostream<c_zfile_ptr_buf, io_c_zfile_ptr_stream, gzFile>
octave_zstdiostream;
+
+#endif
+
#endif
/*