[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: strread.m
From: |
John W. Eaton |
Subject: |
Re: strread.m |
Date: |
Thu, 4 Aug 2011 11:27:42 -0400 |
On 3-Aug-2011, Philip Nienhuis wrote:
| > I will probably try to write textscan in C++. It's up to you whether
| > you want to continue fixing problems in strread, but given the
|
| Do you have a time schedule in mind?
| That would help me make a better decision of what to do.
I started working on it yesterday. So far I've only implemented the
part that decodes the format. I'll try for at least some of the
conversions today. Then I may need help in figuring out how to
properly return the variables that are read from the file. Then we
will also need to handle the parameter/value options.
The diffs below are what I have now. You can do things like
fid = fopen ("any-existing-file");
xtextscan (fid, "any format here for testing")
and xtextscan will display the components of the format.
jwe
# HG changeset patch
# User John W. Eaton <address@hidden>
# Date 1312471485 14400
# Node ID 5860b88c35c5cdb5d81d9e78a9f3ff4033326004
# Parent 61906c0d1e9bce0b98d3e05a571549598eaaf99e
rewrite textscan in C++
* file-io.cc (Fxtextscan): New function.
* oct-stream.h, oct-stream.cc (textscan_format_elt,
textscan_format_list): New classes.
(octave_base_stream::do_textscan, octave_base_stream::textscan,
octave_stream::textscan): New functions.
diff --git a/src/file-io.cc b/src/file-io.cc
--- a/src/file-io.cc
+++ b/src/file-io.cc
@@ -1292,6 +1292,37 @@
return Ffscanf (tmp_args, nargout);
}
+// Experimental built-in "xtextscan".
+//
+// args(0) is looked up as a stream and args(1) must be a string
+// holding the format.  On success the result of
+// octave_stream::textscan is returned as the first output and the
+// stream position reported by tell () as the second.  If the format
+// is missing or is not a string, the usage message is printed and an
+// empty list is returned.
+//
+// NOTE(review): the texinfo header below was reconstructed after the
+// mail archive's address obfuscation destroyed it; the name of the
+// first output variable (@var{c}) is a placeholder -- confirm against
+// the original changeset.
+DEFUN (xtextscan, args, ,
+  "-*- texinfo -*-\n\
+@deftypefn {Built-in Function} {[@var{c}, @var{pos}] =} textscan (@var{fid}, @var{template}, @var{ntimes}, @var{param}, @var{val}, @dots{})\n\
+@end deftypefn")
+{
+  octave_value_list retval;
+
+  // Both the stream and the format are required, so check the
+  // argument count before touching args(0) or args(1).
+  if (args.length () < 2)
+    {
+      print_usage ();
+      return retval;
+    }
+
+  octave_stream os = octave_stream_list::lookup (args(0), "textscan");
+
+  if (! error_state)
+    {
+      if (args(1).is_string ())
+        {
+          // NTIMES is not decoded from the argument list yet; -1 is
+          // always passed down for now.
+          octave_idx_type ntimes = -1;
+
+          octave_value tmp = os.textscan (args(1), ntimes);
+
+          if (! error_state)
+            {
+              // FIXME -- warn if stream is not opened in binary mode?
+              retval(1) = os.tell ();
+              retval(0) = tmp;
+            }
+        }
+      else
+        print_usage ();
+    }
+
+  return retval;
+}
+
static octave_value
do_fread (octave_stream& os, const octave_value& size_arg,
const octave_value& prec_arg, const octave_value& skip_arg,
diff --git a/src/oct-stream.cc b/src/oct-stream.cc
--- a/src/oct-stream.cc
+++ b/src/oct-stream.cc
@@ -34,7 +34,8 @@
#include <sstream>
#include <string>
-#include <Array.h>
+#include "Array.h"
+#include "Array.cc"
#include "byte-swap.h"
#include "lo-ieee.h"
@@ -573,6 +574,368 @@
// Ugh again.
+// Decode the textscan format string S into a list of format elements.
+//
+// S is split into three kinds of pieces: percent conversions (handed
+// to process_conversion), runs of whitespace (each stored as a single
+// " " element of type whitespace_conversion), and runs of any other
+// characters (stored as literal_conversion elements).  If an invalid
+// conversion is found, NCONV becomes negative and decoding stops.
+textscan_format_list::textscan_format_list (const std::string& s)
+  : nconv (0), curr_idx (0), list (dim_vector (16, 1)), buf (0)
+{
+  octave_idx_type num_elts = 0;
+
+  size_t n = s.length ();
+
+  size_t i = 0;
+
+  int width = 0;
+  int prec = 0;
+  int bitwidth = 0;
+  bool discard = false;
+  char type = '\0';
+
+  // TRUE while BUF may still hold text that has not been turned into
+  // a list element.
+  bool have_more = true;
+
+  while (i < n)
+    {
+      have_more = true;
+
+      // add_elt_to_list deletes BUF after each element, so a fresh
+      // buffer is needed at the start of every piece.
+      if (! buf)
+        buf = new std::ostringstream ();
+
+      // The <cctype> functions require a value representable as
+      // unsigned char, so convert before classifying; plain char may
+      // be negative for non-ASCII text.
+      if (s[i] == '%')
+        {
+          // Process percent-escape conversion type.
+
+          process_conversion (s, i, n, width, prec, bitwidth,
+                              discard, type, num_elts);
+
+          have_more = (buf != 0);
+        }
+      else if (isspace (static_cast<unsigned char> (s[i])))
+        {
+          type = textscan_format_elt::whitespace_conversion;
+
+          width = 0;
+          prec = 0;
+          bitwidth = 0;
+          discard = false;
+          *buf << " ";
+
+          while (++i < n && isspace (static_cast<unsigned char> (s[i])))
+            /* skip whitespace */;
+
+          add_elt_to_list (width, prec, bitwidth, discard, type, num_elts);
+
+          have_more = false;
+        }
+      else
+        {
+          type = textscan_format_elt::literal_conversion;
+
+          width = 0;
+          prec = 0;
+          bitwidth = 0;
+          discard = false;
+
+          while (i < n && ! isspace (static_cast<unsigned char> (s[i]))
+                 && s[i] != '%')
+            *buf << s[i++];
+
+          add_elt_to_list (width, prec, bitwidth, discard, type, num_elts);
+
+          have_more = false;
+        }
+
+      if (nconv < 0)
+        {
+          have_more = false;
+          break;
+        }
+    }
+
+  if (have_more)
+    add_elt_to_list (width, prec, bitwidth, discard, type, num_elts);
+
+  // Trim the list to the number of elements actually stored.
+  list.resize (dim_vector (num_elts, 1));
+
+  delete buf;
+}
+
+// Release every format element stored in the list; the list holds
+// plain pointers that were allocated with new in add_elt_to_list.
+textscan_format_list::~textscan_format_list (void)
+{
+  const octave_idx_type count = list.length ();
+
+  octave_idx_type k = 0;
+
+  while (k < count)
+    delete list(k++);
+}
+
+// Turn the text accumulated in BUF into a new format element and
+// append it to the list, doubling the list when it is full.  NUM_ELTS
+// is the number of elements stored so far and is incremented when an
+// element is added.  BUF is deleted and reset to 0 whether or not it
+// held any text; nothing happens if there is no buffer.
+void
+textscan_format_list::add_elt_to_list (int width, int prec, int bitwidth,
+                                       bool discard, char type,
+                                       octave_idx_type& num_elts,
+                                       const std::string& char_class)
+{
+  if (! buf)
+    return;
+
+  const std::string pending = buf->str ();
+
+  if (! pending.empty ())
+    {
+      textscan_format_elt *new_elt
+        = new textscan_format_elt (pending.c_str (), width, prec, bitwidth,
+                                   discard, type, char_class);
+
+      const bool list_is_full = (num_elts == list.length ());
+
+      if (list_is_full)
+        list.resize (dim_vector (2 * num_elts, 1));
+
+      list(num_elts) = new_elt;
+      num_elts++;
+    }
+
+  delete buf;
+  buf = 0;
+}
+
+// Decode one percent conversion of S, starting at the '%' found at
+// index I (N is the length of S).
+//
+// Accepted, as they are encountered: a '*' (sets DISCARD), a field
+// WIDTH optionally followed by ".PREC", and the conversion character.
+// For 'd' and 'u' the conversion character may be followed by a size
+// suffix 8, 16, 32 or 64 (default 32); for 'f' by 32 or 64 (default
+// 64); 'n' always uses 64.  The suffix sets BITWIDTH and is skipped in
+// the input; it is not copied into the element text.
+//
+// On success the element has been added by finish_conversion and the
+// function returns with I just past the conversion.  On any error, or
+// if the string ends before a conversion character is seen, NCONV is
+// set to -1.
+void
+textscan_format_list::process_conversion (const std::string& s, size_t& i,
+                                          size_t n, int& width, int& prec,
+                                          int& bitwidth, bool& discard,
+                                          char& type,
+                                          octave_idx_type& num_elts)
+{
+  width = 0;
+  prec = 0;
+  bitwidth = 0;
+  discard = false;
+  type = '\0';
+
+  // Copy the leading '%'.
+  *buf << s[i++];
+
+  bool have_width = false;
+
+  while (i < n)
+    {
+      // Number of size-suffix characters that follow the conversion
+      // character at s[i].  Declared before the switch so that the
+      // jumps to FINI do not cross an initialization.
+      size_t suffix_len = 0;
+
+      switch (s[i])
+        {
+        case '*':
+          if (discard)
+            nconv = -1;
+          else
+            {
+              discard = true;
+              *buf << s[i++];
+            }
+          break;
+
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+          if (have_width)
+            nconv = -1;
+          else
+            {
+              char c = s[i++];
+              width = width * 10 + c - '0';
+              have_width = true;
+              *buf << c;
+              while (i < n && isdigit (static_cast<unsigned char> (s[i])))
+                {
+                  c = s[i++];
+                  width = width * 10 + c - '0';
+                  *buf << c;
+                }
+
+              if (i < n && s[i] == '.')
+                {
+                  *buf << s[i++];
+                  while (i < n && isdigit (static_cast<unsigned char> (s[i])))
+                    {
+                      c = s[i++];
+                      prec = prec * 10 + c - '0';
+                      *buf << c;
+                    }
+                }
+            }
+          break;
+
+        case 'd': case 'u':
+          // s[i] is still the conversion character here, so the size
+          // suffix (if any) starts at s[i+1].
+          bitwidth = 32;
+
+          if (i+1 < n && s[i+1] == '8')
+            {
+              bitwidth = 8;
+              suffix_len = 1;
+            }
+          else if (i+2 < n && s[i+1] == '1' && s[i+2] == '6')
+            {
+              bitwidth = 16;
+              suffix_len = 2;
+            }
+          else if (i+2 < n && s[i+1] == '3' && s[i+2] == '2')
+            {
+              bitwidth = 32;
+              suffix_len = 2;
+            }
+          else if (i+2 < n && s[i+1] == '6' && s[i+2] == '4')
+            {
+              bitwidth = 64;
+              suffix_len = 2;
+            }
+          goto fini;
+
+        case 'f':
+          bitwidth = 64;
+
+          if (i+2 < n && s[i+1] == '3' && s[i+2] == '2')
+            {
+              bitwidth = 32;
+              suffix_len = 2;
+            }
+          else if (i+2 < n && s[i+1] == '6' && s[i+2] == '4')
+            {
+              bitwidth = 64;
+              suffix_len = 2;
+            }
+          goto fini;
+
+        case 'n':
+          bitwidth = 64;
+          goto fini;
+
+        case 's': case 'q': case '%': case '[':
+          goto fini;
+
+        fini:
+          {
+            if (finish_conversion (s, i, n, width, prec, bitwidth,
+                                   discard, type, num_elts) == 0)
+              {
+                // For 'd', 'u' and 'f', finish_conversion consumes
+                // only the conversion character, so step over the
+                // size suffix as well.
+                i += suffix_len;
+                return;
+              }
+          }
+          break;
+
+        default:
+          nconv = -1;
+          break;
+        }
+
+      if (nconv < 0)
+        break;
+    }
+
+  // Reached only on an error or when the string ended in the middle
+  // of a conversion.
+  nconv = -1;
+}
+
+// Complete the conversion whose conversion character is at S[I] and
+// add the resulting element to the list.
+//
+// "%%" is stored with type '%' and is not counted as a conversion.
+// For '[' the character class up to the closing ']' is copied; a
+// leading '^' changes the type to '^', and a ']' that appears first in
+// the class (after the optional '^') is taken as a member of the
+// class.  The text between the brackets is passed through
+// expand_char_class and stored with the element.  Any other character
+// is copied as is and becomes the type.
+//
+// Returns 0 on success.  If a '[' class is not terminated, returns -1,
+// sets NCONV to -1, and adds no element.
+int
+textscan_format_list::finish_conversion (const std::string& s, size_t& i,
+                                         size_t n, int& width, int& prec,
+                                         int& bitwidth, bool discard,
+                                         char& type, octave_idx_type& num_elts)
+{
+  int retval = 0;
+
+  std::string char_class;
+
+  // First and last index of the class text between the brackets.
+  size_t beg_idx = std::string::npos;
+  size_t end_idx = std::string::npos;
+
+  if (s[i] == '%')
+    {
+      type = '%';
+      *buf << s[i++];
+    }
+  else
+    {
+      type = s[i];
+
+      if (s[i] == '[')
+        {
+          *buf << s[i++];
+
+          if (i < n)
+            {
+              beg_idx = i;
+
+              if (s[i] == '^')
+                {
+                  type = '^';
+                  *buf << s[i++];
+
+                  if (i < n)
+                    {
+                      beg_idx = i;
+
+                      if (s[i] == ']')
+                        *buf << s[i++];
+                    }
+                }
+              else if (s[i] == ']')
+                *buf << s[i++];
+            }
+
+          while (i < n && s[i] != ']')
+            *buf << s[i++];
+
+          if (i < n && s[i] == ']')
+            {
+              end_idx = i-1;
+              *buf << s[i++];
+            }
+
+          if (s[i-1] != ']')
+            retval = nconv = -1;
+        }
+      else
+        *buf << s[i++];
+
+      // Count the conversion only if it was well formed.  Incrementing
+      // unconditionally would turn the -1 error marker back into 0.
+      if (retval == 0)
+        nconv++;
+    }
+
+  if (nconv >= 0)
+    {
+      if (beg_idx != std::string::npos && end_idx != std::string::npos)
+        char_class = expand_char_class (s.substr (beg_idx,
+                                                  end_idx - beg_idx + 1));
+
+      add_elt_to_list (width, prec, bitwidth, discard, type,
+                       num_elts, char_class);
+    }
+
+  return retval;
+}
+
+// Debugging aid: write a description of every element of the list to
+// std::cerr, one field per line, with a blank line after each element.
+void
+textscan_format_list::printme (void) const
+{
+  const octave_idx_type count = list.length ();
+
+  for (octave_idx_type k = 0; k < count; k++)
+    {
+      const textscan_format_elt *e = list(k);
+
+      std::cerr << "width: " << e->width << "\n";
+      std::cerr << "digits " << e->digits << "\n";
+      std::cerr << "bitwidth: " << e->bitwidth << "\n";
+      std::cerr << "discard: " << e->discard << "\n";
+      std::cerr << "type: ";
+
+      // The two special element kinds are shown by name; everything
+      // else is shown as its conversion character.
+      switch (e->type)
+        {
+        case textscan_format_elt::literal_conversion:
+          std::cerr << "literal text\n";
+          break;
+
+        case textscan_format_elt::whitespace_conversion:
+          std::cerr << "whitespace\n";
+          break;
+
+        default:
+          std::cerr << e->type << "\n";
+          break;
+        }
+
+      std::cerr << "char_class: `"
+                << undo_string_escapes (e->char_class) << "'\n";
+      std::cerr << "text: `" << undo_string_escapes (e->text) << "'\n\n";
+    }
+}
+
+// And again.
+
printf_format_list::printf_format_list (const std::string& s)
: nconv (0), curr_idx (0), list (dim_vector (16, 1)), buf (0)
{
@@ -2305,6 +2668,45 @@
return retval;
}
+// Placeholder for the actual conversion step: neither argument is
+// used yet.  A notice is written to std::cerr and an empty Matrix is
+// returned.
+octave_value
+octave_base_stream::do_textscan (textscan_format_list& fmt_list,
+                                 octave_idx_type ntimes)
+{
+  octave_value result = Matrix ();
+
+  std::cerr << "textscan: this is when the conversion would happen";
+  std::cerr << std::endl;
+
+  return result;
+}
+
+// Decode FMT and hand the resulting format list to do_textscan.
+//
+// If the stream has no input side, invalid_operation is called.  The
+// decoded format is always printed with printme; if it contains an
+// invalid conversion an error is raised instead of scanning.  An empty
+// Matrix is returned whenever do_textscan is not reached.
+octave_value
+octave_base_stream::textscan (const std::string& fmt, octave_idx_type ntimes)
+{
+  octave_value result = Matrix ();
+
+  std::istream *in = input_stream ();
+
+  if (! in)
+    {
+      invalid_operation ("textscan", "reading");
+      return result;
+    }
+
+  textscan_format_list fmt_list (fmt);
+
+  fmt_list.printme ();
+
+  if (fmt_list.num_conversions () == -1)
+    ::error ("textscan: invalid format specified");
+  else if (! error_state)
+    result = do_textscan (fmt_list, ntimes);
+
+  return result;
+}
+
// Functions that are defined for all output streams (output streams
// are those that define os).
@@ -3860,6 +4262,41 @@
return retval;
}
+// Forward to the underlying stream representation when the stream is
+// usable; otherwise return a default-constructed octave_value.
+octave_value
+octave_stream::textscan (const std::string& fmt, octave_idx_type ntimes)
+{
+  if (! stream_ok ())
+    return octave_value ();
+
+  return rep->textscan (fmt, ntimes);
+}
+
+// Accept the format as an octave_value.  It must be a string; escape
+// sequences in a single-quoted string are processed before the format
+// is passed on to the std::string overload.  For any other value an
+// error is raised and an empty Matrix is returned.
+octave_value
+octave_stream::textscan (const octave_value& fmt, octave_idx_type ntimes)
+{
+  if (! fmt.is_string ())
+    {
+      // Note that this is not ::error () !
+
+      error ("textscan: format must be a string");
+
+      return Matrix ();
+    }
+
+  std::string format_text = fmt.string_value ();
+
+  if (fmt.is_sq_string ())
+    format_text = do_string_escapes (format_text);
+
+  return textscan (format_text, ntimes);
+}
+
int
octave_stream::printf (const std::string& fmt, const octave_value_list& args,
const std::string& who)
diff --git a/src/oct-stream.h b/src/oct-stream.h
--- a/src/oct-stream.h
+++ b/src/oct-stream.h
@@ -184,6 +184,155 @@
};
class
+OCTINTERP_API
+textscan_format_elt
+{
+public:
+
+  // One element of a decoded textscan format.  Each element owns a
+  // private copy of its text, made with strsave and released with
+  // delete [] in the destructor.
+
+  // Values stored in TYPE for elements that are not percent
+  // conversions.
+  enum special_conversion
+    {
+      whitespace_conversion = 1,
+      literal_conversion = 2
+    };
+
+  textscan_format_elt (const char *txt = 0, int w = 0, int d = 0,
+                       int bw = 0, bool dis = false, char typ = '\0',
+                       const std::string& ch_class = std::string ())
+    : text (strsave (txt)), width (w), digits (d), bitwidth (bw),
+      discard (dis), type (typ), char_class (ch_class) { }
+
+  textscan_format_elt (const textscan_format_elt& e)
+    : text (strsave (e.text)), width (e.width), digits (e.digits),
+      bitwidth (e.bitwidth), discard (e.discard), type (e.type),
+      char_class (e.char_class) { }
+
+  textscan_format_elt& operator = (const textscan_format_elt& e)
+  {
+    if (this != &e)
+      {
+        // Make the new copy first, then release the buffer this
+        // element already owns so that it is not leaked.
+        const char *new_text = strsave (e.text);
+        delete [] text;
+        text = new_text;
+
+        width = e.width;
+        digits = e.digits;
+        bitwidth = e.bitwidth;
+        discard = e.discard;
+        type = e.type;
+        char_class = e.char_class;
+      }
+
+    return *this;
+  }
+
+  ~textscan_format_elt (void) { delete [] text; }
+
+  // The C-style format string.
+  const char *text;
+
+  // The maximum field width.
+  int width;
+
+  // The maximum number of digits to read after the decimal in a
+  // floating point conversion.
+  int digits;
+
+  // The size of the result.  For integers, bitwidth may be 8, 16, 32,
+  // or 64.  For floating point values, bitwidth may be 32 or 64.
+  int bitwidth;
+
+  // TRUE if we are not storing the result of this conversion.
+  bool discard;
+
+  // Type of conversion -- `d', `u', `f', `n', `s', `q', `c', `%', or `['.
+  char type;
+
+  // The class of characters in a `[' format.
+  std::string char_class;
+};
+
+class
+OCTINTERP_API
+textscan_format_list
+{
+public:
+
+  // Decode FMT into a list of format elements.
+  textscan_format_list (const std::string& fmt = std::string ());
+
+  ~textscan_format_list (void);
+
+  // Number of conversions found, or -1 for an invalid format.
+  octave_idx_type num_conversions (void) { return nconv; }
+
+  // Number of elements in the list.  This is not necessarily the
+  // number of conversions: "x %d y %d z" has 2 conversions, but the
+  // list has 3 elements because of the text that follows the last
+  // conversion.
+
+  octave_idx_type length (void) { return list.length (); }
+
+  // Rewind to the first element and return it.
+  const textscan_format_elt *first (void)
+  {
+    curr_idx = 0;
+    return current ();
+  }
+
+  // The element at the current position, or 0 if the list is empty.
+  const textscan_format_elt *current (void) const
+  {
+    if (list.length () > 0)
+      return list.elem (curr_idx);
+
+    return 0;
+  }
+
+  // Advance to the following element and return it.  Past the end of
+  // the list, wrap around to the first element if CYCLE is true;
+  // otherwise return 0.
+  const textscan_format_elt *next (bool cycle = true)
+  {
+    if (++curr_idx >= list.length ())
+      {
+        if (! cycle)
+          return 0;
+
+        curr_idx = 0;
+      }
+
+    return current ();
+  }
+
+  // Print a description of every element on std::cerr.
+  void printme (void) const;
+
+  // TRUE unless an invalid conversion was found.
+  bool ok (void) const { return (nconv >= 0); }
+
+  operator bool () const { return ok (); }
+
+private:
+
+  // How many conversions the format string specifies; -1 marks a
+  // format in which invalid conversions were found.
+  octave_idx_type nconv;
+
+  // Position of the current element in LIST.
+  octave_idx_type curr_idx;
+
+  // FIXME -- maybe LIST should be a std::list object?
+  // The format elements themselves.
+  Array<textscan_format_elt*> list;
+
+  // Scratch buffer that collects the text of the element being built.
+  std::ostringstream *buf;
+
+  // Helpers used by the constructor while decoding the format.
+
+  void add_elt_to_list (int width, int digits, int bitwidth, bool discard,
+                        char type, octave_idx_type& num_elts,
+                        const std::string& char_class = std::string ());
+
+  void process_conversion (const std::string& s, size_t& i, size_t n,
+                           int& width, int& digits, int& bitwidth,
+                           bool& discard, char& type,
+                           octave_idx_type& num_elts);
+
+  int finish_conversion (const std::string& s, size_t& i, size_t n,
+                         int& width, int& digits, int& bitwidth,
+                         bool discard, char& type,
+                         octave_idx_type& num_elts);
+
+  // Copying is not allowed: both operations are declared here and
+  // deliberately left private.
+
+  textscan_format_list (const textscan_format_list&);
+
+  textscan_format_list& operator = (const textscan_format_list&);
+};
+
+class
printf_format_elt
{
public:
@@ -468,6 +617,11 @@
octave_value_list oscanf (const std::string& fmt,
const std::string& who /* = "scanf" */);
+ octave_value do_textscan (textscan_format_list& fmt_list,
+ octave_idx_type ntimes);
+
+ octave_value textscan (const std::string& fmt, octave_idx_type ntimes);
+
// Functions that are defined for all output streams (output streams
// are those that define os).
@@ -558,6 +712,10 @@
octave_value_list oscanf (const octave_value& fmt,
const std::string& who /* = "scanf" */);
+ octave_value textscan (const std::string& fmt, octave_idx_type ntimes);
+
+ octave_value textscan (const octave_value& fmt, octave_idx_type ntimes);
+
int printf (const std::string& fmt, const octave_value_list& args,
const std::string& who /* = "printf" */);
- Re: Release goals for 3.6, (continued)
- Re: Release goals for 3.6, PhilipNienhuis, 2011/08/02
- strread.m (was: Re: Release goals for 3.6), John W. Eaton, 2011/08/02
- Re: strread.m, Philip Nienhuis, 2011/08/02
- Re: strread.m, John W. Eaton, 2011/08/02
- Re: strread.m, Philip Nienhuis, 2011/08/02
- Re: strread.m, John W. Eaton, 2011/08/02
- Re: strread.m, Philip Nienhuis, 2011/08/03
- Re: strread.m, John W. Eaton, 2011/08/03
- Re: strread.m, Philip Nienhuis, 2011/08/03
- Re: strread.m,
John W. Eaton <=
- xtextscan [WAS: Re: strread.m], Philip Nienhuis, 2011/08/04
- Re: strread.m, Ben Abbott, 2011/08/04
- Re: strread.m, Ben Abbott, 2011/08/02
- Re: strread.m, John W. Eaton, 2011/08/02
Re: Release goals for 3.6, Konstantinos Poulios, 2011/08/03