diff --git a/src/cut.c b/src/cut.c
index bb2e641f7..8f156ad78 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -80,6 +80,9 @@ enum operating_mode
     /* Output characters that are in the given bytes. */
     byte_mode,
 
+    /* Output the characters at the given character positions. */
+    char_mode,
+
     /* Output the given delimiter-separated fields. */
     field_mode
   };
@@ -137,6 +140,61 @@ static struct option const longopts[] =
   {NULL, 0, NULL, 0}
 };
 
+/* Read one UTF-8 encoded character from STREAM.
+
+   Return EOF at end of input.  Otherwise return the bytes of the
+   character packed into an int, first byte most significant, so a
+   single-byte character is returned as its byte value.  A lead byte
+   announces up to three continuation bytes (10xxxxxx); a following
+   byte that is not a continuation byte is pushed back and the bytes
+   gathered so far are returned as one character.
+
+   A 4-byte sequence sets the top bit, so the result may be negative;
+   it cannot collide with an EOF of -1 because a lead byte is at most
+   0xF7.  Callers recover the bytes by converting to unsigned int.
+
+   The input is decoded as UTF-8 unconditionally; the current locale
+   is not consulted.  */
+
+static int
+getUTF8 (FILE *stream)
+{
+  unsigned int packed;
+  int c;
+  int n, i;
+
+  c = getc (stream);
+  if (c == EOF)
+    return c;
+
+  if ((c & 0xE0) == 0xC0)
+    n = 1;
+  else if ((c & 0xF0) == 0xE0)
+    n = 2;
+  else if ((c & 0xF8) == 0xF0)
+    n = 3;
+  else
+    n = 0;
+
+  /* Accumulate in an unsigned type: shifting a 4-byte sequence into a
+     signed int would overflow, which is undefined behavior.  */
+  packed = c;
+  for (i = 0; i < n; i++)
+    {
+      c = getc (stream);
+      if (c == EOF)
+        break;
+      if ((c & 0xC0) != 0x80)
+        {
+          ungetc (c, stream);
+          break;
+        }
+      packed = (packed << 8) | c;
+    }
+
+  return (int) packed;
+}
+
 void
 usage (int status)
 {
@@ -280,6 +338,68 @@ cut_bytes (FILE *stream)
     }
 }
 
+/* Read from stream STREAM, printing to standard output any selected
+   characters.  Input is split into characters by getUTF8.  */
+
+static void
+cut_chars (FILE *stream)
+{
+  uintmax_t char_idx; /* Number of characters in the line so far.  */
+  /* Whether to begin printing delimiters between ranges for the current line.
+     Set after we've begun printing data corresponding to the first range.  */
+  bool print_delimiter;
+
+  char_idx = 0;
+  print_delimiter = false;
+  current_rp = frp;
+  while (true)
+    {
+      int c; /* Each character from the file, as packed by getUTF8.  */
+
+      c = getUTF8 (stream);
+
+      if (c == line_delim)
+        {
+          putchar (c);
+          char_idx = 0;
+          print_delimiter = false;
+          current_rp = frp;
+        }
+      else if (c == EOF)
+        {
+          if (char_idx > 0)
+            putchar (line_delim);
+          break;
+        }
+      else
+        {
+          next_item (&char_idx);
+          if (print_kth (char_idx))
+            {
+              unsigned int ch = c;
+              int shift;
+
+              if (output_delimiter_specified)
+                {
+                  if (print_delimiter && is_range_start_index (char_idx))
+                    {
+                      fwrite (output_delimiter_string, sizeof (char),
+                              output_delimiter_length, stdout);
+                    }
+                  print_delimiter = true;
+                }
+
+              /* Skip the unused high-order bytes, but always emit the
+                 low byte so that a selected NUL is not dropped.  */
+              for (shift = 24; shift > 0 && (ch >> shift) == 0; shift -= 8)
+                continue;
+              for (; shift >= 0; shift -= 8)
+                putchar ((ch >> shift) & 0xFF);
+            }
+        }
+    }
+}
+
 /* Read from stream STREAM, printing to standard output any selected fields.  */
 
 static void
@@ -430,6 +550,8 @@ cut_stream (FILE *stream)
 {
   if (operating_mode == byte_mode)
     cut_bytes (stream);
+  else if (operating_mode == char_mode)
+    cut_chars (stream);
   else
     cut_fields (stream);
 }
@@ -505,7 +627,6 @@ main (int argc, char **argv)
       switch (optc)
         {
         case 'b':
-        case 'c':
           /* Build the byte list.  */
           if (operating_mode != undefined_mode)
             FATAL_ERROR (_("only one type of list may be specified"));
@@ -513,6 +634,14 @@ main (int argc, char **argv)
           spec_list_string = optarg;
           break;
 
+        case 'c':
+          /* Build the char list.  */
+          if (operating_mode != undefined_mode)
+            FATAL_ERROR (_("only one type of list may be specified"));
+          operating_mode = char_mode;
+          spec_list_string = optarg;
+          break;
+
         case 'f':
           /* Build the field list.  */
           if (operating_mode != undefined_mode)