From 6c2002b20ac63a2974e9a6f5de4f5f10a870602d Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Tue, 6 May 2014 18:38:09 -0700 Subject: [PATCH] dd: fix conv=ascii, conv=ebcdic, conv=ibm to match POSIX Problem reported by Don Baggett in . * NEWS: * doc/coreutils.texi (dd invocation): Document this. * src/dd.c (conversions): conv=ascii implies conv=unblock. conv=ebcdic and conv=ibm imply conv=block. (ascii_to_ebcdic, ebcdic_to_ascii): Correct to match POSIX 1003.1-2013. * tests/dd/ascii.sh: New file. * tests/local.mk (all_tests): Add it. --- NEWS | 21 ++++++++++++++++ doc/coreutils.texi | 11 ++++++--- src/dd.c | 37 ++++++++++++++++------------ tests/dd/ascii.sh | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ tests/local.mk | 1 + 5 files changed, 122 insertions(+), 19 deletions(-) create mode 100755 tests/dd/ascii.sh diff --git a/NEWS b/NEWS index f7b5112..4efd60d 100644 --- a/NEWS +++ b/NEWS @@ -23,6 +23,27 @@ GNU coreutils NEWS -*- outline -*- date could crash or go into an infinite loop when parsing a malformed TZ="". [bug introduced with the --date='TZ="" ..' parsing feature in coreutils-5.3.0] + dd's ASCII and EBCDIC conversions were incompatible with common practice and + with POSIX, and have been corrected as follows. First, conv=ascii now + implies conv=unblock, and conv=ebcdic and conv=ibm now imply conv=block. + Second, the translation tables for dd conv=ascii and conv=ebcdic have been + corrected as shown in the following table, where A is the ASCII value, W is + the old, wrong EBCDIC value, and E is the new, corrected EBCDIC value; all + values are in octal. + + A W E + 041 117 132 + 133 112 255 + 135 132 275 + 136 137 232 + 174 152 117 + 176 241 137 + 313 232 152 + 325 255 112 + 345 275 241 + + [These dd bugs were present in "the beginning".] + head --bytes=-N and --lines=-N now handles devices more consistently, not ignoring data from virtual devices like /dev/zero, or on BSD systems data from tty devices. 
diff --git a/doc/coreutils.texi b/doc/coreutils.texi index a949ffc..789cd68 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -8621,21 +8621,26 @@ Conversions: Convert EBCDIC to ASCII, using the conversion table specified by POSIX@. This provides a 1:1 translation for all 256 bytes. +This option implies @samp{conv=unblock}; input is converted to +ASCII before trailing spaces are deleted. @item ebcdic @opindex ebcdic@r{, converting to} Convert ASCII to EBCDIC@. This is the inverse of the @samp{ascii} conversion. +This option implies @samp{conv=block}; trailing spaces are added +before being converted to EBCDIC@. @item ibm @opindex alternate ebcdic@r{, converting to} -Convert ASCII to alternate EBCDIC, -using the alternate conversion table specified by POSIX@. +This acts like @samp{conv=ebcdic}, except it +uses the alternate conversion table specified by POSIX@. This is not a 1:1 translation, but reflects common historical practice for @samp{~}, @samp{[}, and @samp{]}. The @samp{ascii}, @samp{ebcdic}, and @samp{ibm} conversions are -mutually exclusive. +mutually exclusive. If you use any of these options, you should also +use the @samp{cbs=} option. @item block @opindex block @r{(space-padding)} diff --git a/src/dd.c b/src/dd.c index c7909e7..1e387f3 100644 --- a/src/dd.c +++ b/src/dd.c @@ -274,9 +274,9 @@ struct symbol_value /* Conversion symbols, for conv="...". */ static struct symbol_value const conversions[] = { - {"ascii", C_ASCII | C_TWOBUFS}, /* EBCDIC to ASCII. */ - {"ebcdic", C_EBCDIC | C_TWOBUFS}, /* ASCII to EBCDIC. */ - {"ibm", C_IBM | C_TWOBUFS}, /* Slightly different ASCII to EBCDIC. */ + {"ascii", C_ASCII | C_UNBLOCK | C_TWOBUFS}, /* EBCDIC to ASCII. */ + {"ebcdic", C_EBCDIC | C_BLOCK | C_TWOBUFS}, /* ASCII to EBCDIC. */ + {"ibm", C_IBM | C_BLOCK | C_TWOBUFS}, /* Different ASCII to EBCDIC. */ {"block", C_BLOCK | C_TWOBUFS}, /* Variable to fixed length records.
*/ {"unblock", C_UNBLOCK | C_TWOBUFS}, /* Fixed to variable length records. */ {"lcase", C_LCASE | C_TWOBUFS}, /* Translate upper to lower case. */ @@ -381,24 +381,29 @@ static struct symbol_value const statuses[] = /* Translation table formed by applying successive transformations. */ static unsigned char trans_table[256]; +/* Standard translation tables, taken from POSIX 1003.1-2013. + Beware of imitations; there are lots of ASCII<->EBCDIC tables + floating around the net, perhaps valid for some applications but + not correct here. */ + static char const ascii_to_ebcdic[] = { '\000', '\001', '\002', '\003', '\067', '\055', '\056', '\057', '\026', '\005', '\045', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\074', '\075', '\062', '\046', '\030', '\031', '\077', '\047', '\034', '\035', '\036', '\037', - '\100', '\117', '\177', '\173', '\133', '\154', '\120', '\175', + '\100', '\132', '\177', '\173', '\133', '\154', '\120', '\175', '\115', '\135', '\134', '\116', '\153', '\140', '\113', '\141', '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', '\370', '\371', '\172', '\136', '\114', '\176', '\156', '\157', '\174', '\301', '\302', '\303', '\304', '\305', '\306', '\307', '\310', '\311', '\321', '\322', '\323', '\324', '\325', '\326', '\327', '\330', '\331', '\342', '\343', '\344', '\345', '\346', - '\347', '\350', '\351', '\112', '\340', '\132', '\137', '\155', + '\347', '\350', '\351', '\255', '\340', '\275', '\232', '\155', '\171', '\201', '\202', '\203', '\204', '\205', '\206', '\207', '\210', '\211', '\221', '\222', '\223', '\224', '\225', '\226', '\227', '\230', '\231', '\242', '\243', '\244', '\245', '\246', - '\247', '\250', '\251', '\300', '\152', '\320', '\241', '\007', + '\247', '\250', '\251', '\300', '\117', '\320', '\137', '\007', '\040', '\041', '\042', '\043', '\044', '\025', '\006', '\027', '\050', '\051', '\052', '\053', '\054', '\011', '\012', '\033', '\060', '\061', '\032', '\063', '\064', '\065', '\066', 
'\010', @@ -408,10 +413,10 @@ static char const ascii_to_ebcdic[] = '\130', '\131', '\142', '\143', '\144', '\145', '\146', '\147', '\150', '\151', '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', '\170', '\200', '\212', '\213', '\214', '\215', - '\216', '\217', '\220', '\232', '\233', '\234', '\235', '\236', - '\237', '\240', '\252', '\253', '\254', '\255', '\256', '\257', + '\216', '\217', '\220', '\152', '\233', '\234', '\235', '\236', + '\237', '\240', '\252', '\253', '\254', '\112', '\256', '\257', '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\270', '\271', '\272', '\273', '\274', '\241', '\276', '\277', '\312', '\313', '\314', '\315', '\316', '\317', '\332', '\333', '\334', '\335', '\336', '\337', '\352', '\353', '\354', '\355', '\356', '\357', '\372', '\373', '\374', '\375', '\376', '\377' @@ -464,21 +469,21 @@ static char const ebcdic_to_ascii[] = '\220', '\221', '\026', '\223', '\224', '\225', '\226', '\004', '\230', '\231', '\232', '\233', '\024', '\025', '\236', '\032', '\040', '\240', '\241', '\242', '\243', '\244', '\245', '\246', - '\247', '\250', '\133', '\056', '\074', '\050', '\053', '\041', + '\247', '\250', '\325', '\056', '\074', '\050', '\053', '\174', '\046', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\135', '\044', '\052', '\051', '\073', '\136', + '\260', '\261', '\041', '\044', '\052', '\051', '\073', '\176', '\055', '\057', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\174', '\054', '\045', '\137', '\076', '\077', + '\270', '\271', '\313', '\054', '\045', '\137', '\076', '\077', '\272', '\273', '\274', '\275', '\276', '\277', '\300', '\301', '\302', '\140', '\072', '\043', '\100', '\047', '\075', '\042', '\303', '\141', '\142', '\143', '\144', '\145', '\146', '\147', '\150', '\151', '\304', '\305', '\306', '\307', '\310', '\311', '\312', '\152', '\153', '\154', '\155', '\156', '\157', 
'\160', - '\161', '\162', '\313', '\314', '\315', '\316', '\317', '\320', - '\321', '\176', '\163', '\164', '\165', '\166', '\167', '\170', - '\171', '\172', '\322', '\323', '\324', '\325', '\326', '\327', + '\161', '\162', '\136', '\314', '\315', '\316', '\317', '\320', + '\321', '\345', '\163', '\164', '\165', '\166', '\167', '\170', + '\171', '\172', '\322', '\323', '\324', '\133', '\326', '\327', '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\340', '\341', '\342', '\343', '\344', '\135', '\346', '\347', '\173', '\101', '\102', '\103', '\104', '\105', '\106', '\107', '\110', '\111', '\350', '\351', '\352', '\353', '\354', '\355', '\175', '\112', '\113', '\114', '\115', '\116', '\117', '\120', diff --git a/tests/dd/ascii.sh b/tests/dd/ascii.sh new file mode 100755 index 0000000..9ef158f --- /dev/null +++ b/tests/dd/ascii.sh @@ -0,0 +1,71 @@ +#!/bin/sh +# test conv=ascii + +# Copyright (C) 2014 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ dd + +( + # Two lines, EBCDIC " A A" and " A ", followed by all the bytes in order. + printf '\100\301\100\301\100\301\100\100' && + printf $(for i in $(seq 0 255); do printf '\\%03o' $i; done; echo '') +) >in || framework_failure_ + +( + # The converted lines, with trailing spaces removed.
+ printf ' A A\n A\n' && + printf '\000\001\002\003\n\234\011\206\177\n' && + printf '\227\215\216\013\n\014\015\016\017\n' && + printf '\020\021\022\023\n\235\205\010\207\n' && + printf '\030\031\222\217\n\034\035\036\037\n' && + printf '\200\201\202\203\n\204\012\027\033\n' && + printf '\210\211\212\213\n\214\005\006\007\n' && + printf '\220\221\026\223\n\224\225\226\004\n' && + printf '\230\231\232\233\n\024\025\236\032\n' && + printf '\040\240\241\242\n\243\244\245\246\n' && + printf '\247\250\325\056\n\074\050\053\174\n' && + printf '\046\251\252\253\n\254\255\256\257\n' && + printf '\260\261\041\044\n\052\051\073\176\n' && + printf '\055\057\262\263\n\264\265\266\267\n' && + printf '\270\271\313\054\n\045\137\076\077\n' && + printf '\272\273\274\275\n\276\277\300\301\n' && + printf '\302\140\072\043\n\100\047\075\042\n' && + printf '\303\141\142\143\n\144\145\146\147\n' && + printf '\150\151\304\305\n\306\307\310\311\n' && + printf '\312\152\153\154\n\155\156\157\160\n' && + printf '\161\162\136\314\n\315\316\317\320\n' && + printf '\321\345\163\164\n\165\166\167\170\n' && + printf '\171\172\322\323\n\324\133\326\327\n' && + printf '\330\331\332\333\n\334\335\336\337\n' && + printf '\340\341\342\343\n\344\135\346\347\n' && + printf '\173\101\102\103\n\104\105\106\107\n' && + printf '\110\111\350\351\n\352\353\354\355\n' && + printf '\175\112\113\114\n\115\116\117\120\n' && + printf '\121\122\356\357\n\360\361\362\363\n' && + printf '\134\237\123\124\n\125\126\127\130\n' && + printf '\131\132\364\365\n\366\367\370\371\n' && + printf '\060\061\062\063\n\064\065\066\067\n' && + printf '\070\071\372\373\n\374\375\376\377\n' +) >exp || framework_failure_ + +dd if=in of=out conv=ascii cbs=4 +cp ./in ./out ./exp /tmp + +fail=0 +compare exp out || fail=1 + +Exit $fail diff --git a/tests/local.mk b/tests/local.mk index 6d44144..5286bfb 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -470,6 +470,7 @@ all_tests = \ tests/df/no-mtab-status.sh \ 
tests/df/skip-duplicates.sh \ tests/df/skip-rootfs.sh \ + tests/dd/ascii.sh \ tests/dd/direct.sh \ tests/dd/misc.sh \ tests/dd/no-allocate.sh \ -- 1.9.0