[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Savannah-register-public] Re: [task #4224] Submission of shuffle
From: |
Davis Houlton |
Subject: |
[Savannah-register-public] Re: [task #4224] Submission of shuffle |
Date: |
Sun, 5 Jun 2005 09:46:02 +0000 |
User-agent: |
KMail/1.7.2 |
On Saturday 04 June 2005 20:46, Sebastian Wieseler wrote:
> Can you please mail ``shuffle.c'' to me or attach this file on the tracker
> item?
I've tried emailing a package of attachments, but that was bounced, so if this
is a duplicate, I apologize. For my second attempt, I just pasted the text at
the end of this message.
To build, a simple gcc -o shuffle shuffle.c should suffice...(with your
favorite CFLAGS, of course).
Sure...here we are:
/* BEGIN */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <libgen.h>
#include <errno.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <getopt.h>
#include <string.h>
#include <time.h>
#define PROGRAM_NAME    "shuffle"
#define AUTHORS         "Davis Houlton"

/* Number of line slots added to the input array per growth step when the
 * input is standard input (no file size is available to estimate from). */
#define DEF_BUFFER_SIZE 1000

/* Non-zero: read from standard input.  Zero: read the FILE operands. */
int g_stdin_flag = 0;

/* Non-zero: write the shuffled result to standard output. */
int g_stdout_flag = 0;

/* Requested output limit: 0 shows all input, a positive value limits the
 * display to the first x lines, a negative value displays output from the
 * end.  Only consulted when g_limit_output_flag is set. */
int g_max_lines_output = 0;
int g_limit_output_flag = 0;

/* Destination given with -o, or NULL when -o was not used. */
char *g_output_file = NULL;

/* Character that breaks the input into distinct elements. */
char g_delimiter = '\n';

/* Command name, pulled from the command line. */
char *g_cmd = NULL;

/* Global input buffer: one allocated string per element read so far. */
char **g_input = NULL;

/* Number of pointer slots currently allocated in g_input. */
long g_input_size = 0;

/* Number of elements currently stored in g_input. */
long g_line_count = 0;
/*
 * Display usage.
 *
 * Prints the synopsis (using the command name stored in g_cmd) followed by
 * the option summary and the input/output rules to standard output.
 */
void display_usage(void) {
    printf("Usage: %s [OPTION]... [FILE]...\n", g_cmd);
    puts("shuffle contents of FILE(s), or standard input.");
    puts("");
    puts(" -d, --use-custom-delimiter=CHAR Specify delimiter");
    puts(" -h, --head=LINES Limit output to LINES");
    puts(" -o, --output=OUT_FILE Place ALL output in OUT_FILE");
    puts(" -z, --use-null-delimiter Use null character as delimiter");
    puts(" -?, --help This help message.");
    puts("");
    /* Each message is a single string literal: a literal may not span a
     * raw line break. */
    puts("When no FILE, input is standard input and default output is standard output.");
    puts("When FILE, input is FILE and output is FILE");
    puts("When -o OUT_FILE, *all* output is placed in OUT_FILE. If OUT_FILE is -, output is standard output.");
}
/*
 * Calculate a new random line to pull.
 *
 * file_line_count: number of lines to choose from.
 *
 * Returns a 1-based line number in the range [1, file_line_count].  For a
 * count of 1 or less the result is always 1.  rand() is consumed exactly once
 * per call regardless of the count, so callers that invoke this function just
 * to advance the generator keep working.
 */
long get_random_line(long file_line_count) {
    /* Scale in double precision.  A float cannot represent values close to
     * RAND_MAX exactly and rounds them up to RAND_MAX+1, which used to yield
     * file_line_count+1 -- one past the last valid line. */
    double fraction = (double) rand() / ((double) RAND_MAX + 1.0);
    long line;

    if (file_line_count <= 1) {
        return 1;
    }
    line = 1 + (long) (fraction * (double) file_line_count);
    /* fraction is strictly below 1.0, but clamp anyway so that rounding in
     * the multiplication can never produce an out-of-range line. */
    if (line > file_line_count) {
        line = file_line_count;
    }
    return line;
}
/* Shuffle a file */
void shuffle(char* file, int final) {
/*Input Buffers*/
char** realloc_buffer;
char* last_line;
/*Buffer meta-data*/
long file_line_estimate=0;
long bytes_read=0;
/*Line meta-deta*/
ssize_t line_len=0;
ssize_t last_line_len=0;
size_t buffer_size=0;
long random_line=0;
/*File data*/
FILE* fp=NULL;
struct stat file_info;
/*Misc*/
int rv=0;
int i=0;
/*Setup input file*/
if (g_stdin_flag) {
/*Use stdin for input*/
fp=stdin;
g_input_size+=DEF_BUFFER_SIZE;
}
else {
/*Use file for input*/
fp=fopen(file,"r");
if (fp==NULL) {
printf("%s: Error! could not open %s (errno=%d), halting.
\n",g_cmd,file,errno);
exit(1);
}
/*We need the filesize to guestimate a first crack at buffer size*/
rv=stat(file,&file_info);
if (rv<0) {
printf("%s: Error! could not stat %s (errno=%d), halting.
\n",g_cmd,file,errno);
exit(1);
}
/*Figure roughly average of 80 characters per line*/
file_line_estimate=file_info.st_size/81+1;
g_input_size+=file_line_estimate;
}
/*Setup input buffer*/
g_input=(char**) realloc(g_input, sizeof(char*) * g_input_size);
g_input[g_line_count]=NULL;
/*Read each file line into memory*/
while
( (line_len=getdelim(&g_input[g_line_count],&buffer_size,g_delimiter,fp)) !=
-1 ) {
/*Keep track of how much we've read*/
last_line_len=line_len;
bytes_read+=line_len;
g_line_count++;
/*If we advance to the end of our line buffer, we'll need to allocate
*space in memory for more lines.*/
if (g_line_count==g_input_size) {
/*We've reached the end of our line buffer, figure out how many bytes
*we have left over (if we can), and use that as the % increase for our
buffer size.*/
if (g_stdin_flag) {
/*Increase the buffer for stdin by a default amount--no
*way to predict final line count really.*/
g_input_size+=DEF_BUFFER_SIZE;
}
else {
/*Recalculate new number of lines we'll need*/
g_input_size+=(file_info.st_size/bytes_read+1)*file_line_estimate;
}
realloc_buffer=(char**) realloc(g_input,sizeof(char*)*g_input_size);
if (realloc_buffer!=NULL) {
g_input=realloc_buffer;
}
else {
/*Error-realloc failed*/
printf("%s: Error! Out of memory (requested %d bytes, got
0)\n",g_cmd,g_input_size);
}
}
/*Prepare for next line*/
g_input[g_line_count]=NULL;
buffer_size=0;
}
/* If a line ends with a delimiter, then we may have erronously allocated an
extra line*/
if (line_len<0) {
/*If final, we free that extra buffer so we don't get leaks with multiple
files with no -o*/
if (final) {
free(g_input[g_line_count]);
}
line_len=last_line_len;
}
/*Last line may not have delimiter present, so we may need to add it.*/
/*Note: we don't care about zero delimiters, since we write that out manually
anyway.*/
if ( (g_delimiter!='\0') && (g_input[g_line_count-1]
[line_len-1]!=g_delimiter) ) {
/*Grow the last line by the delimiter character*/
last_line=(char*)
realloc(g_input[g_line_count-1],sizeof(char)*(line_len+1));
if (last_line!=NULL) {
last_line[line_len-1]=g_delimiter;
last_line[line_len-1]=0;
g_input[g_line_count-1]=last_line;
}
else {
/*Error-realloc failed.*/
printf("%s: Error! Out of memory (line %d %s %s, len %d).
\n",g_cmd,g_line_count,g_input[g_line_count-1],last_line,line_len+1);
exit(3);
}
}
fclose(fp);
/*Only shuffle if file is last file in input set*/
if (final) {
/*Setup output file*/
if (g_stdout_flag) {
/*Use stdout for output*/
fp=stdout;
}
else {
/*Overwrite original file with new shuffled contents, unless -o is
present*/
if (g_output_file==NULL) {
fp=fopen(file,"w");
}
else {
fp=fopen(g_output_file,"w");
}
if (fp==NULL) {
printf("%s: Error! Could not write to %s (errno=%d), halting.
\n",g_cmd,file,errno);
exit(2);
}
}
/*We grab a line out of our buffer at random and write that to file. We then
*delete that line from the array, shrinking our input set by 1. At the last
iteration
*we have only one line.*/
while (g_line_count>0) {
/*Randomly select a line*/
random_line=get_random_line(g_line_count);
/*If we are limiting our output, we need to make sure we are within limit
range.*/
if (g_limit_output_flag) {
/* if output only first X lines...*/
if (g_max_lines_output>0) {
/*Write line to file*/
fprintf(fp,"%s",g_input[random_line-1]);
g_max_lines_output--;
/*if we have filled output limit, destroy input and we're done!*/
if (g_max_lines_output==0) {
for (i=0;i<g_line_count;i++) {
free(g_input[i]);
g_input[i]=NULL;
}
/*the g_line_count-- below will end the loop*/
g_line_count=1;
}
}
else {
/* only output X last lines...*/
if (g_line_count==abs(g_max_lines_output)) {
/*Write line to file*/
fprintf(fp,"%s",g_input[random_line-1]);
g_max_lines_output++;
}
}
}
else {
/*Write line to file*/
fprintf(fp,"%s",g_input[random_line-1]);
}
/*NOTE: All g_input past this point COULD be NULL.*/
/*Shrink array by replacing current random selection with last line*/
free(g_input[random_line-1]);
if (random_line!=g_line_count) {
g_input[random_line-1]=g_input[g_line_count-1];
}
g_input[g_line_count]=NULL;
g_line_count--;
}
fclose(fp);
/*Clean up global memory*/
free(g_input);
g_input=NULL;
g_line_count=0;
g_input_size=0;
}
}
/*
 * Program entry point: parse the options, then shuffle standard input or
 * each FILE operand.
 *
 * Without FILE operands, standard input is shuffled; the result goes to
 * standard output unless -o names a file.  With FILE operands and no -o,
 * every file is shuffled individually.  With -o, all files are read first
 * and shuffled together once the last one has been read.
 *
 * Exit status: 0 on success, 3 when memory runs out while parsing options,
 * 4 on an invalid option argument (shuffle() may exit with its own codes).
 */
int main(int argc, char **argv) {
    int option;
    int last_input_file_flag = 0;
    int rv;
    long head_value;
    char *head_end;
    static struct option options[] = {
        {"output", required_argument, NULL, 'o'},
        {"use-null-delimiter", no_argument, NULL, 'z'},
        {"use-custom-delimiter", required_argument, NULL, 'd'},
        {"head", required_argument, NULL, 'h'},
        {"help", no_argument, NULL, '?'},
        {0, 0, 0, 0}
    };

    /*Random lines are drawn with rand(), so the matching seed call is
     *srand(); srandom() is only guaranteed to seed random().*/
    srand((unsigned int) time(NULL));
    /*salt random number generator*/
    for (rv = 0; rv <= 50; rv++) {
        get_random_line(g_line_count);
    }

    /*Identify command name*/
    g_cmd = basename(argv[0]);

    while ((option = getopt_long(argc, argv, "?o:zd:h:", options, NULL)) != -1) {
        switch (option) {
        /*-?, --help.  getopt_long also returns '?' for an unrecognised
         *option, so those end up here as well.*/
        case '?':
            display_usage();
            exit(0);

        /*-z, --use-null-delimiter: use \0 as a data break*/
        case 'z':
            g_delimiter = '\0';
            break;

        /*-d, --use-custom-delimiter (char): use operator supplied delimiter
         *as data break*/
        case 'd':
            /*Test the first character before looking at the second, so an
             *empty argument is rejected without reading past its end.*/
            if (optarg[0] == '\0' || optarg[1] != '\0') {
                fprintf(stderr, "%s: ERROR - -d: delimiter (%s) must be a single character.\n",
                        g_cmd, optarg);
                exit(4);
            }
            g_delimiter = optarg[0];
            break;

        /*-h, --head (int): display only x first or last lines*/
        case 'h':
            if (optarg == NULL) {
                fprintf(stderr, "%s: ERROR - -h: display line limiter must be an integer.\n",
                        g_cmd);
                exit(4);
            }
            /*strtol reports overflow only through errno, so clear it first;
             *the end pointer reveals empty or non-numeric input.*/
            errno = 0;
            head_value = strtol(optarg, &head_end, 10);
            if (head_end == optarg || *head_end != '\0' || errno == ERANGE
                || head_value > INT_MAX || head_value < -INT_MAX) {
                fprintf(stderr, "%s: ERROR - -h: display line limiter (%s) must be an integer.\n",
                        g_cmd, optarg);
                exit(4);
            }
            g_max_lines_output = (int) head_value;
            /*A limit of 0 means "show everything".*/
            g_limit_output_flag = (g_max_lines_output != 0);
            break;

        /*-o, --output (char *): place all output in a specific file*/
        case 'o':
            /*If -o is repeated the last one wins.*/
            free(g_output_file);
            g_output_file = strdup(optarg);
            if (g_output_file == NULL) {
                fprintf(stderr, "%s: Error! Out of memory (option -o)\n", g_cmd);
                exit(3);
            }
            /*on -o -, output is directed to stdout*/
            g_stdout_flag = (strcmp(g_output_file, "-") == 0);
            break;

        default:
            break;
        }
    }

    /*If no other parameters are present, use stdin as the input source*/
    if (optind >= argc) {
        g_stdin_flag = 1;
        /*If no output parameters are present, assume stdout as output
         *destination*/
        if (g_output_file == NULL) {
            g_stdout_flag = 1;
        }
        shuffle(NULL, 1);
    }
    else {
        /*Shuffle input files*/
        while (optind < argc) {
            /*If we are writing output via -o, we only actually shuffle and
             *write upon reading of final file.*/
            if ((g_output_file == NULL) || (optind == argc - 1)) {
                last_input_file_flag = 1;
            }
            shuffle(argv[optind], last_input_file_flag);
            optind++;
        }
    }
    return 0;
}