savannah-register-public
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Savannah-register-public] Re: [task #4224] Submission of shuffle


From: Davis Houlton
Subject: [Savannah-register-public] Re: [task #4224] Submission of shuffle
Date: Mon, 6 Jun 2005 09:17:49 +0000
User-agent: KMail/1.7.2

On Saturday 04 June 2005 20:46, Sebastian Wieseler wrote:
> Can you please mail ``shuffle.c'' to me or attach this file on the tracker
> item?

Ok, here we are, with the GPL included...
/* 
    shuffle - A command to randomize file contents
    Copyright (C) 2005 Davis Houlton.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <libgen.h>
#include <errno.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <getopt.h>
#include <string.h>
#include <time.h>

#define PROGRAM_NAME "shuffle"
#define AUTHORS "Davis Houlton"

/* Number of element slots added to the input table each time it has to
 * grow while reading from standard input. */
#define DEF_BUFFER_SIZE 1000

/* g_stdin_flag: non-zero, read from stdin; zero, treat parameters as files */
int g_stdin_flag = 0;
/* g_stdout_flag: non-zero, write the shuffled output to stdout */
int g_stdout_flag = 0;

/* g_max_lines_output: 0 displays all input; a negative value displays output
 * from the end; a positive value limits display to the first x lines. */
int g_max_lines_output = 0;
/* g_limit_output_flag: non-zero when g_max_lines_output is in effect */
int g_limit_output_flag = 0;

/* g_output_file: file name given with -o, or NULL when -o is absent */
char *g_output_file = NULL;

/* g_delimiter: character that breaks the data into distinct elements */
char g_delimiter = '\n';

/* g_cmd: command name, pulled from the command line */
char *g_cmd = NULL;

/* g_input: global table of elements read so far
 * g_input_size: number of slots allocated in g_input
 * g_line_count: number of elements currently held in g_input */
char **g_input = NULL;
long g_input_size = 0;
long g_line_count = 0;

/* Display usage.
 *
 * Prints the command synopsis (using the command name stored in g_cmd),
 * the list of supported options, and a short description of where input
 * is read from and where output is written to. Everything goes to
 * standard output. */
void display_usage(void) {
 printf("Usage: %s [OPTION]... [FILE]...\n",g_cmd);
 puts("shuffle contents of FILE(s), or standard input.");
 puts("");
 puts("  -d, --use-custom-delimiter=CHAR  Specify delimiter");
 puts("  -h, --head=LINES                 Limit output to LINES");
 puts("  -o, --output=OUT_FILE            Place ALL output in OUT_FILE");
 puts("  -z, --use-null-delimiter         Use null character as delimiter");
 puts("  -?, --help                       This help message.");
 puts("");
 /*Long messages are split with adjacent string literals so that no
  *literal ever spans a source line.*/
 puts("When no FILE, input is standard input and default output is standard "
      "output.");
 puts("When FILE, input is FILE and output is FILE");
 puts("When -o OUT_FILE, *all* output is placed in OUT_FILE. If OUT_FILE is -, "
      "output is standard output.");
}
/* Calculate new random line to pull.
 *
 * file_line_count: number of lines to choose from.
 *
 * Returns a 1-based line number in the range 1..file_line_count, drawn with
 * rand(). When file_line_count is 0 the result is 1.
 *
 * The arithmetic is done in double precision: a float cannot represent
 * values of rand() close to RAND_MAX exactly and rounds them up, which would
 * let the result reach file_line_count+1. */
long get_random_line(long file_line_count) {
 /*fraction is always in the half-open interval [0,1)*/
 double fraction=rand()/(RAND_MAX+1.0);

 return 1+(long) ( ((double) file_line_count)*fraction );
}

/* Shuffle a file */
void shuffle(char* file, int final) {
 /*Input Buffers*/
 char** realloc_buffer;
 char* last_line;

 /*Buffer meta-data*/
 long file_line_estimate=0;
 long bytes_read=0;

 /*Line meta-deta*/
 ssize_t line_len=0;
 ssize_t last_line_len=0;
 size_t buffer_size=0;
 long random_line=0;

 /*File data*/
 FILE* fp=NULL;
 struct stat file_info;

 /*Misc*/
 int rv=0;
 int i=0;

 /*Setup input file*/
 if (g_stdin_flag) {
  /*Use stdin for input*/
  fp=stdin;
  g_input_size+=DEF_BUFFER_SIZE;
 }
 else {
  /*Use file for input*/
  fp=fopen(file,"r");
  if (fp==NULL) {
   printf("%s: Error! could not open %s (errno=%d), halting.
\n",g_cmd,file,errno);
   exit(1);
  }

  /*We need the filesize to guestimate a first crack at buffer size*/
  rv=stat(file,&file_info);
  if (rv<0) {
   printf("%s: Error! could not stat %s (errno=%d), halting.
\n",g_cmd,file,errno);
   exit(1);
  }
 
  /*Figure roughly average of 80 characters per line*/
  file_line_estimate=file_info.st_size/81+1;
  g_input_size+=file_line_estimate;
 }

 /*Setup input buffer*/
 g_input=(char**) realloc(g_input, sizeof(char*) * g_input_size); 
 g_input[g_line_count]=NULL;

 /*Read each file line into memory*/
 while 
( (line_len=getdelim(&g_input[g_line_count],&buffer_size,g_delimiter,fp)) != 
-1 ) {
  /*Keep track of how much we've read*/
  last_line_len=line_len;
  bytes_read+=line_len;
  g_line_count++;

  /*If we advance to the end of our line buffer, we'll need to allocate
   *space in memory for more lines.*/
  if (g_line_count==g_input_size) {

   /*We've reached the end of our line buffer, figure out how many bytes
    *we have left over (if we can), and use that as the % increase for our 
buffer size.*/
   if (g_stdin_flag) {
    /*Increase the buffer for stdin by a default amount--no
     *way to predict final line count really.*/
    g_input_size+=DEF_BUFFER_SIZE;
   }
   else {
    /*Recalculate new number of lines we'll need*/
    g_input_size+=(file_info.st_size/bytes_read+1)*file_line_estimate;
   }

   realloc_buffer=(char**) realloc(g_input,sizeof(char*)*g_input_size);

   if (realloc_buffer!=NULL) {
    g_input=realloc_buffer;
   }
   else {
    /*Error-realloc failed*/
    printf("%s: Error! Out of memory (requested %d bytes, got 
0)\n",g_cmd,g_input_size);
   }
  }

  /*Prepare for next line*/
  g_input[g_line_count]=NULL;
  buffer_size=0;
 }

 /* If a line ends with a delimiter, then we may have erronously allocated an 
extra line*/
 if (line_len<0) {
  /*If final, we free that extra buffer so we don't get leaks with multiple 
files with no -o*/
  if (final) {
   free(g_input[g_line_count]);
  }
  line_len=last_line_len;
 }
 

 /*Last line may not have delimiter present, so we may need to add it.*/
 /*Note: we don't care about zero delimiters, since we write that out manually 
anyway.*/
 if ( (g_delimiter!='\0') && (g_input[g_line_count-1]
[line_len-1]!=g_delimiter) ) {
  /*Grow the last line by the delimiter character*/ 
  last_line=(char*) 
realloc(g_input[g_line_count-1],sizeof(char)*(line_len+1));
  if (last_line!=NULL) {
   last_line[line_len-1]=g_delimiter;
   last_line[line_len-1]=0;
   g_input[g_line_count-1]=last_line;
  }
  else {
   /*Error-realloc failed.*/
   printf("%s: Error! Out of memory (line %d %s %s, len %d).
\n",g_cmd,g_line_count,g_input[g_line_count-1],last_line,line_len+1);
   exit(3);
  }
 }
 fclose(fp);

 /*Only shuffle if file is last file in input set*/
 if (final) {
  /*Setup output file*/
  if (g_stdout_flag) {
   /*Use stdout for output*/
   fp=stdout;
  }
  else {
   /*Overwrite original file with new shuffled contents, unless -o is 
present*/
   if (g_output_file==NULL) { 
    fp=fopen(file,"w");
   }
   else {
    fp=fopen(g_output_file,"w");
   }

   if (fp==NULL) {
    printf("%s: Error! Could not write to %s (errno=%d), halting.
\n",g_cmd,file,errno);
    exit(2);
   }
  }


  /*We grab a line out of our buffer at random and write that to file. We then 
   *delete that line from the array, shrinking our input set by 1. At the last 
iteration
   *we have only one line.*/
  while (g_line_count>0) {
   /*Randomly select a line*/
   random_line=get_random_line(g_line_count);     
 
   /*If we are limiting our output, we need to make sure we are within limit 
range.*/
   if (g_limit_output_flag) {
    /* if output only first X lines...*/
    if (g_max_lines_output>0) {
     /*Write line to file*/
     fprintf(fp,"%s",g_input[random_line-1]);
     g_max_lines_output--;
 
     /*if we have filled output limit, destroy input and we're done!*/
     if (g_max_lines_output==0) {
      for (i=0;i<g_line_count;i++) {
       free(g_input[i]);
       g_input[i]=NULL;
      }
      /*the g_line_count-- below will end the loop*/
      g_line_count=1; 
     } 
    }
    else {
     /* only output X last lines...*/
     if (g_line_count==abs(g_max_lines_output)) {
      /*Write line to file*/
      fprintf(fp,"%s",g_input[random_line-1]);
      g_max_lines_output++;
     }
    }
   }
   else {
    /*Write line to file*/
    fprintf(fp,"%s",g_input[random_line-1]);
   }

   /*NOTE: All g_input past this point COULD be NULL.*/

   /*Shrink array by replacing current random selection with last line*/
   free(g_input[random_line-1]);
   if (random_line!=g_line_count) {
    g_input[random_line-1]=g_input[g_line_count-1];
   }
   g_input[g_line_count]=NULL;

   g_line_count--;
  }
  fclose(fp);

  /*Clean up global memory*/
  free(g_input);
  g_input=NULL;
  g_line_count=0;
  g_input_size=0;
 }
}

/* Program entry point.
 *
 * Seeds the random number generator, parses the command line options
 * (-o/--output, -z/--use-null-delimiter, -d/--use-custom-delimiter,
 * -h/--head, -?/--help) into the global settings, and then shuffles either
 * standard input (when no FILE parameter is left) or every FILE in turn.
 *
 * Exit status: 0 on success or after printing the help text, 4 for an
 * invalid option argument, 3 when memory runs out while storing the output
 * file name. Failures inside shuffle() terminate the program there. */
int main(int argc, char **argv) {
 int option;
 int last_input_file_flag=0;
 int rv;
 long head_value=0;
 char* head_end=NULL;
 static struct option options[] = {
  {"output",required_argument,NULL,'o'},
  {"use-null-delimiter",no_argument,NULL,'z'},
  {"use-custom-delimiter",required_argument,NULL,'d'},
  {"head",required_argument,NULL,'h'},
  {"help",no_argument,NULL,'?'},
  {0,0,0,0}
 };

 /*get_random_line draws from rand(), so the seed has to go to srand()*/
 srand((unsigned int) time(NULL));
 /*salt random number generator*/
 for (rv=0; rv<=(50); rv++) {
  get_random_line(g_line_count);
 }

 /*Identify command name*/
 g_cmd=basename(argv[0]);

 while( (option=getopt_long(argc,argv,"?o:zd:h:",options,NULL) )!=-1)  {
  switch(option) {
  case '?':
   display_usage();
   exit(0);

  /*-z, --use-null-delimiter: use \0 as a data break*/
  case 'z':
   g_delimiter='\0';
   break;

  /*-d, --use-custom-delimiter (char): use operator supplied delimiter as
   *data break*/
  case 'd':
   /*Test optarg[0] first: for an empty argument optarg[1] lies beyond
    *the end of the string.*/
   if ( (optarg[0]=='\0') || (optarg[1]!='\0') ) {
    fprintf(stderr,"%s: ERROR - -d: delimiter (%s) must be a single character.\n",
            g_cmd,optarg);
    exit(4);
   }
   g_delimiter=optarg[0];
   break;

  /*-h, --head (int): display only x first or last lines*/
  case 'h':
   if (optarg==NULL) {
    fprintf(stderr,"%s: ERROR - -h: display line limiter must be an integer.\n",
            g_cmd);
    exit(4);
   }

   /*errno must be cleared first, strtol never resets it. The value has
    *to be a complete number that fits an int, and its magnitude must be
    *representable as well (this rules out INT_MIN).*/
   errno=0;
   head_value=strtol(optarg,&head_end,10);
   if ( (head_end==optarg) || (*head_end!='\0') || (errno==ERANGE) ||
        (head_value>INT_MAX) || (head_value<-INT_MAX) ) {
    fprintf(stderr,"%s: ERROR - -h: display line limiter (%s) must be an integer.\n",
            g_cmd,optarg);
    exit(4);
   }
   g_max_lines_output=(int) head_value;

   if (g_max_lines_output!=0) {
    g_limit_output_flag=1;
   }
   break;

  /*-o, --output (char *): place all output in a specific file*/
  case 'o':
   /*A repeated -o replaces the earlier one completely*/
   free(g_output_file);
   g_output_file=strdup(optarg);
   if (g_output_file==NULL) {
    fprintf(stderr,"%s: ERROR - -o: out of memory.\n",g_cmd);
    exit(3);
   }

   /*on -o -, output is directed to stdout*/
   g_stdout_flag=(strcmp(g_output_file,"-")==0);
   break;

  default:
   break;
  }
 }

 /*If no other parameters are present, use stdin as the input source*/
 if (optind>=argc) {
  g_stdin_flag=1;

  /*If no output parameters are present, assume stdout as output
   *destination*/
  if (g_output_file==NULL) {
   g_stdout_flag=1;
  }

  shuffle(NULL,1);
 }
 else {
  /*Shuffle input files*/
  while (optind<argc) {
   /*If we are writing output via -o, we only actually shuffle and write
    *upon reading of final file.*/
   if ((g_output_file==NULL) || (optind==argc-1)) {
    last_input_file_flag=1;
   }
   shuffle(argv[optind],last_input_file_flag);
   optind++;
  }
 }

 return 0;
}




reply via email to

[Prev in Thread] Current Thread [Next in Thread]