stats.c

/*
 * statistical analysis of file to see if it is binary or text data
 * 
 * every SKIP'th character is read, and checked to see if it is printable
 * (white space is considered printable)
 *
 * if at least THRESHHOLD% of the sampled characters are non-printable, it's
 * binary; otherwise it's text (IMPORTANT: THRESHHOLD is a percentage, not a
 * fraction!)
 *
 * Matt Bishop, ECS 36A
 * -- May 22, 2024		Original program
 */
#include <stdio.h>
#include <ctype.h>

#define	SKIP	10		/* read 1 out of every SKIP characters */
#define THRESHHOLD	25	/* a percent, *not* a fraction! */

/*
 * test to see if this is a binary file
 *
 * samples one character out of every SKIP from fp, starting at the
 * current file position, counts how many are printable (white space
 * counts as printable) and how many are not, and prints a one-line
 * report to stdout; the file is called binary when at least THRESHHOLD
 * percent of the sampled characters are non-printable, text otherwise
 *
 * fname	name shown in the report; used only for printing
 * fp		open stream to sample; it is not closed here
 *
 * sampling stops at end of file or as soon as fseek() fails; if nothing
 * could be sampled the percentage is reported as 0 (so "text")
 */
void bintest(char *fname, FILE *fp)
{
	int ch;				/* input character */
	int bin = 0;			/* number of binary characters */
	int nonbin = 0;			/* number of printing characters */
	int sample_size = 0;		/* number of characters read */
	double frac = 0.0;		/* percentage of binary characters */

	/*
	 * skip ahead SKIP-1 characters and analyze the next one, so that
	 * exactly one character out of every SKIP is examined
	 * (fseek returns 0 on success and nonzero on failure)
	 */
	while (fseek(fp, (long) (SKIP - 1), SEEK_CUR) == 0){
		/* if done, break out of the loop */
		if ((ch = fgetc(fp)) == EOF)
			break;
		/* read another character */
		sample_size += 1;
		/*
		 * classify the character; isprint() alone rejects tab,
		 * newline and friends, so test isspace() too because all
		 * white space is to be considered printable
		 */
		if (isprint(ch) || isspace(ch))
			nonbin++;
		else
			bin++;
	}

	/*
	 * now do the analysis
	 */
	/* what percent of the sampled characters are non-printable? */
	/* (leave it at 0 when there are no samples to avoid 0/0) */
	if (sample_size > 0)
		frac = ((double) bin) / ((double) sample_size) * 100;

	/* print out the results */
	printf("%s: %d samples,", fname, sample_size);
	printf(" %d non-binary, %d (%0.2f%%) binary; a ", nonbin, bin, frac);
	printf("%s", frac < THRESHHOLD ? "text" : "binary");
	printf(" file\n");
}

/*
 * it all begins here
 *
 * each command-line argument is treated as a file name; the file is
 * opened, handed to bintest() for analysis, and closed again
 *
 * returns 1 if no file was named; otherwise the number of files that
 * could not be opened (0 if every file was opened)
 */
int main(int argc, char *argv[])
{
	FILE *fp;	/* input file */
	char **a;	/* pointer in a for loop */
	int rv = 0;	/* return value; number of file open failures */

	/* be sure there's at least one file named */
	if (argc < 2){
		fprintf(stderr, "Usage: %s file [ ... ]\n", argv[0]);
		return(1);
	}

	/* walk the argument list */
	for (a = argv+1; *a != NULL; a++){
		/*
		 * open the file in binary mode: the data may not be text,
		 * and relative seeks with a nonzero offset are only
		 * well-defined on binary streams
		 */
		if ((fp = fopen(*a, "rb")) == NULL){
			perror(*a);
			rv++;
			continue;
		}
		/* do the test */
		bintest(*a, fp);
		/* done with this file */
		(void) fclose(fp);
	}

	/* that's all folks! */
	return(rv);
}




UC Davis sigil
Matt Bishop
Office: 2209 Watershed Sciences
Phone: +1 (530) 752-8060
Email: mabishop@ucdavis.edu
ECS 36A, Programming & Problem Solving
Version of April 2, 2024 at 12:13PM

You can get the raw source code here.

Valid HTML 4.01 Transitional Built with BBEdit Built on a Macintosh