I am working on a program written in C that recursively walks a given directory in order to print out the Nth largest files and their sizes in bytes. I am using two arrays to account for the filesystem entry names and filesystem entry sizes respectively.
EDIT: I have updated my program to implement the suggestions shared in the comment section. My focus now is on correctly implementing a swap operation within my iSort
function.
#include <stdio.h>
#include <dirent.h>
#include <sys/stat.h>
#include <string.h>
// number of files to display size information for
const int N = 10;
// a struct used to hold filesystem entry names and corresponding sizes in bytes
struct info {
char name[1024];
int size;
};
/* A simple implementation of insertion sort that will operate upon
an array of info structs, sorting them by their member size in
ascending order */
void iSort(struct info *fs_info[], int size, int info_size)
{
int i, j, key;
for (i = 1; i < size; i++)
{
key = fs_info[i]->size;
j = i - 1;
while (j >= 0 && fs_info[j]->size > key)
{
printf("info_size: %d\n", info_size);
// TODO complete a swap operation
memmove(fs_info[j + 1], fs_info[j], info_size);
j = j - 1;
}
fs_info[j + 1]->size = key;
}
}
void get_size(char *path, struct info fs_info[N], int info_size)
{
static int items_added = 0;
static int max_size = 0;
struct stat st;
if (stat(path, &st) == 0)
{
if (items_added < N) // if array capacity will not be exceeded
{
strcpy(fs_info[items_added].name, path);
fs_info[items_added].size = st.st_size;
if (st.st_size > max_size)
max_size = st.st_size;
items_added++;
}
else
{
// do a comparison to determine where to insert
// sort first
iSort(&fs_info, 10, info_size); // this function call results in a seqfault
}
}
else
{
printf("Error getting stat for entry %s: %d\n", path, stat(path, &st));
}
}
void walk(const char *currDir, struct info fs_info[N], int info_size)
{
DIR *dir = opendir(currDir);
struct dirent *entry;
if (dir == NULL)
{
// directory could not be opened
return;
}
while ((entry = readdir(dir)) != NULL)
{
if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
{
// if directory is current dir or parent dir
continue;
}
char path_to_entry[1024];
snprintf(path_to_entry, sizeof(path_to_entry), "%s/%s", currDir, entry->d_name);
// use path_to_entry to call stats on the entry
get_size(path_to_entry, fs_info, info_size);
if (entry->d_type == DT_DIR)
{
// recursively visit subdirectories
walk(path_to_entry, fs_info, info_size);
}
}
closedir(dir);
}
int main(int argc, char *argv[])
{
if (argc != 2)
{
printf("Usage: %s <target directory>\n", argv[0]);
}
const char *target_dir = argv[1];
struct info fs_entries[N];
const int info_size = sizeof(struct info);
for (int i = 0; i < N; i++)
{
strcpy(fs_entries[i].name, "");
fs_entries[i].size = 0;
}
printf("Finding %d largest files in: %s\n", N, target_dir);
walk(target_dir, fs_entries, info_size);
for (int i = 0; i < N; i++)
{
printf("%s : %d\n", fs_entries[i].name, fs_entries[i].size);
}
return 0;
}
Currently, the memmove()
invocation in iSot()
results in a EXC_BAD_ACCESS
error, I am working on improving this function now. Any suggestions in the interim are appreciated. Thanks also to those who commented on this question earlier.
This is not so much an answer as it is an example of how one might code this with a bit less fiddling about with every bit/byte.
I don't run a flavour of UNIX, so this offering is untested and may contain typos and even bugs. I hope not.
#include <stdio.h> // From 'generic' to 'specific'
#include <string.h>
#include <sys/stat.h>
#include <dirent.h>
const int N = 10;
// Global array holding names and sizes
struct {
char name[ MAX_PATH ];
size_t size;
} biggies[ N ], wrk; // and a working buffer.
/* "Global" is bad for large projects.
* In this 'utility' program, global saves a LOT of typing/reading.
* Seek clarity, not conformity,
*
* "wrk" is used to buffer ALL paths encountered.
* Notice that each recursion is an EXTENSION of its parent.
* ONE working buffer to deal with.
*/
void walk() {
size_t len = strlen( wrk.name ); // The path so far...
DIR *dir;
if( ( dir = opendir( wrk.name ) ) == NULL )
return;
wrk.name[ len++ ] = '/'; // append a slash ahead of strcpy() below
struct dirent *entry;
while( ( entry = readdir( dir ) ) != NULL ) {
if( strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0 )
continue;
// Notice how each 'local' name is written to "the right spot" in one buffer
strcpy( wrk.name + len, entry->d_name );
if( entry->d_type == DT_DIR ) // directory, so recursion...
walk();
else {
struct stat st;
if( stat( wrk.name, &st ) != 0 ) {
fprintf( stderr, "Error stat'ing '%s'\n", wrk.name );
exit( EXIT_FAILURE );
}
// Add this info to working buffer.
wrk.size = st.st_size;
// Find where to 'insert' this (if it is larger than descending ordered list
for( int i = 0; i < N && biggies[i].size > wrk.size; i++ ) {} // loop
if( i < N ) {
// Slide smaller ones down (don't go out of bounds)
memmove( biggies[i + 1], biggies[i], (N-i-1) * sizeof biggies[0] );
// Copy this one in place.
memcpy( biggies[i], &wrk, sizeof biggies[0] );
}
}
}
closedir(dir);
}
int main( int argc, char *argv[]) {
char *target_dir = argv[1]; // This is okay, so far...
if( argc != 2 ) {
printf( "Usage: %s <target directory>\n", argv[0] );
puts( "Using current directory..." );
target_dir = ".";
}
strcpy( wrk.name, target_dir );
printf( "Finding %d largest files in: %s\n", N, wrk.name );
walk();
for( size_t i = 0; i < N && biggies[i].size != 0; i++ )
printf( "%s : %d\n", biggies[i].name, biggies[i].size);
return 0;
}