cstringdirent.h

Find all files containing search term except sub elements


I'm trying to make a C program that will display all the files and folders containing a given search term. The search term is given as an argument when executing the program. A folder / file is displayed to standard output if its name contains the search term (case insensitive). The difficulty though is that I do not want to output files and subfolders that are contained in a folder that contains the search term. Here's an example:

Let's assume my search term is docker, this is the current output:

"/Users/me/.docker"
"/Users/me/.docker/contexts"
"/Users/me/.docker/contexts/meta"
"/Users/me/.docker/config.json"
"/Users/me/.docker/scan"
"/Users/me/.docker/scan/config.json"
"/Users/me/.docker/application-template"
"/Users/me/.docker/application-template/logs"
"/Users/me/.docker/application-template/logs/com.docker.log"
"/Users/me/.docker/daemon.json"
"/Users/me/.docker/run"
"/Users/me/Library/Application Support/Docker Desktop"
"/Users/me/Library/Application Support/Docker Desktop/blob_storage"
"/Users/me/Library/Application Support/Docker Desktop/blob_storage/6965e70b-e33a-4415-b9a8-e19996fe221d"

But this is the output I'm trying to achieve:

"/Users/me/.docker"
"/Users/me/Library/Application Support/Docker Desktop"

Here's my code so far:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>

/*
 * Print "<fpath>/<fname>" (quoted, one entry per line) when `term` occurs
 * exactly once, ignoring case, in the parent path `fpath`.
 *
 * Only `fpath` is searched; `fname` is never compared against `term`.
 * An empty `term` prints nothing.
 *
 * Always returns 0.
 */
static int display_info(const char *fpath, const char * fname, const char * term) {
    size_t what_len = strlen(term);
    int count = 0;

    /* Read-only cursor: fpath is const, so the scan pointer must be too. */
    const char *where = fpath;

    if (what_len) {
        /* Count non-overlapping occurrences of term in the parent path. */
        while ((where = strcasestr(where, term))) {
            where += what_len;
            count++;
        }

        if (count == 1) {
            printf("\"%s/%s\"\n", fpath, fname);
        }
    }

    return 0;
}


/*
 * Walk the directory tree rooted at basePath. For every entry other than
 * "." and "..", call display_info() and then try to descend into it.
 *
 * Anything opendir() cannot open ends that branch of the walk silently.
 * Entries whose full path does not fit in the local buffer are skipped
 * instead of overflowing it.
 */
static void listFilesRecursively(char * basePath, const char * searchTerm) {
    char path[1000];
    struct dirent * dp;
    DIR * dir = opendir(basePath);

    // Unable to open directory stream
    if (!dir)
        return;

    while ((dp = readdir(dir)) != NULL) {
        if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)
            continue;

        display_info(basePath, dp->d_name, searchTerm);

        // Construct new path from our base path, refusing to truncate
        int written = snprintf(path, sizeof path, "%s/%s", basePath, dp->d_name);
        if (written < 0 || (size_t)written >= sizeof path)
            continue;

        listFilesRecursively(path, searchTerm);
    }

    closedir(dir);
}


/*
 * Entry point: search the directory named by $HOME for the term given as
 * the single command-line argument.
 *
 * Returns EXIT_FAILURE when the argument count is wrong or HOME is unset,
 * 0 otherwise.
 */
int main(int argc, const char * argv[]) {
    if (argc != 2) {
        fprintf(stderr, "Please provide one argument\n");
        return EXIT_FAILURE;
    }

    /* getenv() returns NULL when the variable is not set. */
    char * home = getenv("HOME");
    if (home == NULL) {
        fprintf(stderr, "HOME is not set\n");
        return EXIT_FAILURE;
    }

    listFilesRecursively(home, argv[1]);
    return 0;
}

Any feedback is greatly appreciated thanks!


Solution

  • an alternative

    I will add a C example, changing a bit of your listFilesRecursively()...

    code for list_files() after change

    /*
     * Recursively search base_path for entries whose name contains pattern,
     * using strstr_ign() for the comparison.
     *
     * A matching entry is reported with display_info() and is NOT descended
     * into; every other entry is handed to a recursive call, which returns
     * early if the entry cannot be opened as a directory.
     *
     * Returns 0 on success, -1 when base_path cannot be opened or memory
     * for a child path cannot be allocated.
     */
    int list_files(char* pattern, char* base_path)
    {
        struct dirent* dp;
        DIR* dir = opendir(base_path);
        if (!dir) return -1; // Unable to open directory stream
        while ((dp = readdir(dir)) != NULL)
        {
            if (strcmp(dp->d_name, ".") == 0) continue;
            if (strcmp(dp->d_name, "..") == 0) continue;
            if ( strstr_ign(dp->d_name, pattern) != NULL )
            {
                display_info(base_path, dp->d_name);
            }
            else
            {
                // base + '/' + name + '\0'
                size_t size = strlen(base_path) + 1 + strlen(dp->d_name) + 1;
                char* path = malloc(size);
                if (path == NULL)
                {
                    closedir(dir);
                    return -1;
                }
                snprintf(path, size, "%s/%s", base_path, dp->d_name);
                list_files(pattern, path); // result ignored: failure to open just ends this branch
                free(path);
            }
        }   // while()
        closedir(dir);
        return 0;
    }   // list_files()
    
    

    code for strstr_ign()

    I hate the argument order of strstr() but kept it here so that strstr_ign() can be used as a drop-in replacement for strstr() without swapping the arguments. I believe needle should come first :) as in the language: "search for a needle in a haystack" is far more common than "search the haystack for a needle", but Ken and Dennis had their reasons to write strstr() the way they did...

    //
    // strstr() ignoring case
    //
    // Returns a pointer to the first position in haystack where needle
    // matches when both sides are compared through tolower(), or NULL when
    // there is no match or either argument is NULL. As with strstr(), an
    // empty needle matches at the start of haystack.
    //
    char*       strstr_ign(const char* haystack, const char* needle)
    {
        if (needle == NULL) return NULL;
        if (haystack == NULL) return NULL;

        size_t needle_len = strlen(needle);
        size_t hay_len = strlen(haystack);
        if (needle_len == 0) return (char*)haystack;    // same result as strstr()
        if (needle_len > hay_len) return NULL;          // keeps the subtraction below from wrapping

        size_t limit = hay_len - needle_len;
        for (size_t x = 0; x <= limit; x += 1)
        {   // search for needle at position 'x' of 'haystack'
            size_t y = 0;
            // <ctype.h> functions require values representable as unsigned char
            while (y < needle_len &&
                   tolower((unsigned char)haystack[x + y]) == tolower((unsigned char)needle[y]))
            {
                y += 1;
            }
            if (y == needle_len) return (char*)(haystack + x);
        }
        return NULL;
    }
    

    a new display_info()

    changed to show last access for folders and file size for regular files that match the search pattern (case insensitive). Below is an example of the output for files and folders. Note the '-' and the 'd' as in the ls -l output.

        - "./hms.c" [size: 1546]
        d "./sub/1/xyzHMSxyz"   [last access: Sat Apr 24 12:38:04 2021]
    
    /*
     * Print one line describing the entry base/file.
     *
     * Directories are printed with a 'd' marker and their last access time,
     * regular files with a '-' marker and their size; any other file type
     * prints a fixed notice.
     *
     * Returns 0 on success, -1 if the path buffer cannot be allocated or
     * stat() fails (both are reported through perror()).
     */
    int display_info(const char* base, const char* file)
    {
        struct  stat Stats;
        size_t  size = strlen(base) + 1 + strlen(file) + 1;   // base + '/' + file + '\0'
        char*   path = malloc(size);
        char    atime[40];
        if (path == NULL)
        {
            perror("Inside display_info()");
            return -1;
        }
        snprintf(path, size, "%s/%s", base, file);
        if ( stat(path, &Stats) < 0)
        {
            perror("Inside display_info()");
            free(path);
            return -1;
        }
        if ( S_ISDIR(Stats.st_mode) )
        {
            // localtime() can return NULL and strftime() can return 0; fall back to a placeholder
            struct tm* when = localtime(&Stats.st_atime);
            if ( when == NULL ||
                 strftime( atime, sizeof(atime), "%a %b %d %H:%M:%S %Y", when ) == 0 )
            {
                strcpy(atime, "unknown");
            }
            printf("\td \"%s\"\t[last access: %s]\n", path, atime );
        }
        else if ( S_ISREG(Stats.st_mode) )
        {
            // off_t is not guaranteed to be long: widen explicitly so the format matches
            printf("\t- \"%s\"\t[size: %lld]\n", path, (long long)Stats.st_size );
        }
        else
        {
            printf("is somthing else\n");
        }
        free(path);
        return 0;
    }
    

    sample output

    Search pattern is "hms" (case is ignored) 
        - "./hms"   [size: 16848]
        - "./hms-soma.c"    [size: 1379]
        - "./hms.c" [size: 1546]
        d "./sub/1/xyzHMSxyz"   [last access: Sat Apr 24 12:38:04 2021]
        d "./sub/2/xyzHMS"  [last access: Sat Apr 24 12:21:11 2021]
        d "./sub/hMs"   [last access: Sat Apr 24 12:21:11 2021]
    

    C code for this test

    minimally tested :)

    #include <ctype.h>
    #include <dirent.h>
    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/stat.h>
    #include <sys/types.h>
    #include <time.h>
    
    
    // Prints a one-line description of the entry <1st arg>/<2nd arg>; returns 0, or -1 on failure.
    int         display_info(const char*, const char*);
    // Recursively searches the directory (2nd argument) for entry names containing the pattern (1st argument).
    int         list_files(char*, char*);
    // Case-insensitive substring search; arguments are (haystack, needle), in the same order as strstr().
    char*       strstr_ign(const char*, const char*);
    
     /*
      * Entry point: copy argv[1] in lower case into a local buffer and search
      * the current directory (".") for entries whose name contains it.
      *
      * Returns -1 when the pattern does not fit in the buffer, 0 otherwise
      * (including when no pattern was given).
      */
     int main(int argc, const char * argv[])
     {
         char    search_term[80];
         if (argc >= 2)
         {
             size_t len = strlen(argv[1]);
             if ( len > (sizeof(search_term)-1) )
             {
                 // %zu is the conversion for size_t
                 printf("Size of substring (%zu) must not be greater than %zu\n",
                 len, sizeof(search_term)-1 );
                 return -1;
             }
             // '<=' copies the terminating '\0' too; tolower() needs an unsigned char value
             for ( size_t i = 0; i <= len; i += 1 )
             {
                 search_term[i] = (char)tolower((unsigned char)argv[1][i]);
             }
             printf("Search pattern is \"%s\" (case is ignored) \n", search_term );
             list_files(search_term,".");
         } else {
             printf("Please provide pattern to search for.\n");
         }
         return 0;
     }   // main()
    
    
    /*
     * Print one line describing the entry base/file.
     *
     * Directories are printed with a 'd' marker and their last access time,
     * regular files with a '-' marker and their size; any other file type
     * prints a fixed notice.
     *
     * Returns 0 on success, -1 if the path buffer cannot be allocated or
     * stat() fails (both are reported through perror()).
     */
    int display_info(const char* base, const char* file)
    {
        struct  stat Stats;
        size_t  size = strlen(base) + 1 + strlen(file) + 1;   // base + '/' + file + '\0'
        char*   path = malloc(size);
        char    atime[40];
        if (path == NULL)
        {
            perror("Inside display_info()");
            return -1;
        }
        snprintf(path, size, "%s/%s", base, file);
        if ( stat(path, &Stats) < 0)
        {
            perror("Inside display_info()");
            free(path);
            return -1;
        }
        if ( S_ISDIR(Stats.st_mode) )
        {
            // localtime() can return NULL and strftime() can return 0; fall back to a placeholder
            struct tm* when = localtime(&Stats.st_atime);
            if ( when == NULL ||
                 strftime( atime, sizeof(atime), "%a %b %d %H:%M:%S %Y", when ) == 0 )
            {
                strcpy(atime, "unknown");
            }
            printf("\td \"%s\"\t[last access: %s]\n", path, atime );
        }
        else if ( S_ISREG(Stats.st_mode) )
        {
            // off_t is not guaranteed to be long: widen explicitly so the format matches
            printf("\t- \"%s\"\t[size: %lld]\n", path, (long long)Stats.st_size );
        }
        else
        {
            printf("is somthing else\n");
        }
        free(path);
        return 0;
    }
    
    
    /*
     * Recursively search base_path for entries whose name contains pattern,
     * using strstr_ign() for the comparison.
     *
     * A matching entry is reported with display_info() and is NOT descended
     * into; every other entry is handed to a recursive call, which returns
     * early if the entry cannot be opened as a directory.
     *
     * Returns 0 on success, -1 when base_path cannot be opened or memory
     * for a child path cannot be allocated.
     */
    int list_files(char* pattern, char* base_path)
    {
        struct dirent* dp;
        DIR* dir = opendir(base_path);
        if (!dir) return -1; // Unable to open directory stream
        while ((dp = readdir(dir)) != NULL)
        {
            if (strcmp(dp->d_name, ".") == 0) continue;
            if (strcmp(dp->d_name, "..") == 0) continue;
            if ( strstr_ign(dp->d_name, pattern) != NULL )
            {
                display_info(base_path, dp->d_name);
            }
            else
            {
                // base + '/' + name + '\0'
                size_t size = strlen(base_path) + 1 + strlen(dp->d_name) + 1;
                char* path = malloc(size);
                if (path == NULL)
                {
                    closedir(dir);
                    return -1;
                }
                snprintf(path, size, "%s/%s", base_path, dp->d_name);
                list_files(pattern, path); // result ignored: failure to open just ends this branch
                free(path);
            }
        }   // while()
        closedir(dir);
        return 0;
    }   // list_files()
    
    //
    // strstr() ignoring case
    //
    // Returns a pointer to the first position in haystack where needle
    // matches when both sides are compared through tolower(), or NULL when
    // there is no match or either argument is NULL. As with strstr(), an
    // empty needle matches at the start of haystack.
    //
    char*       strstr_ign(const char* haystack, const char* needle)
    {
        if (needle == NULL) return NULL;
        if (haystack == NULL) return NULL;

        size_t needle_len = strlen(needle);
        size_t hay_len = strlen(haystack);
        if (needle_len == 0) return (char*)haystack;    // same result as strstr()
        if (needle_len > hay_len) return NULL;          // keeps the subtraction below from wrapping

        size_t limit = hay_len - needle_len;
        for (size_t x = 0; x <= limit; x += 1)
        {   // search for needle at position 'x' of 'haystack'
            size_t y = 0;
            // <ctype.h> functions require values representable as unsigned char
            while (y < needle_len &&
                   tolower((unsigned char)haystack[x + y]) == tolower((unsigned char)needle[y]))
            {
                y += 1;
            }
            if (y == needle_len) return (char*)(haystack + x);
        }
        return NULL;
    }