diff --git a/uspace/app/bdsh/meson.build b/uspace/app/bdsh/meson.build
index fd8064e9e1..92c6d2c306 100644
--- a/uspace/app/bdsh/meson.build
+++ b/uspace/app/bdsh/meson.build
@@ -27,7 +27,7 @@
 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 
-deps = [ 'clui', 'fmtutil' ]
+deps = [ 'clui', 'fmtutil', 'wildcards' ]
 includes += include_directories('.', 'cmds', 'cmds/builtins', 'cmds/modules')
 src = files(
 	'cmds/builtin_cmds.c',
diff --git a/uspace/app/bdsh/tok.c b/uspace/app/bdsh/tok.c
index d9b9556b71..3e8ca97bbc 100644
--- a/uspace/app/bdsh/tok.c
+++ b/uspace/app/bdsh/tok.c
@@ -31,8 +31,9 @@
 #include <stdlib.h>
 #include <stddef.h>
 #include <errno.h>
 
 #include "tok.h"
+#include "wildcards/wildcards.h"
 
 /* Forward declarations of static functions */
 static char32_t tok_get_char(tokenizer_t *);
@@ -43,6 +44,98 @@ static bool tok_pending_chars(tokenizer_t *);
 static errno_t tok_finish_string(tokenizer_t *);
 static void tok_start_token(tokenizer_t *, token_type_t);
 
+/** Push one expanded token to the tokenizer output
+ *
+ * Callback for expand_wildcard_patterns(). Copies the text to the output
+ * buffer and records it as a token of the type currently being parsed.
+ * Every token pushed for one input word reports the same start position.
+ *
+ * @param text expanded text, only read during the call
+ * @param arg  the tokenizer (tokenizer_t *)
+ *
+ * @return EOK on success, EOVERFLOW if the token array or the output
+ *         buffer is full
+ */
+static errno_t push_expanded_wildcard_token(char *text, void *arg)
+{
+	tokenizer_t *tok = (tokenizer_t *) arg;
+	size_t size = str_size(text);
+
+	if (tok->outtok_offset >= tok->outtok_size) {
+		return EOVERFLOW;
+	}
+
+	/* The text and its terminator must fit into the rest of the buffer. */
+	if (tok->outbuf_offset + size + 1 >= tok->outbuf_size) {
+		return EOVERFLOW;
+	}
+
+	char *dest = tok->outbuf + tok->outbuf_offset;
+	str_cpy(dest, tok->outbuf_size - tok->outbuf_offset, text);
+
+	token_t *tokinfo = &tok->outtok[tok->outtok_offset++];
+	tokinfo->type = tok->current_type;
+	tokinfo->text = dest;
+	tokinfo->byte_start = tok->last_in_offset;
+	tokinfo->byte_length = size;
+	tokinfo->char_start = tok->last_in_char_offset;
+	tokinfo->char_length = str_length(text);
+
+	tok->outbuf_offset += size + 1;
+	tok->outbuf_last_start = tok->outbuf_offset;
+
+	return EOK;
+}
+
+/** Expand the pending token and push the result
+ *
+ * The pending text is expanded with expand_wildcard_patterns() using an
+ * empty base path, and every reported path is pushed as a separate token.
+ * A word for which nothing is reported is pushed unchanged.
+ *
+ * @param tok the tokenizer
+ *
+ * @return EOK on success, EOVERFLOW if an output buffer is full, ENOMEM
+ *         if out of memory, or another error from the expansion
+ */
+static errno_t wildcard_token_expand(tokenizer_t *tok)
+{
+	/* Terminating the pending text needs one free byte. */
+	if (tok->outbuf_offset >= tok->outbuf_size) {
+		return EOVERFLOW;
+	}
+	tok->outbuf[tok->outbuf_offset] = '\0';
+
+	/*
+	 * The callback appends to the same buffer and overwrites the
+	 * terminator, so expand from a private copy.
+	 */
+	char *word = str_dup(tok->outbuf + tok->outbuf_last_start);
+	if (word == NULL) {
+		return ENOMEM;
+	}
+
+	size_t first_token = tok->outtok_offset;
+	errno_t rc = expand_wildcard_patterns(word, "",
+	    push_expanded_wildcard_token, tok);
+
+	/* As in POSIX shells, a pattern without matches stays literal. */
+	if (rc == EOK && tok->outtok_offset == first_token) {
+		rc = push_expanded_wildcard_token(word, tok);
+	}
+	free(word);
+	if (rc != EOK) {
+		return rc;
+	}
+
+	/* The next token starts at the current input position. */
+	tok->last_in_offset = tok->in_offset;
+	tok->last_in_char_offset = tok->in_char_offset;
+	tok->outbuf_last_start = tok->outbuf_offset;
+
+	return EOK;
+}
+
 /** Initialize the token parser
  *
  * @param tok the tokenizer structure to initialize
@@ -102,8 +195,8 @@ errno_t tok_tokenize(tokenizer_t *tok, size_t *tokens_length)
 			 * there are several spaces in the input.
 			 */
 			if (tok_pending_chars(tok)) {
-				rc = tok_push_token(tok);
+				rc = wildcard_token_expand(tok);
 				if (rc != EOK) {
 					return rc;
 				}
 			}
@@ -166,8 +259,8 @@ errno_t tok_tokenize(tokenizer_t *tok, size_t *tokens_length)
 
 	/* Push the last token */
 	if (tok_pending_chars(tok)) {
-		rc = tok_push_token(tok);
+		rc = wildcard_token_expand(tok);
 		if (rc != EOK) {
 			return rc;
 		}
 	}
diff --git a/uspace/dist/data/wildcards_test/.hidden b/uspace/dist/data/wildcards_test/.hidden
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/uspace/dist/data/wildcards_test/another_file.md b/uspace/dist/data/wildcards_test/another_file.md
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/uspace/dist/data/wildcards_test/dir/file_in_dir.txt b/uspace/dist/data/wildcards_test/dir/file_in_dir.txt
new file mode 100644
index
0000000000..e69de29bb2 diff --git a/uspace/dist/data/wildcards_test/dir/nested/deep.txt b/uspace/dist/data/wildcards_test/dir/nested/deep.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git "a/uspace/dist/data/wildcards_test/dir/\304\215u\304\215oriedka.md" "b/uspace/dist/data/wildcards_test/dir/\304\215u\304\215oriedka.md" new file mode 100644 index 0000000000..e69de29bb2 diff --git a/uspace/dist/data/wildcards_test/file.txt b/uspace/dist/data/wildcards_test/file.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/uspace/dist/data/wildcards_test/foo* b/uspace/dist/data/wildcards_test/foo* new file mode 100644 index 0000000000..e69de29bb2 diff --git a/uspace/dist/data/wildcards_test/multi.ext.file b/uspace/dist/data/wildcards_test/multi.ext.file new file mode 100644 index 0000000000..e69de29bb2 diff --git a/uspace/dist/data/wildcards_test/similar/apple b/uspace/dist/data/wildcards_test/similar/apple new file mode 100644 index 0000000000..e69de29bb2 diff --git a/uspace/dist/data/wildcards_test/similar/apples b/uspace/dist/data/wildcards_test/similar/apples new file mode 100644 index 0000000000..e69de29bb2 diff --git a/uspace/dist/data/wildcards_test/similar/appleseed b/uspace/dist/data/wildcards_test/similar/appleseed new file mode 100644 index 0000000000..e69de29bb2 diff --git a/uspace/dist/data/wildcards_test/similar/application b/uspace/dist/data/wildcards_test/similar/application new file mode 100644 index 0000000000..e69de29bb2 diff --git a/uspace/dist/data/wildcards_test/spaced name.txt b/uspace/dist/data/wildcards_test/spaced name.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/uspace/dist/data/wildcards_test/testfile b/uspace/dist/data/wildcards_test/testfile new file mode 100644 index 0000000000..e69de29bb2 diff --git "a/uspace/dist/data/wildcards_test/\304\215aute.txt" "b/uspace/dist/data/wildcards_test/\304\215aute.txt" new file mode 100644 index 0000000000..e69de29bb2 diff --git a/uspace/lib/meson.build 
b/uspace/lib/meson.build
index f3b92ee22b..bcef4b4960 100644
--- a/uspace/lib/meson.build
+++ b/uspace/lib/meson.build
@@ -122,6 +122,8 @@ libs = [
 
 	'ui',
 	'vt',
+
+	'wildcards',
 ]
 
 # Generated list of include directory paths
diff --git a/uspace/lib/wildcards/include/wildcards/wildcards.h b/uspace/lib/wildcards/include/wildcards/wildcards.h
new file mode 100644
index 0000000000..d2bd94f65d
--- /dev/null
+++ b/uspace/lib/wildcards/include/wildcards/wildcards.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2025 Patrik Pritrsky
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ * - The name of the author may not be used to endorse or promote products
+ *   derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef WILDCARDS_WILDCARDS_H
+#define WILDCARDS_WILDCARDS_H
+
+#include <errno.h>
+#include <stdbool.h>
+
+/** Callback invoked for every expanded path
+ *
+ * @param path expanded path; only valid for the duration of the call
+ * @param arg  user argument given to expand_wildcard_patterns()
+ *
+ * @return EOK to continue, any other value to stop the expansion
+ */
+typedef errno_t (*wildcards_match_found_callback_t)(char *, void *arg);
+
+/** Tell whether a string contains '*' or '?' (false for NULL) */
+bool contains_wildcard(const char *pattern);
+
+/** Match a name against a wildcard pattern; the verdict goes to @a result */
+errno_t wildcard_comp(const char *pattern, const char *file_name, bool *result);
+
+/** Expand a pattern below @a path and pass every match to @a callback */
+errno_t expand_wildcard_patterns(const char *pattern, const char *path,
+    wildcards_match_found_callback_t callback, void *arg);
+
+#endif
diff --git a/uspace/lib/wildcards/info.md b/uspace/lib/wildcards/info.md
new file mode 100644
index 0000000000..77feba3e1e
--- /dev/null
+++ b/uspace/lib/wildcards/info.md
@@ -0,0 +1,65 @@
+# Wildcard support for HelenOS - MSoC Project by Patrik Pritrsky
+
+In this project, I added support for wildcards in the HelenOS shell.
+So, if the user wants to delete all text files in a directory, they just
+need to type 'rm *.txt', instead of listing them all one by one.
+
+## Features
+
+### Standard wildcard *
+
+Matches zero or more characters. It is evaluated at every level of the
+path where it occurs.
+
+For example, 'folder*/file*.txt' will find all text files starting with
+'file', in all subdirectories of the current directory that start with
+'folder'.
+
+### Recursive wildcard **
+
+Used to find files at arbitrary depth.
+
+For example, '**/*.txt' will find all text files, at any depth within
+the current directory.
+
+## List of changes to HelenOS
+
+- Added automated tests for wildcards
+- Created a function to detect whether a string contains a wildcard
+- Created a function to check whether a wildcard pattern matches a
+  file/directory name
+- Created a function for recursive expansion and finding all
+  occurrences of files/directories that match a path/filename containing
+  wildcards
+- Modified the HelenOS shell tokenizer to support wildcard expansion
+
+# Podpora zástupných znakov (wildcards) v HelenOS shelle - MSoC Project by Patrik Pritrsky
+
+V tomto projekte som pridal podporu pre zástupné znaky (wildcards) v HelenOS shelle.
+Teda, ak používateľ bude chcieť vymazať všetky textové súbory v priečinku,
+tak mu stačí napísať 'rm *.txt', namiesto toho, aby ich všetky vymenoval.
+
+## Funkcie
+
+### Štandardný zástupný znak/wildcard *
+
+Expanduje sa na nula alebo viacero znakov,
+vyhodnocovanie prebieha rekurzívne na všetkých úrovniach, kde sa nachádza.
+
+Teda napríklad 'priecinok*/subor*.txt' nájde všetky textové súbory začínajúce sa na 'subor',
+vo všetkých podpriečinkoch aktuálneho priečinku začínajúcich sa na 'priecinok'.
+
+### Rekurzívny zástupný znak/wildcard **
+
+Slúži na nájdenie súborov, ktoré sú ľubovoľne hlboko.
+
+Teda napríklad '**/*.txt' nájde všetky textové súbory ľubovoľne hlboko v aktuálnom priečinku.
+
+## Zoznam zmien v HelenOS
+
+- Pridanie automatizovaných testov pre zástupné znaky
+- Vytvorenie funkcie na detegovanie, či reťazec obsahuje zástupný znak
+- Vytvorenie funkcie na porovnanie, či sa vzor so zástupnými znakmi zhoduje s názvom súboru/priečinku
+- Vytvorenie funkcie na rekurzívne expandovanie a nájdenie všetkých výskytov súborov/priečinkov,
+ktoré sa zhodujú s cestou/názvom súboru obsahujúcim zástupné znaky
+- Zmena v tokenizátore HelenOS shellu tak, aby podporoval expanziu zástupných znakov
diff --git a/uspace/lib/wildcards/meson.build b/uspace/lib/wildcards/meson.build
new file mode 100644
index 0000000000..99ce0e5288
--- /dev/null
+++ b/uspace/lib/wildcards/meson.build
@@ -0,0 +1,31 @@
+#
+# Copyright (c) 2025 Patrik Pritrsky
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# - Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# - The name of the author may not be used to endorse or promote products
+#   derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+src = files(['src/wildcards.c'])
+
+test_src = files(['test/wildcards_test.c', 'src/wildcards.c'])
diff --git a/uspace/lib/wildcards/src/wildcards.c b/uspace/lib/wildcards/src/wildcards.c
new file mode 100644
index 0000000000..a3e28bc95a
--- /dev/null
+++ b/uspace/lib/wildcards/src/wildcards.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2025 Patrik Pritrsky
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ * - The name of the author may not be used to endorse or promote products
+ *   derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <str.h>
+#include "wildcards/wildcards.h"
+
+/** Decode a UTF-8 string into an array of code points
+ *
+ * @param utf8    string to decode
+ * @param out_cp  receives a zero-terminated array of code points that the
+ *                caller must free()
+ * @param out_len receives the number of code points, terminator excluded
+ *
+ * @return EOK on success, ENOMEM if out of memory
+ */
+static errno_t utf8_to_codepoints(const char *utf8, uint32_t **out_cp,
+    size_t *out_len)
+{
+	size_t len = str_length(utf8);
+	/* The byte size does not change while decoding; compute it once. */
+	size_t size = str_lsize(utf8, len);
+
+	uint32_t *cp = calloc(len + 1, sizeof(uint32_t));
+	if (cp == NULL) {
+		return ENOMEM;
+	}
+
+	size_t offset = 0;
+	for (size_t i = 0; i < len; i++) {
+		cp[i] = str_decode(utf8, &offset, size);
+	}
+
+	*out_cp = cp;
+	*out_len = len;
+	return EOK;
+}
+
+/** Test whether a string matches a wildcard pattern
+ *
+ * '*' matches any sequence of characters (including none), '?' matches
+ * exactly one character and anything else matches itself. Matching works
+ * on code points, not on bytes.
+ *
+ * @param pattern       wildcard pattern
+ * @param target_string string to test
+ * @param result        receives true if the whole string matches
+ *
+ * @return EOK on success, ENOMEM if out of memory
+ */
+errno_t wildcard_comp(const char *pattern, const char *target_string,
+    bool *result)
+{
+	uint32_t *pat = NULL;
+	uint32_t *text = NULL;
+	bool *rows = NULL;
+	size_t pat_len = 0;
+	size_t text_len = 0;
+
+	errno_t rc = utf8_to_codepoints(pattern, &pat, &pat_len);
+	if (rc != EOK) {
+		goto out;
+	}
+	rc = utf8_to_codepoints(target_string, &text, &text_len);
+	if (rc != EOK) {
+		goto out;
+	}
+
+	/*
+	 * Two rows of the match table: prev[j] and cur[j] tell whether the
+	 * pattern prefix handled so far matches the first j code points.
+	 */
+	rows = calloc(2 * (text_len + 1), sizeof(bool));
+	if (rows == NULL) {
+		rc = ENOMEM;
+		goto out;
+	}
+	bool *prev = rows;
+	bool *cur = rows + text_len + 1;
+
+	/* The empty pattern matches only the empty string. */
+	prev[0] = true;
+
+	for (size_t i = 0; i < pat_len; i++) {
+		uint32_t pc = pat[i];
+
+		/* Only a run of '*' can match the empty string. */
+		cur[0] = (pc == '*') && prev[0];
+
+		for (size_t j = 1; j <= text_len; j++) {
+			if (pc == '*') {
+				/* '*' matches nothing, or one more code point. */
+				cur[j] = prev[j] || cur[j - 1];
+			} else if (pc == '?' || pc == text[j - 1]) {
+				cur[j] = prev[j - 1];
+			} else {
+				cur[j] = false;
+			}
+		}
+
+		bool *tmp = prev;
+		prev = cur;
+		cur = tmp;
+	}
+
+	*result = prev[text_len];
+
+out:
+	free(rows);
+	free(text);
+	free(pat);
+	return rc;
+}
+
+/** Tell whether a string contains a wildcard character
+ *
+ * @param pattern string to inspect, may be NULL
+ *
+ * @return true if the string contains '*' or '?'
+ */
+bool contains_wildcard(const char *pattern)
+{
+	if (pattern == NULL) {
+		return false;
+	}
+
+	return str_chr(pattern, '*') != NULL || str_chr(pattern, '?') != NULL;
+}
+
+/** Expand a wildcard pattern and report every matching path
+ *
+ * The pattern is processed one path component at a time. Directories are
+ * read with readdir() and matches are reported in the order it returns
+ * them. The component "**" followed by '/' stands for any number of
+ * directory levels, including none. A remainder without wildcards is
+ * appended and reported without checking that the path exists. Nothing
+ * is reported if nothing matches.
+ *
+ * @param pattern  pattern to expand; a leading '/' makes it absolute
+ * @param path     prefix prepended verbatim to every result, so it should
+ *                 be empty or end with '/'; replaced by "/" for absolute
+ *                 patterns
+ * @param callback called with every expanded path; the string is freed
+ *                 after the call returns
+ * @param arg      passed through to the callback
+ *
+ * @return EOK on success, ENOMEM if out of memory, or the first non-EOK
+ *         value returned by the callback
+ */
+errno_t expand_wildcard_patterns(const char *pattern, const char *path,
+    wildcards_match_found_callback_t callback, void *arg)
+{
+	/* Base case: nothing left to expand. */
+	if (!contains_wildcard(pattern)) {
+		char *full_path = NULL;
+		if (asprintf(&full_path, "%s%s", path, pattern) < 0) {
+			return ENOMEM;
+		}
+
+		errno_t rc = callback(full_path, arg);
+		free(full_path);
+		return rc;
+	}
+
+	/*
+	 * An absolute pattern starts at the root. Advance the pattern itself
+	 * so that the "**" recursion below passes on the relative remainder
+	 * instead of restarting at the root forever.
+	 */
+	if (pattern[0] == '/') {
+		pattern++;
+		path = "/";
+	}
+
+	/* Split off the first component; the rest follows the slash. */
+	char *segment = str_dup(pattern);
+	if (segment == NULL) {
+		return ENOMEM;
+	}
+
+	const char *rest = "";
+	char *slash = str_chr(segment, '/');
+	if (slash != NULL) {
+		*slash = '\0';
+		rest = slash + 1;
+	}
+
+	DIR *dir = opendir(path);
+	if (dir == NULL) {
+		/* Not a readable directory: no matches, not an error. */
+		free(segment);
+		return EOK;
+	}
+
+	/* A trailing "**" has nothing to descend for and acts like "*". */
+	bool recursive = slash != NULL && str_cmp(segment, "**") == 0;
+	errno_t rc = EOK;
+
+	/* "**" may stand for no directory level at all. */
+	if (recursive) {
+		rc = expand_wildcard_patterns(rest, path, callback, arg);
+	}
+
+	struct dirent *entry;
+	while (rc == EOK && (entry = readdir(dir)) != NULL) {
+		bool matched = false;
+		rc = wildcard_comp(segment, entry->d_name, &matched);
+		if (rc != EOK || !matched) {
+			continue;
+		}
+
+		char *full_path = NULL;
+		if (asprintf(&full_path, "%s%s%s", path, entry->d_name,
+		    slash != NULL ? "/" : "") < 0) {
+			rc = ENOMEM;
+			break;
+		}
+
+		/* Under "**" keep the whole pattern to descend further. */
+		rc = expand_wildcard_patterns(recursive ? pattern : rest,
+		    full_path, callback, arg);
+		free(full_path);
+	}
+
+	closedir(dir);
+	free(segment);
+	return rc;
+}
diff --git a/uspace/lib/wildcards/test/wildcards_test.c b/uspace/lib/wildcards/test/wildcards_test.c
new file mode 100644
index 0000000000..894af4eb52
--- /dev/null
+++ b/uspace/lib/wildcards/test/wildcards_test.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2025 Patrik Pritrsky
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ * - The name of the author may not be used to endorse or promote products
+ *   derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <pcut/pcut.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <str.h>
+#include "wildcards/wildcards.h"
+
+PCUT_INIT;
+
+/** Run wildcard_comp() and return its verdict; an error fails the test */
+static bool test_wildcard_comp(const char *pattern, const char *text) {
+	bool res = false;
+	PCUT_ASSERT_ERRNO_VAL(EOK, wildcard_comp(pattern, text, &res));
+	return res;
+}
+
+PCUT_TEST(basic_and_user_cases)
+{
+	PCUT_ASSERT_FALSE(test_wildcard_comp("aho", "Ah"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("n*", "nie"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("a*c", "abc"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("a*c", "axc"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("a*c", "ac"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("abc", "ab"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("ab", "abc"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("ab", "ab"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("*", "hello"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("he*lo", "hello"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("he*lo", "helo"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("*abc*", "xabcy"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("abc", "xyz"));
+}
+
+PCUT_TEST(empty_string_cases)
+{
+	PCUT_ASSERT_TRUE(test_wildcard_comp("", ""));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("*", ""));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("", "a"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("a", ""));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("a*", ""));
+}
+
+PCUT_TEST(multiple_and_consecutive_wildcard_cases)
+{
+	PCUT_ASSERT_TRUE(test_wildcard_comp("a**b", "axxb"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("a**b", "ab"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("*a*b*", "zzza_b_zzz"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("*a*b*", "ab"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("ab**", "abxyz"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("***", "abc"));
+}
+
+PCUT_TEST(complex_and_backtracking_cases)
+{
+	PCUT_ASSERT_TRUE(test_wildcard_comp("*a*b", "sssaaasbb"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("*a*b", "sssaccc"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("a*d", "abcd"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("a*d", "ab_c_e"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("a*a*a", "aaaa"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("f*f*", "f"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("*b", "aabab"));
+}
+
+PCUT_TEST(additional_edge_cases)
+{
+	PCUT_ASSERT_TRUE(test_wildcard_comp("*bc", "abc"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("*", "*"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("*abc", "ababc"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("*abc", "abcabc"));
+}
+
+PCUT_TEST(contains_wildcard)
+{
+	PCUT_ASSERT_TRUE(contains_wildcard("a*b"));
+	PCUT_ASSERT_TRUE(contains_wildcard("a?b"));
+	PCUT_ASSERT_FALSE(contains_wildcard("abc"));
+	PCUT_ASSERT_FALSE(contains_wildcard(""));
+	PCUT_ASSERT_TRUE(contains_wildcard("*"));
+	PCUT_ASSERT_TRUE(contains_wildcard("?"));
+	PCUT_ASSERT_TRUE(contains_wildcard("a?b?c"));
+	PCUT_ASSERT_TRUE(contains_wildcard("a*b*c"));
+}
+
+PCUT_TEST(question_mark_wildcard)
+{
+	PCUT_ASSERT_TRUE(test_wildcard_comp("a?c", "abc"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("?", "a"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("?", ""));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("?", "ab"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("??", "ab"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("??", "a"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("a?*", "abc"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("?*", "abc"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("a?c", "ac"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("a?*", "ab"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("a?c", "abbc"));
+}
+
+PCUT_TEST(utf8_wildcard_tests)
+{
+	PCUT_ASSERT_TRUE(test_wildcard_comp("čau*", "čaučkovanie"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("*ďeň", "pekný deň"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("pä?eň", "päťeň"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("pä?eň", "pápeň"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("ž*š*", "žlté šaty"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("ž?š", "žlté šaty"));
+	PCUT_ASSERT_TRUE(test_wildcard_comp("Γειά*σου*", "Γειά σου Κόσμε"));
+	PCUT_ASSERT_FALSE(test_wildcard_comp("Γειά?σου*", "Γεια σου Κόσμε"));
+}
+
+typedef struct {
+	char **items;    /* owned copies of the reported paths */
+	size_t count;    /* number of used entries */
+	size_t capacity; /* number of allocated entries */
+} match_list_t;
+
+/** Callback for expand_wildcard_patterns(): append a copy of the path */
+static errno_t collect_match(char *path, void *arg) {
+	match_list_t *list = (match_list_t *) arg;
+	if (list->count >= list->capacity) {
+		size_t capacity = (list->capacity == 0) ? 4 : list->capacity * 2;
+		char **items = realloc(list->items, capacity * sizeof(char *));
+		if (items == NULL) {
+			return ENOMEM;
+		}
+		list->items = items;
+		list->capacity = capacity;
+	}
+	char *copy = str_dup(path);
+	if (copy == NULL) {
+		return ENOMEM;
+	}
+	list->items[list->count++] = copy;
+	return EOK;
+}
+
+/** Check that the match at the given index equals the expected path */
+static bool list_check_at_index(match_list_t *list, int index, const char *value) {
+	if (index < 0 || (size_t)index >= list->count) {
+		return false;
+	}
+	return str_cmp(list->items[index], value) == 0;
+}
+
+/** Release all collected paths and reset the list to empty */
+static void free_list(match_list_t *list) {
+	for (size_t i = 0; i < list->count; ++i) {
+		free(list->items[i]);
+	}
+	free(list->items);
+	*list = (match_list_t){0};
+}
+
+PCUT_TEST(wildcards_expand_tests)
+{
+	match_list_t matches = {0};
+
+	/* 1. Match all txt files in one directory */
+	PCUT_ASSERT_ERRNO_VAL(EOK, expand_wildcard_patterns("*.txt", "/data/wildcards_test/", collect_match, &matches));
+	PCUT_ASSERT_TRUE(list_check_at_index(&matches, 0, "/data/wildcards_test/file.txt"));
+	PCUT_ASSERT_TRUE(list_check_at_index(&matches, 1, "/data/wildcards_test/spaced name.txt"));
+	PCUT_ASSERT_TRUE(list_check_at_index(&matches, 2, "/data/wildcards_test/čaute.txt"));
+	PCUT_ASSERT_INT_EQUALS(3, matches.count);
+	free_list(&matches);
+
+	/* 2. Match files in dir/* */
+	PCUT_ASSERT_ERRNO_VAL(EOK, expand_wildcard_patterns("dir/*", "/data/wildcards_test/", collect_match, &matches));
+	PCUT_ASSERT_TRUE(list_check_at_index(&matches, 0, "/data/wildcards_test/dir/file_in_dir.txt"));
+	PCUT_ASSERT_TRUE(list_check_at_index(&matches, 1, "/data/wildcards_test/dir/nested"));
+	PCUT_ASSERT_TRUE(list_check_at_index(&matches, 2, "/data/wildcards_test/dir/čučoriedka.md"));
+	PCUT_ASSERT_INT_EQUALS(3, matches.count);
+	free_list(&matches);
+
+	/* 3. Wildcards in two path components */
+	PCUT_ASSERT_ERRNO_VAL(EOK, expand_wildcard_patterns("dir/*/*.txt", "/data/wildcards_test/", collect_match, &matches));
+	PCUT_ASSERT_TRUE(list_check_at_index(&matches, 0, "/data/wildcards_test/dir/nested/deep.txt"));
+	PCUT_ASSERT_INT_EQUALS(1, matches.count);
+	free_list(&matches);
+
+	/* 4. UTF-8 wildcard match */
+	PCUT_ASSERT_ERRNO_VAL(EOK, expand_wildcard_patterns("*č*", "/data/wildcards_test/", collect_match, &matches));
+	PCUT_ASSERT_TRUE(list_check_at_index(&matches, 0, "/data/wildcards_test/čaute.txt"));
+	PCUT_ASSERT_INT_EQUALS(1, matches.count);
+	free_list(&matches);
+
+	/* 5. Recursive wildcard ** */
+	PCUT_ASSERT_ERRNO_VAL(EOK, expand_wildcard_patterns("**/*.txt", "/data/wildcards_test/", collect_match, &matches));
+	PCUT_ASSERT_TRUE(list_check_at_index(&matches, 0, "/data/wildcards_test/file.txt"));
+	PCUT_ASSERT_TRUE(list_check_at_index(&matches, 1, "/data/wildcards_test/spaced name.txt"));
+	PCUT_ASSERT_TRUE(list_check_at_index(&matches, 2, "/data/wildcards_test/čaute.txt"));
+	PCUT_ASSERT_TRUE(list_check_at_index(&matches, 3, "/data/wildcards_test/dir/file_in_dir.txt"));
+	PCUT_ASSERT_TRUE(list_check_at_index(&matches, 4, "/data/wildcards_test/dir/nested/deep.txt"));
+	PCUT_ASSERT_INT_EQUALS(5, matches.count);
+	free_list(&matches);
+}
+
+PCUT_MAIN();