I needed a simple way to filter folders and files while traversing directories. The code below does this by translating wildcard patterns into regular expressions and matching them with the C++11 regex library.
/*
Used for simple filtering of directories and files during scanning, using wildcards ? and *
*/
#include <iostream>
#include <regex>
#include <string>
#include <vector>
#include <unordered_map>
// Cache of compiled regular expressions, keyed by the wildcard pattern text,
// so that a pattern used repeatedly is compiled only once.
// NOTE(review): nothing in the code shown here removes entries or synchronizes
// access to this map - confirm that is acceptable for long-running or
// multi-threaded callers.
std::unordered_map<std::string, std::regex> g_regexCache;
// Splits a path into its '/'-separated segments.
// Empty segments (leading, trailing or repeated '/') are dropped, so
// "/a//b/" yields {"a", "b"} and "" or "///" yield an empty vector.
std::vector<std::string> split_path_str(const std::string& str) {
    std::vector<std::string> segments;
    std::string::size_type begin = 0;
    while (begin < str.size()) {
        // Locate the end of the current segment: the next '/' or end of input.
        std::string::size_type end = str.find('/', begin);
        if (end == std::string::npos) {
            end = str.size();
        }
        // A zero-length span means two separators were adjacent; skip it.
        if (end > begin) {
            segments.push_back(str.substr(begin, end - begin));
        }
        begin = end + 1;
    }
    return segments;
}
// Returns true when `c` is a regex metacharacter that has to be
// backslash-escaped to be matched literally.
// '*' and '?' are intentionally not listed: compile_pattern() translates
// them as wildcards before it consults this function.
bool is_regex_special_char(char c) {
    switch (c) {
        case '.': case '+': case '(': case ')':
        case '[': case ']': case '{': case '}':
        case '^': case '$': case '|': case '\\':
            return true;
        default:
            return false;
    }
}

// Translates a single-segment wildcard pattern into a std::regex.
//   '*'  matches any run of characters (including none)
//   '?'  matches exactly one character
//   every other character matches itself literally
// The wildcards are emitted as "[\s\S]" rather than "." because the
// ECMAScript "." does not match line terminators, which would make "*" and
// "?" silently fail on names containing '\n' or '\r'.
// Throws std::regex_error if the generated expression is rejected by the
// regex engine.
std::regex compile_pattern(const std::string& pattern) {
    std::string regexPattern;
    regexPattern.reserve(pattern.size() * 2);
    bool previousWasStar = false;
    for (char c : pattern) {
        if (c == '*') {
            // "**" is equivalent to "*"; emitting it once avoids needless
            // backtracking in the regex engine.
            if (!previousWasStar) {
                regexPattern += "[\\s\\S]*";
            }
            previousWasStar = true;
            continue;
        }
        previousWasStar = false;
        if (c == '?') {
            regexPattern += "[\\s\\S]";
        } else {
            if (is_regex_special_char(c)) {
                regexPattern += '\\';
            }
            regexPattern += c;
        }
    }
    return std::regex(regexPattern);
}
// Checks whether one path segment `str` fully matches one wildcard segment
// `pattern`. The compiled regex for each pattern is stored in g_regexCache
// the first time that pattern is seen and reused afterwards.
bool match_a_single_segment(const std::string& str, const std::string& pattern) {
    // Fast path: this pattern has already been compiled.
    auto cached = g_regexCache.find(pattern);
    if (cached != g_regexCache.end()) {
        return std::regex_match(str, cached->second);
    }
    // Slow path: compile, remember, then match with the stored regex.
    // emplace() returns {iterator to the element, inserted-flag}.
    auto inserted = g_regexCache.emplace(pattern, compile_pattern(pattern));
    return std::regex_match(str, inserted.first->second);
}
// Matches the tail of a path against a wildcard pattern, segment by segment.
//
// A trailing '/' marks a folder: a folder pattern only matches folder paths
// and a file pattern only matches file paths. The pattern's segments are
// compared with the LAST segments of `str`, each of which must match in full,
// so "ccc/" matches "d:/ddd/ccc/" but not "d:/ddd/ccc/eee/".
// A pattern consisting only of '/' has no segments and therefore matches
// every folder path.
//
// Returns false when `str` or `pattern` is empty.
bool wildcard_matching_single_pattern(const std::string& str, const std::string& pattern){
    // back() on an empty string is undefined behaviour, so reject empties first.
    if (str.empty() || pattern.empty()) {
        return false;
    }
    const bool pathIsFolder = str.back() == '/';
    const bool patternIsFolder = pattern.back() == '/';
    if (pathIsFolder != patternIsFolder) {
        // A folder pattern never matches a file, and vice versa.
        return false;
    }
    const std::vector<std::string> pathSegments = split_path_str(str);
    const std::vector<std::string> patternSegments = split_path_str(pattern);
    if (patternSegments.size() > pathSegments.size()) {
        return false;
    }
    // Walk both lists from the end; the size check above guarantees the path
    // iterator cannot run out before the pattern iterator does.
    auto pathIt = pathSegments.rbegin();
    for (auto patternIt = patternSegments.rbegin(); patternIt != patternSegments.rend(); ++patternIt, ++pathIt) {
        if (!match_a_single_segment(*pathIt, *patternIt)) {
            return false;
        }
    }
    return true;
}
// Returns true as soon as `str` matches any pattern in `patterns`
// (see wildcard_matching_single_pattern), false if none match or the list
// is empty. The list is only read, so it is taken by const reference, which
// also lets callers pass const vectors and temporaries.
bool wildcard_matching_muti_pattern(const std::string& str, const std::vector<std::string> &patterns){
    for (const std::string& pattern : patterns) {
        if (wildcard_matching_single_pattern(str, pattern)) {
            return true;
        }
    }
    return false;
}
// Demo driver: prints 1 (match) or 0 (no match) for a series of sample
// path/pattern pairs. The trailing comment on each line is the expected output.
int main() {
// File pattern compared with the last path segment.
std::cout << wildcard_matching_single_pattern("d:/xxx/aaa/hello.txt", "*.txt") << "\n";// expected: 1
// Two-segment file pattern: parent folder "a?a" and any file name.
std::cout << wildcard_matching_single_pattern("d:/xxx/a0a/hello.txt", "a?a/*") << "\n";// expected: 1
std::cout << wildcard_matching_single_pattern("d:/xxx/bbb/hello.txt", "a?a/*") << "\n";// expected: 0
// Folder patterns (trailing '/') compared with folder paths.
std::cout << wildcard_matching_single_pattern("d:/ddd/ccc/ee1/", "ee?/") << "\n";// expected: 1
std::cout << wildcard_matching_single_pattern("d:/ddd/ccc/eee/", "aaa/eee/") << "\n";// expected: 0
// Only the end of the path is compared, so a folder pattern does not match
// a subfolder of the folder it names.
std::cout << wildcard_matching_single_pattern("d:/ddd/ccc/eee/", "ccc/") << "\n";// expected: 0
// Multi-pattern matching: a path matches if any single pattern matches.
std::vector<std::string> patterns;
patterns.push_back("eee/ddd/");
patterns.push_back("*.cpp");
patterns.push_back("aaa/*");
std::cout << wildcard_matching_muti_pattern("d:/xxx/aaa/", patterns) << "\n";// expected: 0
std::cout << wildcard_matching_muti_pattern("d:/xxx/aaa/aww.txt", patterns) << "\n";// expected: 1
std::cout << wildcard_matching_muti_pattern("/xxx/aaa/ddd/", patterns) << "\n";// expected: 0
std::cout << wildcard_matching_muti_pattern("/xxx/eee/ddd/", patterns) << "\n";// expected: 1
return 0;
}It can match patterns like *.txt (all txt files), aaa/*.txt (all txt files in the parent directory aaa/), abc/ (all folders named abc/), bbb/abc/ (all folders named abc/ under the parent directory bbb/), ?aa.txt (matches any single character, e.g., 1aa.txt)
It distinguishes whether the pattern is meant to match a file or a folder based on whether the end of the path is '/'. All patterns ending with '/' are used to match folders, and all paths ending with '/' are considered folders. All patterns not ending with '/' are used to match files, and all paths not ending with '/' are considered files.
The intended usage is to call it from the traversal code on the folder or file path that is currently being scanned. Because only the end of the path is compared with the pattern, it cannot match subpaths: a pattern like ccc/ matches a directory like /ddd/ccc/, but it does not match a directory like /ddd/ccc/eee/. So if you first collect all directories and files and only then perform the matching, the contents of matching subdirectories will not be matched correctly.
The alternative algorithm below can match the contents of subdirectories as well, because the pattern may match at any position in the path. It is therefore suited to determining whether a path lies inside a matching folder.
// Returns true when the pattern's segments match a run of CONSECUTIVE
// segments anywhere in `str` (not only at the end), each segment matching in
// full. Unlike the tail-anchored matcher, no file/folder distinction is made
// from a trailing '/'.
// A pattern with no segments (empty, or only '/') matches every path.
bool wildcard_matching(const std::string& str, const std::string& pattern){
    const std::vector<std::string> pathSegments = split_path_str(str);
    const std::vector<std::string> patternSegments = split_path_str(pattern);
    if (patternSegments.size() > pathSegments.size()) {
        return false;
    }
    // Last start index at which the whole pattern still fits in the path.
    // The subtraction cannot wrap because of the size check above.
    const std::size_t lastStart = pathSegments.size() - patternSegments.size();
    for (std::size_t start = 0; start <= lastStart; ++start) {
        bool allMatched = true;
        for (std::size_t offset = 0; offset < patternSegments.size(); ++offset) {
            if (!match_a_single_segment(pathSegments[start + offset], patternSegments[offset])) {
                allMatched = false;
                break;
            }
        }
        if (allMatched) {
            return true;
        }
    }
    return false;
}