diff --git a/src/match.c b/src/match.c index 29936ce..e746a55 100644 --- a/src/match.c +++ b/src/match.c @@ -29,7 +29,7 @@ #define SCORE_MATCH_CONSECUTIVE 1.0 // we got a consecutive match, but insensitive is on // and we didn't match the case. -#define SCORE_MATCH_NOT_MATCH_CASE 0.9 +#define SCORE_MATCH_NOT_MATCH_CASE 0.95 // we are matching after a slash #define SCORE_MATCH_SLASH 0.9 // we are matching after a space dash or hyphen @@ -38,6 +38,10 @@ #define SCORE_MATCH_CAPITAL 0.7 // we are matching after a dot #define SCORE_MATCH_DOT 0.6 +// bonus when the needle is a prefix of the haystack (separators ignored) +#define SCORE_MATCH_STARTS_WITH 2.0 +// bonus when the needle occurs anywhere in the haystack (separators ignored) +#define SCORE_MATCH_CONTAINS 1.5 #define SWAP(x, y, T) \ do { \ @@ -170,15 +174,25 @@ static inline bool match_with_case(char a, char b, bool insensitive) { } } +// Check if a character is just a separator (not a real gap) +static inline bool is_separator(char c) { + return c == '.' || c == '-' || c == '_' || c == ' ' || c == ':' || c == '\''; +} + static inline void match_row(int row, score_t* curr_D, score_t* curr_M, const score_t* last_D, const score_t* last_M, const char* needle, const char* haystack, int n, int m, score_t* match_bonus, bool insensitive) { int i = row; score_t prev_score = SCORE_MIN; - score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; + // Base gap score + score_t base_gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; for(int j = 0; j < m; j++) { + // Separators are FREE - they don't penalize at all + bool sep = is_separator(haystack[j]); + score_t gap_score = sep ? 0 : base_gap_score; + if(match_with_case(needle[i], haystack[j], insensitive)) { score_t score = SCORE_MIN; if(!i) { @@ -189,10 +203,15 @@ static inline void match_row(int row, score_t* curr_D, score_t* curr_M, // our character isn't the same then we have a different case score_t consecutive_bonus = needle[i] == haystack[j] ? 
SCORE_MATCH_CONSECUTIVE : SCORE_MATCH_NOT_MATCH_CASE; - score = max(last_M[j - 1] + match_bonus[j], + // Bonus for matching right after a separator (e.g., R.E.P.O) + // This treats separator-delimited matches like acronyms + bool after_separator = j > 0 && is_separator(haystack[j-1]); + score_t sep_bonus = after_separator ? 0.8 : 0; + + score = max(last_M[j - 1] + match_bonus[j] + sep_bonus, /* consecutive match, doesn't stack with match_bonus */ - last_D[j - 1] + consecutive_bonus); + last_D[j - 1] + consecutive_bonus + sep_bonus); } curr_D[j] = score; curr_M[j] = prev_score = max(score, prev_score + gap_score); @@ -228,6 +247,38 @@ static inline void match_row(int row, score_t* curr_D, score_t* curr_M, // Also, the reference algorithm does not take into account case sensitivity // which has been implemented here. +// Check whether needle is a prefix of haystack, skipping haystack separators +// the needle does not itself ask for, e.g. ("R.E.P.O", "repo", true) matches +static bool starts_with_ignore_sep(const char* haystack, const char* needle, bool insensitive) { + while(*needle) { + // Skip haystack separators, unless the needle wants that very character + while(*haystack && is_separator(*haystack) && *haystack != *needle) haystack++; + if(!*haystack) return false; + if(insensitive) { + if(tolower((unsigned char) *haystack) != tolower((unsigned char) *needle)) return false; + } else { + if(*haystack != *needle) return false; + } + haystack++; + needle++; + } + return true; +} + +// Check if needle appears anywhere in haystack ignoring separators +static bool contains_ignore_sep(const char* haystack, const char* needle, bool insensitive) { + // An empty needle is trivially contained + if(*needle == '\0') return true; + + // Try starting at each position in haystack + for(const char* p = haystack; *p; p++) { + // A separator can only start a match when the needle begins with it + if(is_separator(*p) && *p != *needle) continue; + if(starts_with_ignore_sep(p, needle, insensitive)) return true; + } + return false; +} + static score_t fuzzy_score(const char* haystack, const char* needle, bool insensitive) { if(*needle == 0) return SCORE_MIN; @@ 
-252,6 +303,22 @@ static score_t fuzzy_score(const char* haystack, const char* needle, bool insens return SCORE_MAX; } + /* Give a bonus for substring matches - this makes the search + * prefer "firefox" over "fi...r...e...f...o...x" when searching + * for "firefox". This makes the results feel much more natural. + * + * IMPORTANT: We also check ignoring separators, so "R.E.P.O" + * gets the same bonus as "REPO" when searching for "repo". + */ + score_t substring_bonus = 0; + if(starts_with_ignore_sep(haystack, needle, insensitive)) { + /* Starts with the search term (ignoring separators) - highest bonus */ + substring_bonus = SCORE_MATCH_STARTS_WITH; + } else if(contains_ignore_sep(haystack, needle, insensitive)) { + /* Contains the search term as substring (ignoring separators) */ + substring_bonus = SCORE_MATCH_CONTAINS; + } + /* * D[][] Stores the best score for this position ending with a match. * M[][] Stores the best possible score at this position. @@ -273,7 +340,7 @@ static score_t fuzzy_score(const char* haystack, const char* needle, bool insens SWAP(curr_M, last_M, score_t *); } - return last_M[m - 1]; + return last_M[m - 1] + substring_bonus; } // end fuzzy matching diff --git a/src/wofi.c b/src/wofi.c index 4b6442f..dd21839 100644 --- a/src/wofi.c +++ b/src/wofi.c @@ -1871,8 +1871,8 @@ void wofi_init(struct map* _config) { char* password_char = map_get(config, "password_char"); exec_search = strcmp(config_get(config, "exec_search", "false"), "true") == 0; bool hide_scroll = strcmp(config_get(config, "hide_scroll", "false"), "true") == 0; - matching = config_get_mnemonic(config, "matching", "contains", 3, "contains", "multi-contains", "fuzzy"); - insensitive = strcmp(config_get(config, "insensitive", "false"), "true") == 0; + matching = config_get_mnemonic(config, "matching", "fuzzy", 3, "contains", "multi-contains", "fuzzy"); + insensitive = strcmp(config_get(config, "insensitive", "true"), "true") == 0; parse_search = strcmp(config_get(config, 
"parse_search", "false"), "true") == 0; location = config_get_mnemonic(config, "location", "center", 18, "center", "top_left", "top", "top_right", "right", "bottom_right", "bottom", "bottom_left", "left",