225 lines
6.8 KiB
Python
225 lines
6.8 KiB
Python
"""
Fuzzy matcher.

This class is used by the [command palette](/guide/command_palette) to match search terms.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from functools import lru_cache
|
|
from operator import itemgetter
|
|
from re import finditer
|
|
from typing import Iterable, Sequence
|
|
|
|
import rich.repr
|
|
|
|
from textual.cache import LRUCache
|
|
from textual.content import Content
|
|
from textual.visual import Style
|
|
|
|
|
|
class FuzzySearch:
    """Performs a fuzzy search.

    Unlike a regex solution, this finds all possible matches and keeps the
    highest scoring one.
    """

    def __init__(
        self, case_sensitive: bool = False, *, cache_size: int = 1024 * 4
    ) -> None:
        """Initialize fuzzy search.

        Args:
            case_sensitive: Is the match case sensitive?
            cache_size: Number of queries to cache.
        """
        self.case_sensitive = case_sensitive
        # Maps (query, candidate) to the best (score, offsets) pair found.
        self.cache: LRUCache[tuple[str, str], tuple[float, Sequence[int]]] = LRUCache(
            cache_size
        )

    def match(self, query: str, candidate: str) -> tuple[float, Sequence[int]]:
        """Match against a query.

        Results are cached per `(query, candidate)` pair.

        Args:
            query: The fuzzy query.
            candidate: A candidate to check.

        Returns:
            A pair of (score, offsets of the matched characters in the
                candidate). `(0.0, ())` for no result.
        """
        cache_key = (query, candidate)
        if cache_key in self.cache:
            return self.cache[cache_key]
        # An immutable default keeps the "no result" value consistent with
        # `_match` and avoids sharing a mutable list through the cache.
        default: tuple[float, Sequence[int]] = (0.0, ())
        result = max(self._match(query, candidate), key=itemgetter(0), default=default)
        self.cache[cache_key] = result
        return result

    @classmethod
    @lru_cache(maxsize=1024)
    def get_first_letters(cls, candidate: str) -> frozenset[int]:
        """Get the offsets of the first character of every word in a candidate.

        Args:
            candidate: The candidate string.

        Returns:
            A frozenset of the start offsets of each `\\w+` run.
        """
        return frozenset(word.start() for word in finditer(r"\w+", candidate))

    def score(self, candidate: str, positions: Sequence[int]) -> float:
        """Score a match.

        Args:
            candidate: The candidate string that was matched.
            positions: Ascending offsets in the candidate of the matched characters.

        Returns:
            The score; higher is better. `0.0` if there are no positions.
        """
        offset_count = len(positions)
        if not offset_count:
            # Nothing matched: avoids unpacking an empty sequence and a
            # division by zero below.
            return 0.0

        first_letters = self.get_first_letters(candidate)
        # This is a heuristic, and can be tweaked for better results
        # Boost first letter matches
        score: float = offset_count + len(first_letters.intersection(positions))

        # A new group starts wherever two matched offsets are not adjacent.
        groups = 1 + sum(
            1
            for previous, current in zip(positions, positions[1:])
            if current != previous + 1
        )

        # Boost to favor fewer groups
        normalized_groups = (offset_count - (groups - 1)) / offset_count
        score *= 1 + (normalized_groups * normalized_groups)
        return score

    def _match(
        self, query: str, candidate: str
    ) -> Iterable[tuple[float, Sequence[int]]]:
        """Generate scored matches of the query against the candidate.

        Args:
            query: The fuzzy query.
            candidate: The candidate to match against.

        Yields:
            Pairs of (score, offsets). A single `(0.0, ())` is yielded when
                the query is empty or cannot be matched.
        """
        if not query:
            # An empty query is a substring of everything, which would send an
            # empty offset list to the scorer. Treat it as "no result".
            yield (0.0, ())
            return

        if not self.case_sensitive:
            candidate = candidate.lower()
            query = query.lower()

        score = self.score
        query_location = candidate.find(query)
        if query_location != -1:
            # Quick exit when the query exists as a substring
            offsets = list(range(query_location, query_location + len(query)))
            yield (
                # Exact matches get a bigger boost than substring matches.
                score(candidate, offsets) * (2.0 if candidate == query else 1.5),
                offsets,
            )
            return

        # For each query letter, the candidate offsets where it may be matched.
        letter_positions: list[list[int]] = []
        position = 0
        for offset, letter in enumerate(query):
            last_index = len(candidate) - offset
            positions: list[int] = []
            letter_positions.append(positions)
            index = position
            while (location := candidate.find(letter, index)) != -1:
                positions.append(location)
                index = location + 1
                if index >= last_index:
                    # Stop collecting once the scan reaches the tail of the
                    # candidate (heuristic bound on the work done).
                    break
            if not positions:
                # A query letter is missing, so no match is possible.
                yield (0.0, ())
                return
            # The next letter must come after the earliest match of this one.
            position = positions[0] + 1

        possible_offsets: list[list[int]] = []
        query_length = len(query)

        def get_offsets(offsets: list[int], positions_index: int) -> None:
            """Recursively match offsets.

            Args:
                offsets: A list of offsets.
                positions_index: Index of query letter.
            """
            for offset in letter_positions[positions_index]:
                # Offsets must be strictly increasing to preserve letter order.
                if not offsets or offset > offsets[-1]:
                    new_offsets = [*offsets, offset]
                    if len(new_offsets) == query_length:
                        possible_offsets.append(new_offsets)
                    else:
                        get_offsets(new_offsets, positions_index + 1)

        get_offsets([], 0)

        for offsets in possible_offsets:
            yield score(candidate, offsets), offsets
|
|
|
|
|
|
@rich.repr.auto
class Matcher:
    """A fuzzy matcher."""

    def __init__(
        self,
        query: str,
        *,
        match_style: Style | None = None,
        case_sensitive: bool = False,
    ) -> None:
        """Initialise the fuzzy matching object.

        Args:
            query: A query as typed in by the user.
            match_style: The style to use to highlight matched portions of a string.
                Defaults to a reverse style when not given.
            case_sensitive: Should matching be case sensitive?
        """
        self._query = query
        self._match_style = Style(reverse=True) if match_style is None else match_style
        self._case_sensitive = case_sensitive
        # Forward the flag so the search actually honours `case_sensitive`.
        self.fuzzy_search = FuzzySearch(case_sensitive=case_sensitive)

    @property
    def query(self) -> str:
        """The query string to look for."""
        return self._query

    @property
    def match_style(self) -> Style:
        """The style that will be used to highlight hits in the matched text."""
        return self._match_style

    @property
    def case_sensitive(self) -> bool:
        """Is this matcher case sensitive?"""
        return self._case_sensitive

    def match(self, candidate: str) -> float:
        """Match the candidate against the query.

        Args:
            candidate: Candidate string to match against the query.

        Returns:
            Strength of the match: 0 for no match, larger values for
                stronger matches.
        """
        return self.fuzzy_search.match(self.query, candidate)[0]

    def highlight(self, candidate: str) -> Content:
        """Highlight the candidate with the fuzzy match.

        Args:
            candidate: The candidate string to match against the query.

        Returns:
            A `Content` object with highlighted matches. The content is
                returned without highlights if there is no match.
        """
        content = Content.from_markup(candidate)
        score, offsets = self.fuzzy_search.match(self.query, candidate)
        if not score:
            return content
        # NOTE(review): offsets index the raw candidate string; presumably they
        # only line up with `content` when the candidate has no markup tags.
        # TODO confirm.
        for offset in offsets:
            # Whitespace is left unstyled.
            if not candidate[offset].isspace():
                content = content.stylize(self._match_style, offset, offset + 1)
        return content
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: fuzzy match a dotted query against a path-like candidate.
    FuzzySearch().match("foo.bar", "foo/egg.bar")
|