ai-station/.venv/lib/python3.12/site-packages/textual/fuzzy.py

225 lines
6.8 KiB
Python
Raw Permalink Normal View History

2025-12-25 14:54:33 +00:00
"""
Fuzzy matcher.
This class is used by the [command palette](/guide/command_palette) to match search terms.
"""
from __future__ import annotations
from functools import lru_cache
from operator import itemgetter
from re import finditer
from typing import Iterable, Sequence
import rich.repr
from textual.cache import LRUCache
from textual.content import Content
from textual.visual import Style
class FuzzySearch:
"""Performs a fuzzy search.
Unlike a regex solution, this will finds all possible matches.
"""
def __init__(
self, case_sensitive: bool = False, *, cache_size: int = 1024 * 4
) -> None:
"""Initialize fuzzy search.
Args:
case_sensitive: Is the match case sensitive?
cache_size: Number of queries to cache.
"""
self.case_sensitive = case_sensitive
self.cache: LRUCache[tuple[str, str], tuple[float, Sequence[int]]] = LRUCache(
cache_size
)
def match(self, query: str, candidate: str) -> tuple[float, Sequence[int]]:
"""Match against a query.
Args:
query: The fuzzy query.
candidate: A candidate to check,.
Returns:
A pair of (score, tuple of offsets). `(0, ())` for no result.
"""
cache_key = (query, candidate)
if cache_key in self.cache:
return self.cache[cache_key]
default: tuple[float, Sequence[int]] = (0.0, [])
result = max(self._match(query, candidate), key=itemgetter(0), default=default)
self.cache[cache_key] = result
return result
@classmethod
@lru_cache(maxsize=1024)
def get_first_letters(cls, candidate: str) -> frozenset[int]:
return frozenset({match.start() for match in finditer(r"\w+", candidate)})
def score(self, candidate: str, positions: Sequence[int]) -> float:
"""Score a search.
Args:
search: Search object.
Returns:
Score.
"""
first_letters = self.get_first_letters(candidate)
# This is a heuristic, and can be tweaked for better results
# Boost first letter matches
offset_count = len(positions)
score: float = offset_count + len(first_letters.intersection(positions))
groups = 1
last_offset, *offsets = positions
for offset in offsets:
if offset != last_offset + 1:
groups += 1
last_offset = offset
# Boost to favor less groups
normalized_groups = (offset_count - (groups - 1)) / offset_count
score *= 1 + (normalized_groups * normalized_groups)
return score
def _match(
self, query: str, candidate: str
) -> Iterable[tuple[float, Sequence[int]]]:
letter_positions: list[list[int]] = []
position = 0
if not self.case_sensitive:
candidate = candidate.lower()
query = query.lower()
score = self.score
if query in candidate:
# Quick exit when the query exists as a substring
query_location = candidate.find(query)
offsets = list(range(query_location, query_location + len(query)))
yield (
score(candidate, offsets) * (2.0 if candidate == query else 1.5),
offsets,
)
return
for offset, letter in enumerate(query):
last_index = len(candidate) - offset
positions: list[int] = []
letter_positions.append(positions)
index = position
while (location := candidate.find(letter, index)) != -1:
positions.append(location)
index = location + 1
if index >= last_index:
break
if not positions:
yield (0.0, ())
return
position = positions[0] + 1
possible_offsets: list[list[int]] = []
query_length = len(query)
def get_offsets(offsets: list[int], positions_index: int) -> None:
"""Recursively match offsets.
Args:
offsets: A list of offsets.
positions_index: Index of query letter.
"""
for offset in letter_positions[positions_index]:
if not offsets or offset > offsets[-1]:
new_offsets = [*offsets, offset]
if len(new_offsets) == query_length:
possible_offsets.append(new_offsets)
else:
get_offsets(new_offsets, positions_index + 1)
get_offsets([], 0)
for offsets in possible_offsets:
yield score(candidate, offsets), offsets
@rich.repr.auto
class Matcher:
"""A fuzzy matcher."""
def __init__(
self,
query: str,
*,
match_style: Style | None = None,
case_sensitive: bool = False,
) -> None:
"""Initialise the fuzzy matching object.
Args:
query: A query as typed in by the user.
match_style: The style to use to highlight matched portions of a string.
case_sensitive: Should matching be case sensitive?
"""
self._query = query
self._match_style = Style(reverse=True) if match_style is None else match_style
self._case_sensitive = case_sensitive
self.fuzzy_search = FuzzySearch()
@property
def query(self) -> str:
"""The query string to look for."""
return self._query
@property
def match_style(self) -> Style:
"""The style that will be used to highlight hits in the matched text."""
return self._match_style
@property
def case_sensitive(self) -> bool:
"""Is this matcher case sensitive?"""
return self._case_sensitive
def match(self, candidate: str) -> float:
"""Match the candidate against the query.
Args:
candidate: Candidate string to match against the query.
Returns:
Strength of the match from 0 to 1.
"""
return self.fuzzy_search.match(self.query, candidate)[0]
def highlight(self, candidate: str) -> Content:
"""Highlight the candidate with the fuzzy match.
Args:
candidate: The candidate string to match against the query.
Returns:
A [`Text`][rich.text.Text] object with highlighted matches.
"""
content = Content.from_markup(candidate)
score, offsets = self.fuzzy_search.match(self.query, candidate)
if not score:
return content
for offset in offsets:
if not candidate[offset].isspace():
content = content.stylize(self._match_style, offset, offset + 1)
return content
if __name__ == "__main__":
fuzzy_search = FuzzySearch()
fuzzy_search.match("foo.bar", "foo/egg.bar")