from __future__ import annotations import re from typing import Iterable from rich.cells import get_character_cell_size from textual._cells import cell_len from textual._loop import loop_last from textual.expand_tabs import get_tab_widths re_chunk = re.compile(r"\S+\s*|\s+") def chunks(text: str) -> Iterable[tuple[int, int, str]]: """Yields each "chunk" from the text as a tuple containing (start_index, end_index, chunk_content). A "chunk" in this context refers to a word and any whitespace around it. Args: text: The text to split into chunks. Returns: Yields tuples containing the start, end and content for each chunk. """ end = 0 while (chunk_match := re_chunk.match(text, end)) is not None: start, end = chunk_match.span() chunk = chunk_match.group(0) yield start, end, chunk def compute_wrap_offsets( text: str, width: int, tab_size: int, fold: bool = True, precomputed_tab_sections: list[tuple[str, int]] | None = None, ) -> list[int]: """Given a string of text, and a width (measured in cells), return a list of codepoint indices which the string should be split at in order for it to fit within the given width. Args: text: The text to examine. width: The available cell width. tab_size: The tab stop width. fold: If True, words longer than `width` will be folded onto a new line. precomputed_tab_sections: The output of `get_tab_widths` can be passed here directly, to prevent us from having to recompute the value. Returns: A list of indices to break the line at. """ tab_size = min(tab_size, width) if precomputed_tab_sections: tab_sections = precomputed_tab_sections else: tab_sections = get_tab_widths(text, tab_size) break_positions: list[int] = [] # offsets to insert the breaks at append = break_positions.append cell_offset = 0 _cell_len = cell_len tab_section_index = 0 cumulative_width = 0 cumulative_widths: list[int] = [] # prefix sum of tab widths for each codepoint record_widths = cumulative_widths.extend for last, (tab_section, tab_width) in loop_last(tab_sections): # add 1 since the \t character is stripped by get_tab_widths section_codepoint_length = len(tab_section) + int(bool(tab_width)) widths = [cumulative_width] * section_codepoint_length record_widths(widths) cumulative_width += tab_width if last: cumulative_widths.append(cumulative_width) for start, end, chunk in chunks(text): chunk_width = _cell_len(chunk) # this cell len excludes tabs completely tab_width_before_start = cumulative_widths[start] tab_width_before_end = cumulative_widths[end] chunk_tab_width = tab_width_before_end - tab_width_before_start chunk_width += chunk_tab_width remaining_space = width - cell_offset chunk_fits = remaining_space >= chunk_width if chunk_fits: # Simplest case - the word fits within the remaining width for this line. cell_offset += chunk_width else: # Not enough space remaining for this word on the current line. if chunk_width > width: # The word doesn't fit on any line, so we must fold it if fold: _get_character_cell_size = get_character_cell_size lines: list[list[str]] = [[]] append_new_line = lines.append append_to_last_line = lines[-1].append total_width = 0 for character in chunk: if character == "\t": # Tab characters have dynamic width, so look it up cell_width = tab_sections[tab_section_index][1] tab_section_index += 1 else: cell_width = _get_character_cell_size(character) if total_width + cell_width > width: append_new_line([character]) append_to_last_line = lines[-1].append total_width = cell_width else: append_to_last_line(character) total_width += cell_width folded_word = ["".join(line) for line in lines] for last, line in loop_last(folded_word): if start: append(start) if last: # Since cell_len ignores tabs, we need to check the width # of the tabs in this line. The width of tabs within the # line is computed by taking the difference between the # cumulative width of tabs up to the end of the line and the # cumulative width of tabs up to the start of the line. line_tab_widths = ( cumulative_widths[start + len(line)] - cumulative_widths[start] ) cell_offset = _cell_len(line) + line_tab_widths else: start += len(line) else: # Folding isn't allowed, so crop the word. if start: append(start) cell_offset = chunk_width elif cell_offset and start: # The word doesn't fit within the remaining space on the current # line, but it *can* fit on to the next (empty) line. append(start) cell_offset = chunk_width return break_positions