"""Provides a utility function and class for creating Markdown-friendly slugs. The approach to creating slugs is designed to be as close to GitHub-flavoured Markdown as possible. However, because there doesn't appear to be any actual documentation for this 'standard', the code here involves some guesswork and also some pragmatic shortcuts. Expect this to grow over time. The main rules used in here at the moment are: 1. Strip all leading and trailing whitespace. 2. Remove all non-lingual characters (emoji, etc). 3. Remove all punctuation and whitespace apart from dash and underscore. """ from __future__ import annotations from collections import defaultdict from re import compile from string import punctuation from typing import Pattern from urllib.parse import quote from typing_extensions import Final WHITESPACE_REPLACEMENT: Final[str] = "-" """The character to replace undesirable characters with.""" REMOVABLE: Final[str] = punctuation.replace(WHITESPACE_REPLACEMENT, "").replace("_", "") """The collection of characters that should be removed altogether.""" NONLINGUAL: Final[str] = ( r"\U000024C2-\U0001F251" r"\U00002702-\U000027B0" r"\U0001F1E0-\U0001F1FF" r"\U0001F300-\U0001F5FF" # Miscellaneous Symbols And Pictographs r"\U0001F600-\U0001F64F" # Emoticons r"\U0001F680-\U0001F6FF" # Transport and Map Symbols r"\U0001F900-\U0001F9FF" # Supplemental Symbols and Pictographs r"\u200D" r"\u2640-\u2642" ) """A string that can be used in a regular expression to remove most non-lingual characters.""" STRIP_RE: Final[Pattern] = compile(f"[{REMOVABLE}{NONLINGUAL}]+") """A regular expression for finding all the characters that should be removed.""" WHITESPACE_RE: Final[Pattern] = compile(r"\s") """A regular expression for finding all the whitespace and turning it into `REPLACEMENT`.""" def slug(text: str) -> str: """Create a Markdown-friendly slug from the given text. Args: text: The text to generate a slug from. Returns: A slug for the given text. The rules used in generating the slug are based on observations of how GitHub-flavoured Markdown works. """ result = text.strip().lower() for rule, replacement in ( (STRIP_RE, ""), (WHITESPACE_RE, WHITESPACE_REPLACEMENT), ): result = rule.sub(replacement, result) return quote(result) class TrackedSlugs: """Provides a class for generating tracked slugs. While [`slug`][textual._slug.slug] will generate a slug for a given string, it does not guarantee that it is unique for a given context. If you want to ensure that the same string generates unique slugs (perhaps heading slugs within a Markdown document, as an example), use an instance of this class to generate them. Example: ```python >>> slug("hello world") 'hello-world' >>> slug("hello world") 'hello-world' >>> unique = TrackedSlugs() >>> unique.slug("hello world") 'hello-world' >>> unique.slug("hello world") 'hello-world-1' ``` """ def __init__(self) -> None: """Initialise the tracked slug object.""" self._used: defaultdict[str, int] = defaultdict(int) """Keeps track of how many times a particular slug has been used.""" def slug(self, text: str) -> str: """Create a Markdown-friendly unique slug from the given text. Args: text: The text to generate a slug from. Returns: A slug for the given text. """ slugged = slug(text) used = self._used[slugged] self._used[slugged] += 1 if used: slugged = f"{slugged}-{used}" return slugged VALID_ID_CHARACTERS = frozenset("abcdefghijklmnopqrstuvwxyz0123456789-") def slug_for_tcss_id(text: str) -> str: """Produce a slug usable as a TCSS id from the given text. Args: text: Text. Returns: A slugified version of text suitable for use as a TCSS id. """ is_valid = VALID_ID_CHARACTERS.__contains__ slug = "".join( (character if is_valid(character) else "{:x}".format(ord(character))) for character in text.casefold().replace(" ", "-") ) if not slug: return "_" if slug[0].isdecimal(): return f"_{slug}" return slug