import os

from .tsl import USING_TSL_PACK

# Mapping used when the language pack is NOT in use.
# Keys are file extensions (including the leading dot); values are the parser
# names that ``filename_to_lang`` returns.
PARSERS = {
    ".py": "python",
    ".js": "javascript",
    ".mjs": "javascript",  # mjs file extension stands for "module JavaScript."
    ".go": "go",
    ".bash": "bash",
    ".c": "c",
    ".cc": "cpp",
    ".cs": "c_sharp",
    ".cl": "commonlisp",
    ".cpp": "cpp",
    ".css": "css",
    ".dockerfile": "dockerfile",
    ".dot": "dot",
    ".el": "elisp",
    ".ex": "elixir",
    ".elm": "elm",
    ".et": "embedded_template",
    ".erl": "erlang",
    ".gomod": "gomod",
    ".hack": "hack",
    ".hs": "haskell",
    ".hcl": "hcl",
    ".html": "html",
    ".java": "java",
    ".jsdoc": "jsdoc",
    ".json": "json",
    ".jl": "julia",
    ".kt": "kotlin",
    ".lua": "lua",
    ".mk": "make",
    ".md": "markdown",  # https://github.com/ikatyang/tree-sitter-markdown/issues/59
    ".m": "objc",
    ".ml": "ocaml",
    ".mli": "ocaml_interface",
    ".pl": "perl",
    ".php": "php",
    ".ql": "ql",
    ".r": "r",
    ".R": "r",
    ".regex": "regex",
    ".rst": "rst",
    ".rb": "ruby",
    ".rs": "rust",
    ".scala": "scala",
    ".sql": "sql",
    ".sqlite": "sqlite",
    ".tf": "hcl",
    ".toml": "toml",
    ".tsq": "tsq",
    ".tsx": "typescript",
    ".ts": "typescript",
    ".yaml": "yaml",
    ".yml": "yaml",
}

if USING_TSL_PACK:
    # Replace the PARSERS dictionary with a comprehensive mapping based on the
    # language pack. Keys are either a file extension (leading dot) or a full
    # file name such as "Dockerfile" or "go.mod"; ``filename_to_lang`` tries
    # the full file name first and the extension second.
    PARSERS = {
        # A
        ".as": "actionscript",
        ".adb": "ada",
        ".ads": "ada",
        ".agda": "agda",
        ".ino": "arduino",
        ".asm": "asm",
        ".s": "asm",
        ".astro": "astro",
        # B
        ".sh": "bash",
        ".bash": "bash",
        ".zsh": "bash",
        ".bean": "beancount",
        ".bib": "bibtex",
        ".bicep": "bicep",
        ".bb": "bitbake",
        ".bbappend": "bitbake",
        ".bbclass": "bitbake",
        # C
        ".c": "c",
        ".h": "c",
        ".cairo": "cairo",
        ".capnp": "capnp",
        ".chatito": "chatito",
        ".clar": "clarity",
        ".clj": "clojure",
        ".cljs": "clojure",
        ".cljc": "clojure",
        ".edn": "clojure",
        ".cmake": "cmake",
        "CMakeLists.txt": "cmake",
        ".lisp": "commonlisp",
        ".cl": "commonlisp",
        ".cpon": "cpon",
        ".cpp": "cpp",
        ".cc": "cpp",
        ".cxx": "cpp",
        ".hpp": "cpp",
        ".hxx": "cpp",
        ".h++": "cpp",
        ".cs": "csharp",
        ".css": "css",
        ".csv": "csv",
        ".cu": "cuda",
        ".cuh": "cuda",
        # D
        ".d": "d",
        ".dart": "dart",
        "Dockerfile": "dockerfile",
        ".dtd": "dtd",
        # E
        ".el": "elisp",
        ".ex": "elixir",
        ".exs": "elixir",
        ".elm": "elm",
        ".erl": "erlang",
        ".hrl": "erlang",
        # F
        ".fnl": "fennel",
        ".fir": "firrtl",
        ".fish": "fish",
        ".f": "fortran",
        ".f90": "fortran",
        ".f95": "fortran",
        ".f03": "fortran",
        ".f08": "fortran",
        ".fc": "func",
        # G
        ".gd": "gdscript",
        ".gitattributes": "gitattributes",
        ".gitcommit": "gitcommit",
        ".gitignore": "gitignore",
        ".gleam": "gleam",
        ".glsl": "glsl",
        ".vert": "glsl",
        ".frag": "glsl",
        ".gn": "gn",
        ".gni": "gn",
        ".go": "go",
        "go.mod": "gomod",
        "go.sum": "gosum",
        ".groovy": "groovy",
        ".launch": "gstlaunch",
        # H
        ".hack": "hack",
        ".ha": "hare",
        ".hs": "haskell",
        ".hx": "haxe",
        ".hcl": "hcl",
        ".tf": "hcl",
        ".tfvars": "hcl",
        ".heex": "heex",
        ".hlsl": "hlsl",
        ".html": "html",
        ".htm": "html",
        ".hypr": "hyprlang",
        # I
        ".ispc": "ispc",
        # J
        ".janet": "janet",
        ".java": "java",
        ".js": "javascript",
        ".jsx": "javascript",
        ".mjs": "javascript",
        ".jsdoc": "jsdoc",
        ".json": "json",
        ".jsonnet": "jsonnet",
        ".libsonnet": "jsonnet",
        ".jl": "julia",
        # K
        "Kconfig": "kconfig",
        ".kdl": "kdl",
        ".kt": "kotlin",
        ".kts": "kotlin",
        # L
        ".tex": "latex",
        ".sty": "latex",
        ".cls": "latex",
        ".ld": "linkerscript",
        ".ll": "llvm",
        ".td": "tablegen",
        ".lua": "lua",
        ".luadoc": "luadoc",
        ".luap": "luap",
        ".luau": "luau",
        # M
        ".magik": "magik",
        "Makefile": "make",
        ".mk": "make",
        ".md": "markdown",
        ".markdown": "markdown",
        ".m": "matlab",  # Note: .m is used by both MATLAB and Objective-C, prioritizing MATLAB here
        ".mat": "matlab",
        ".mermaid": "mermaid",
        "meson.build": "meson",
        # N
        ".ninja": "ninja",
        ".nix": "nix",
        ".nqc": "nqc",
        # O
        # .m extension is handled under MATLAB section (dual use extension)
        ".mm": "objc",
        ".ml": "ocaml",
        ".mli": "ocaml_interface",
        ".odin": "odin",
        ".org": "org",
        # P
        ".pas": "pascal",
        ".pp": "pascal",
        ".pem": "pem",
        ".pl": "perl",
        ".pm": "perl",
        ".pgn": "pgn",
        ".php": "php",
        ".po": "po",
        ".pot": "po",
        ".pony": "pony",
        ".ps1": "powershell",
        ".psm1": "powershell",
        ".printf": "printf",
        ".prisma": "prisma",
        ".properties": "properties",
        ".proto": "proto",
        ".psv": "psv",
        ".purs": "purescript",
        "MANIFEST.in": "pymanifest",
        ".py": "python",
        # Q
        "qmldir": "qmldir",
        ".qml": "qmljs",
        # R
        ".r": "r",
        ".R": "r",
        ".rkt": "racket",
        ".re2c": "re2c",
        ".inputrc": "readline",
        "requirements.txt": "requirements",
        ".ron": "ron",
        ".rst": "rst",
        ".rb": "ruby",
        ".rs": "rust",
        # S
        ".scala": "scala",
        ".sc": "scala",
        ".scm": "scheme",  # .scm is primarily used for Scheme files
        ".ss": "scheme",
        ".scss": "scss",
        ".smali": "smali",
        ".smithy": "smithy",
        ".sol": "solidity",
        ".rq": "sparql",
        ".sql": "sql",
        ".nut": "squirrel",
        ".bzl": "starlark",
        "BUILD": "starlark",
        "WORKSPACE": "starlark",
        ".svelte": "svelte",
        ".swift": "swift",
        # T
        ".tcl": "tcl",
        ".thrift": "thrift",
        ".toml": "toml",
        ".tsv": "tsv",
        ".tsx": "typescript",
        ".twig": "twig",
        ".ts": "typescript",
        ".typ": "typst",
        # U
        ".rules": "udev",
        ".ungram": "ungrammar",
        ".tal": "uxntal",
        # V
        # Note: .v extension is used by both V language and Verilog
        # Prioritizing Verilog as it's more commonly used
        ".sv": "verilog",
        ".v": "verilog",
        # For V language, users may need to specify parser manually
        ".vhd": "vhdl",
        ".vhdl": "vhdl",
        ".vim": "vim",
        ".vimrc": "vim",
        ".vue": "vue",
        # W
        ".wgsl": "wgsl",
        # X
        ".XCompose": "xcompose",
        ".xml": "xml",
        ".svg": "xml",
        ".xsl": "xml",
        # Y
        # The base mapping above resolves ".yaml"; keep it resolvable here too
        # so switching to the language pack does not drop YAML support.
        ".yaml": "yaml",
        ".yml": "yaml",
        ".yuck": "yuck",
        # Z
        ".zig": "zig",
    }


def filename_to_lang(filename):
    """Return the parser name registered for *filename*, or ``None``.

    Lookup is case-sensitive and happens in two steps:

    1. The final path component is looked up as-is, which matches whole-name
       keys such as ``"Dockerfile"`` or ``"go.mod"``. It also matches dotfile
       keys such as ``".gitignore"``, for which ``os.path.splitext`` reports
       an empty extension.
    2. Otherwise the extension reported by ``os.path.splitext`` (including
       the leading dot) is looked up.

    Args:
        filename: A file name or path.

    Returns:
        The value stored in ``PARSERS`` for the matching key, or ``None``
        when neither the file name nor its extension is registered.
    """
    basename = os.path.basename(filename)

    # Whole-file-name keys take priority over extension keys.
    lang = PARSERS.get(basename)
    if lang is not None:
        return lang

    extension = os.path.splitext(basename)[1]
    return PARSERS.get(extension)