311 lines
7.7 KiB
Python
311 lines
7.7 KiB
Python
import os
|
|
|
|
from .tsl import USING_TSL_PACK
|
|
|
|
# Default lookup table: file extension -> parser (grammar) name.
# Keys are matched exactly, so lookups are case-sensitive (note the
# separate ".r" and ".R" entries).
PARSERS = {
    # Scripting and web
    ".py": "python",
    ".js": "javascript",
    ".mjs": "javascript",  # ".mjs" marks an ES-module JavaScript file
    ".go": "go",
    ".bash": "bash",
    # C family and friends
    ".c": "c",
    ".cc": "cpp",
    ".cs": "c_sharp",
    ".cl": "commonlisp",
    ".cpp": "cpp",
    ".css": "css",
    ".dockerfile": "dockerfile",
    ".dot": "dot",
    ".el": "elisp",
    ".ex": "elixir",
    ".elm": "elm",
    ".et": "embedded_template",
    ".erl": "erlang",
    ".gomod": "gomod",
    ".hack": "hack",
    ".hs": "haskell",
    ".hcl": "hcl",
    ".html": "html",
    ".java": "java",
    ".jsdoc": "jsdoc",
    ".json": "json",
    ".jl": "julia",
    ".kt": "kotlin",
    ".lua": "lua",
    ".mk": "make",
    # See https://github.com/ikatyang/tree-sitter-markdown/issues/59
    ".md": "markdown",
    ".m": "objc",
    ".ml": "ocaml",
    ".mli": "ocaml_interface",
    ".pl": "perl",
    ".php": "php",
    ".ql": "ql",
    # Both spellings of the R extension are listed explicitly.
    ".r": "r",
    ".R": "r",
    ".regex": "regex",
    ".rst": "rst",
    ".rb": "ruby",
    ".rs": "rust",
    ".scala": "scala",
    ".sql": "sql",
    ".sqlite": "sqlite",
    ".tf": "hcl",  # Terraform files share the HCL parser
    ".toml": "toml",
    ".tsq": "tsq",
    ".tsx": "typescript",
    ".ts": "typescript",
    ".yaml": "yaml",
}
|
|
|
|
if USING_TSL_PACK:
    # When the language pack is in use, replace PARSERS with a broader
    # mapping. Keys are either a file extension (leading dot) or a complete
    # file name such as "Dockerfile" or "go.mod"; values are parser names.
    # Entries are grouped alphabetically by parser name.
    PARSERS = {
        # A
        ".as": "actionscript",
        ".adb": "ada",
        ".ads": "ada",
        ".agda": "agda",
        ".ino": "arduino",
        ".asm": "asm",
        ".s": "asm",
        ".astro": "astro",
        # B
        ".sh": "bash",
        ".bash": "bash",
        ".zsh": "bash",
        ".bean": "beancount",
        ".bib": "bibtex",
        ".bicep": "bicep",
        ".bb": "bitbake",
        ".bbappend": "bitbake",
        ".bbclass": "bitbake",
        # C
        ".c": "c",
        ".h": "c",
        ".cairo": "cairo",
        ".capnp": "capnp",
        ".chatito": "chatito",
        ".clar": "clarity",
        ".clj": "clojure",
        ".cljs": "clojure",
        ".cljc": "clojure",
        ".edn": "clojure",
        ".cmake": "cmake",
        "CMakeLists.txt": "cmake",
        ".lisp": "commonlisp",
        ".cl": "commonlisp",
        ".cpon": "cpon",
        ".cpp": "cpp",
        ".cc": "cpp",
        ".cxx": "cpp",
        ".hpp": "cpp",
        ".hxx": "cpp",
        ".h++": "cpp",
        ".cs": "csharp",
        ".css": "css",
        ".csv": "csv",
        ".cu": "cuda",
        ".cuh": "cuda",
        # D
        ".d": "d",
        ".dart": "dart",
        "Dockerfile": "dockerfile",
        ".dtd": "dtd",
        # E
        ".el": "elisp",
        ".ex": "elixir",
        ".exs": "elixir",
        ".elm": "elm",
        ".erl": "erlang",
        ".hrl": "erlang",
        # F
        ".fnl": "fennel",
        ".fir": "firrtl",
        ".fish": "fish",
        ".f": "fortran",
        ".f90": "fortran",
        ".f95": "fortran",
        ".f03": "fortran",
        ".f08": "fortran",
        ".fc": "func",
        # G
        ".gd": "gdscript",
        ".gitattributes": "gitattributes",
        ".gitcommit": "gitcommit",
        ".gitignore": "gitignore",
        ".gleam": "gleam",
        ".glsl": "glsl",
        ".vert": "glsl",
        ".frag": "glsl",
        ".gn": "gn",
        ".gni": "gn",
        ".go": "go",
        "go.mod": "gomod",
        "go.sum": "gosum",
        ".groovy": "groovy",
        ".launch": "gstlaunch",
        # H
        ".hack": "hack",
        ".ha": "hare",
        ".hs": "haskell",
        ".hx": "haxe",
        ".hcl": "hcl",
        ".tf": "hcl",
        ".tfvars": "hcl",
        ".heex": "heex",
        ".hlsl": "hlsl",
        ".html": "html",
        ".htm": "html",
        ".hypr": "hyprlang",
        # I
        ".ispc": "ispc",
        # J
        ".janet": "janet",
        ".java": "java",
        ".js": "javascript",
        ".jsx": "javascript",
        ".mjs": "javascript",
        ".jsdoc": "jsdoc",
        ".json": "json",
        ".jsonnet": "jsonnet",
        ".libsonnet": "jsonnet",
        ".jl": "julia",
        # K
        "Kconfig": "kconfig",
        ".kdl": "kdl",
        ".kt": "kotlin",
        ".kts": "kotlin",
        # L
        ".tex": "latex",
        ".sty": "latex",
        ".cls": "latex",
        ".ld": "linkerscript",
        ".ll": "llvm",
        ".td": "tablegen",  # kept next to the LLVM entry
        ".lua": "lua",
        ".luadoc": "luadoc",
        ".luap": "luap",
        ".luau": "luau",
        # M
        ".magik": "magik",
        "Makefile": "make",
        ".mk": "make",
        ".md": "markdown",
        ".markdown": "markdown",
        # .m is used by both MATLAB and Objective-C; MATLAB takes priority here.
        ".m": "matlab",
        ".mat": "matlab",
        ".mermaid": "mermaid",
        "meson.build": "meson",
        # N
        ".ninja": "ninja",
        ".nix": "nix",
        ".nqc": "nqc",
        # O
        # (.m is claimed by the MATLAB entry above, so only .mm maps to objc.)
        ".mm": "objc",
        ".ml": "ocaml",
        ".mli": "ocaml_interface",
        ".odin": "odin",
        ".org": "org",
        # P
        ".pas": "pascal",
        ".pp": "pascal",
        ".pem": "pem",
        ".pl": "perl",
        ".pm": "perl",
        ".pgn": "pgn",
        ".php": "php",
        ".po": "po",
        ".pot": "po",
        ".pony": "pony",
        ".ps1": "powershell",
        ".psm1": "powershell",
        ".printf": "printf",
        ".prisma": "prisma",
        ".properties": "properties",
        ".proto": "proto",
        ".psv": "psv",
        ".purs": "purescript",
        "MANIFEST.in": "pymanifest",
        ".py": "python",
        # Q
        "qmldir": "qmldir",
        ".qml": "qmljs",
        # R
        ".r": "r",
        ".R": "r",
        ".rkt": "racket",
        ".re2c": "re2c",
        ".inputrc": "readline",
        "requirements.txt": "requirements",
        ".ron": "ron",
        ".rst": "rst",
        ".rb": "ruby",
        ".rs": "rust",
        # S
        ".scala": "scala",
        ".sc": "scala",
        ".scm": "scheme",  # .scm is primarily used for Scheme files
        ".ss": "scheme",
        ".scss": "scss",
        ".smali": "smali",
        ".smithy": "smithy",
        ".sol": "solidity",
        ".rq": "sparql",
        ".sql": "sql",
        ".nut": "squirrel",
        ".bzl": "starlark",
        "BUILD": "starlark",
        "WORKSPACE": "starlark",
        ".svelte": "svelte",
        ".swift": "swift",
        # T
        ".tcl": "tcl",
        ".thrift": "thrift",
        ".toml": "toml",
        ".tsv": "tsv",
        # NOTE(review): the language pack appears to ship a dedicated "tsx"
        # grammar as well -- confirm whether .tsx should use it.
        ".tsx": "typescript",
        ".twig": "twig",
        ".ts": "typescript",
        ".typ": "typst",
        # U
        ".rules": "udev",
        ".ungram": "ungrammar",
        ".tal": "uxntal",
        # V
        # .v is used by both the V language and Verilog; Verilog takes
        # priority here, so V sources need their parser chosen manually.
        ".sv": "verilog",
        ".v": "verilog",
        ".vhd": "vhdl",
        ".vhdl": "vhdl",
        ".vim": "vim",
        ".vimrc": "vim",
        ".vue": "vue",
        # W
        ".wgsl": "wgsl",
        # X
        ".XCompose": "xcompose",
        ".xml": "xml",
        ".svg": "xml",
        ".xsl": "xml",
        # Y
        # The default (non-pack) table maps .yaml; keep it here too so YAML
        # files do not lose their parser when the language pack is active.
        ".yaml": "yaml",
        ".yml": "yaml",
        ".yuck": "yuck",
        # Z
        ".zig": "zig",
    }
|
|
|
|
|
|
def filename_to_lang(filename):
    """Return the parser name registered for *filename*, if any.

    The final path component is looked up in ``PARSERS`` first, so entries
    keyed by a complete file name (for example ``"Dockerfile"`` or
    ``"go.mod"``) win. Otherwise the file extension, including its leading
    dot, is looked up. Both lookups are exact, hence case-sensitive.

    Args:
        filename: A file name or path.

    Returns:
        The mapped parser name, or ``None`` when neither the file name nor
        its extension is a key of ``PARSERS``.
    """
    leaf = os.path.basename(filename)
    if leaf not in PARSERS:
        # No whole-name entry: fall back to the extension.
        _, suffix = os.path.splitext(filename)
        return PARSERS.get(suffix)
    return PARSERS[leaf]
|