ai-station/.venv/lib/python3.12/site-packages/grep_ast/parsers.py

311 lines
7.7 KiB
Python

import os
from .tsl import USING_TSL_PACK
# Updated mapping of file extensions to parsers
PARSERS = {
".py": "python",
".js": "javascript",
".mjs": "javascript", # mjs file extension stands for "module JavaScript."
".go": "go",
".bash": "bash",
".c": "c",
".cc": "cpp",
".cs": "c_sharp",
".cl": "commonlisp",
".cpp": "cpp",
".css": "css",
".dockerfile": "dockerfile",
".dot": "dot",
".el": "elisp",
".ex": "elixir",
".elm": "elm",
".et": "embedded_template",
".erl": "erlang",
".gomod": "gomod",
".hack": "hack",
".hs": "haskell",
".hcl": "hcl",
".html": "html",
".java": "java",
".jsdoc": "jsdoc",
".json": "json",
".jl": "julia",
".kt": "kotlin",
".lua": "lua",
".mk": "make",
".md": "markdown", # https://github.com/ikatyang/tree-sitter-markdown/issues/59
".m": "objc",
".ml": "ocaml",
".mli": "ocaml_interface",
".pl": "perl",
".php": "php",
".ql": "ql",
".r": "r",
".R": "r",
".regex": "regex",
".rst": "rst",
".rb": "ruby",
".rs": "rust",
".scala": "scala",
".sql": "sql",
".sqlite": "sqlite",
".tf": "hcl",
".toml": "toml",
".tsq": "tsq",
".tsx": "typescript",
".ts": "typescript",
".yaml": "yaml",
}
if USING_TSL_PACK:
# Replace the PARSERS dictionary with a comprehensive mapping based on the language pack
PARSERS = {
# A
".as": "actionscript",
".adb": "ada",
".ads": "ada",
".agda": "agda",
".ino": "arduino",
".asm": "asm",
".s": "asm",
".astro": "astro",
# B
".sh": "bash",
".bash": "bash",
".zsh": "bash",
".bean": "beancount",
".bib": "bibtex",
".bicep": "bicep",
".bb": "bitbake",
".bbappend": "bitbake",
".bbclass": "bitbake",
# C
".c": "c",
".h": "c",
".cairo": "cairo",
".capnp": "capnp",
".chatito": "chatito",
".clar": "clarity",
".clj": "clojure",
".cljs": "clojure",
".cljc": "clojure",
".edn": "clojure",
".cmake": "cmake",
"CMakeLists.txt": "cmake",
".lisp": "commonlisp",
".cl": "commonlisp",
".cpon": "cpon",
".cpp": "cpp",
".cc": "cpp",
".cxx": "cpp",
".hpp": "cpp",
".hxx": "cpp",
".h++": "cpp",
".cs": "csharp",
".css": "css",
".csv": "csv",
".cu": "cuda",
".cuh": "cuda",
".d": "d",
# D
".dart": "dart",
"Dockerfile": "dockerfile",
".dtd": "dtd",
# E
".el": "elisp",
".ex": "elixir",
".exs": "elixir",
".elm": "elm",
".erl": "erlang",
".hrl": "erlang",
# F
".fnl": "fennel",
".fir": "firrtl",
".fish": "fish",
".f": "fortran",
".f90": "fortran",
".f95": "fortran",
".f03": "fortran",
".f08": "fortran",
".fc": "func",
# G
".gd": "gdscript",
".gitattributes": "gitattributes",
".gitcommit": "gitcommit",
".gitignore": "gitignore",
".gleam": "gleam",
".glsl": "glsl",
".vert": "glsl",
".frag": "glsl",
".gn": "gn",
".gni": "gn",
".go": "go",
"go.mod": "gomod",
"go.sum": "gosum",
".groovy": "groovy",
".launch": "gstlaunch",
# H
".hack": "hack",
".ha": "hare",
".hs": "haskell",
".hx": "haxe",
".hcl": "hcl",
".tf": "hcl",
".tfvars": "hcl",
".heex": "heex",
".hlsl": "hlsl",
".html": "html",
".htm": "html",
".hypr": "hyprlang",
# I
".ispc": "ispc",
# J
".janet": "janet",
".java": "java",
".js": "javascript",
".jsx": "javascript",
".mjs": "javascript",
".jsdoc": "jsdoc",
".json": "json",
".jsonnet": "jsonnet",
".libsonnet": "jsonnet",
".jl": "julia",
# K
"Kconfig": "kconfig",
".kdl": "kdl",
".kt": "kotlin",
".kts": "kotlin",
# L
".tex": "latex",
".sty": "latex",
".cls": "latex",
".ld": "linkerscript",
".ll": "llvm",
".td": "tablegen",
".lua": "lua",
".luadoc": "luadoc",
".luap": "luap",
".luau": "luau",
# M
".magik": "magik",
"Makefile": "make",
".mk": "make",
".md": "markdown",
".markdown": "markdown",
".m": "matlab", # Note: .m is used by both MATLAB and Objective-C, prioritizing MATLAB here
".mat": "matlab",
".mermaid": "mermaid",
"meson.build": "meson",
# N
".ninja": "ninja",
".nix": "nix",
".nqc": "nqc",
# O
# .m extension is handled under MATLAB section (dual use extension)
".mm": "objc",
".ml": "ocaml",
".mli": "ocaml_interface",
".odin": "odin",
".org": "org",
# P
".pas": "pascal",
".pp": "pascal",
".pem": "pem",
".pl": "perl",
".pm": "perl",
".pgn": "pgn",
".php": "php",
".po": "po",
".pot": "po",
".pony": "pony",
".ps1": "powershell",
".psm1": "powershell",
".printf": "printf",
".prisma": "prisma",
".properties": "properties",
".proto": "proto",
".psv": "psv",
".purs": "purescript",
"MANIFEST.in": "pymanifest",
".py": "python",
# Q
"qmldir": "qmldir",
".qml": "qmljs",
# Q
# R
".r": "r",
".R": "r",
".rkt": "racket",
".re2c": "re2c",
".inputrc": "readline",
"requirements.txt": "requirements",
".ron": "ron",
".rst": "rst",
".rb": "ruby",
".rs": "rust",
# S
".scala": "scala",
".sc": "scala",
".scm": "scheme", # .scm is primarily used for Scheme files
".ss": "scheme",
".scss": "scss",
".smali": "smali",
".smithy": "smithy",
".sol": "solidity",
".rq": "sparql",
".sql": "sql",
".nut": "squirrel",
".bzl": "starlark",
"BUILD": "starlark",
"WORKSPACE": "starlark",
".svelte": "svelte",
".swift": "swift",
# T
".tcl": "tcl",
".thrift": "thrift",
".toml": "toml",
".tsv": "tsv",
".tsx": "typescript",
".twig": "twig",
".ts": "typescript",
".typ": "typst",
# U
".rules": "udev",
".ungram": "ungrammar",
".tal": "uxntal",
# V
# Note: .v extension is used by both V language and Verilog
# Prioritizing Verilog as it's more commonly used
".sv": "verilog",
".v": "verilog",
# For V language, users may need to specify parser manually
".vhd": "vhdl",
".vhdl": "vhdl",
".vim": "vim",
".vimrc": "vim",
".vue": "vue",
# W
".wgsl": "wgsl",
# X
".XCompose": "xcompose",
".xml": "xml",
".svg": "xml",
".xsl": "xml",
# Y
".yuck": "yuck",
# Z
".zig": "zig",
}
def filename_to_lang(filename):
# First check if the full filename (like "Dockerfile" or "go.mod") is in PARSERS
basename = os.path.basename(filename)
if basename in PARSERS:
return PARSERS[basename]
# If not found by full filename, check by extension
file_extension = os.path.splitext(filename)[1]
return PARSERS.get(file_extension)