You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

699 lines
12 KiB

6 months ago
  1. /**
  2. * @param {string} value
  3. * @returns {RegExp}
  4. * */
  /**
   * Returns the pattern text of a regular expression, or the string itself
   * when a string is given.
   *
   * @param {RegExp | string } re - pattern to unwrap
   * @returns {string | null} the pattern text; `null` when `re` is falsy
   *   (this includes the empty string)
   */
  function source(re) {
    if (re) {
      // Strings are already pattern text; anything else is read via `.source`.
      return typeof re === "string" ? re : re.source;
    }
    return null;
  }
  /**
   * Glues several patterns together, in order, into one pattern string.
   *
   * Each argument is converted with `source()` first; the pieces are then
   * joined with no separator.
   *
   * @param {...(RegExp | string) } args - patterns to concatenate
   * @returns {string} the combined pattern text
   */
  function concat(...args) {
    const pieces = [];
    for (const arg of args) {
      pieces.push(source(arg));
    }
    // Array#join renders null/undefined pieces as empty strings.
    return pieces.join("");
  }
  /**
   * Any of the passed expressions may match.
   *
   * Creates a huge this | this | that | that match: every argument is
   * converted with `source()`, the results are joined with `|`, and the
   * whole alternation is wrapped in one pair of parentheses.
   *
   * @param {...(RegExp | string) } args - alternatives to choose between
   * @returns {string} the parenthesized alternation as pattern text
   */
  function either(...args) {
    const alternatives = [];
    for (const arg of args) {
      alternatives.push(source(arg));
    }
    return `(${alternatives.join("|")})`;
  }
  /*
  Language: SQL
  Website: https://en.wikipedia.org/wiki/SQL
  Category: common, database
  */

  /**
   * Builds the SQL language definition.
   *
   * @param {object} hljs - highlighter instance; this function uses its
   *   `COMMENT()` factory and its `C_NUMBER_MODE` and `C_BLOCK_COMMENT_MODE`
   *   modes
   * @returns {object} language definition with `name`, `case_insensitive`,
   *   `illegal`, `keywords` and `contains`
   */
  function sql(hljs) {
    // `-- ...` comments running to the end of the line
    const COMMENT_MODE = hljs.COMMENT('--', '$');

    // Single-quoted string; a doubled quote ('') inside does not end it.
    const STRING = {
      className: 'string',
      variants: [
        {
          begin: /'/,
          end: /'/,
          contains: [
            { begin: /''/ }
          ]
        }
      ]
    };

    // Double-quoted identifier; a doubled quote ("") inside does not end it.
    const QUOTED_IDENTIFIER = {
      begin: /"/,
      end: /"/,
      contains: [ { begin: /""/ } ]
    };

    const LITERALS = [
      "true",
      "false",
      // Not sure it's correct to call NULL literal, and clauses like IS [NOT] NULL look strange that way.
      // "null",
      "unknown"
    ];

    const MULTI_WORD_TYPES = [
      "double precision",
      "large object",
      "with timezone",
      "without timezone"
    ];

    const TYPES = [
      'bigint',
      'binary',
      'blob',
      'boolean',
      'char',
      'character',
      'clob',
      'date',
      'dec',
      'decfloat',
      'decimal',
      'float',
      'int',
      'integer',
      'interval',
      'nchar',
      'nclob',
      'national',
      'numeric',
      'real',
      'row',
      'smallint',
      'time',
      'timestamp',
      'varchar',
      'varying', // modifier (character varying)
      'varbinary'
    ];

    const NON_RESERVED_WORDS = [
      "add",
      "asc",
      "collation",
      "desc",
      "final",
      "first",
      "last",
      "view"
    ];

    // https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#reserved-word
    const RESERVED_WORDS = [
      "abs",
      "acos",
      "all",
      "allocate",
      "alter",
      "and",
      "any",
      "are",
      "array",
      "array_agg",
      "array_max_cardinality",
      "as",
      "asensitive",
      "asin",
      "asymmetric",
      "at",
      "atan",
      "atomic",
      "authorization",
      "avg",
      "begin",
      "begin_frame",
      "begin_partition",
      "between",
      "bigint",
      "binary",
      "blob",
      "boolean",
      "both",
      "by",
      "call",
      "called",
      "cardinality",
      "cascaded",
      "case",
      "cast",
      "ceil",
      "ceiling",
      "char",
      "char_length",
      "character",
      "character_length",
      "check",
      "classifier",
      "clob",
      "close",
      "coalesce",
      "collate",
      "collect",
      "column",
      "commit",
      "condition",
      "connect",
      "constraint",
      "contains",
      "convert",
      "copy",
      "corr",
      "corresponding",
      "cos",
      "cosh",
      "count",
      "covar_pop",
      "covar_samp",
      "create",
      "cross",
      "cube",
      "cume_dist",
      "current",
      "current_catalog",
      "current_date",
      "current_default_transform_group",
      "current_path",
      "current_role",
      "current_row",
      "current_schema",
      "current_time",
      "current_timestamp",
      "current_transform_group_for_type",
      "current_user",
      "cursor",
      "cycle",
      "date",
      "day",
      "deallocate",
      "dec",
      "decimal",
      "decfloat",
      "declare",
      "default",
      "define",
      "delete",
      "dense_rank",
      "deref",
      "describe",
      "deterministic",
      "disconnect",
      "distinct",
      "double",
      "drop",
      "dynamic",
      "each",
      "element",
      "else",
      "empty",
      "end",
      "end_frame",
      "end_partition",
      "end-exec",
      "equals",
      "escape",
      "every",
      "except",
      "exec",
      "execute",
      "exists",
      "exp",
      "external",
      "extract",
      "false",
      "fetch",
      "filter",
      "first_value",
      "float",
      "floor",
      "for",
      "foreign",
      "frame_row",
      "free",
      "from",
      "full",
      "function",
      "fusion",
      "get",
      "global",
      "grant",
      "group",
      "grouping",
      "groups",
      "having",
      "hold",
      "hour",
      "identity",
      "in",
      "indicator",
      "initial",
      "inner",
      "inout",
      "insensitive",
      "insert",
      "int",
      "integer",
      "intersect",
      "intersection",
      "interval",
      "into",
      "is",
      "join",
      "json_array",
      "json_arrayagg",
      "json_exists",
      "json_object",
      "json_objectagg",
      "json_query",
      "json_table",
      "json_table_primitive",
      "json_value",
      "lag",
      "language",
      "large",
      "last_value",
      "lateral",
      "lead",
      "leading",
      "left",
      "like",
      "like_regex",
      "listagg",
      "ln",
      "local",
      "localtime",
      "localtimestamp",
      "log",
      "log10",
      "lower",
      "match",
      "match_number",
      "match_recognize",
      "matches",
      "max",
      "member",
      "merge",
      "method",
      "min",
      "minute",
      "mod",
      "modifies",
      "module",
      "month",
      "multiset",
      "national",
      "natural",
      "nchar",
      "nclob",
      "new",
      "no",
      "none",
      "normalize",
      "not",
      "nth_value",
      "ntile",
      "null",
      "nullif",
      "numeric",
      "octet_length",
      "occurrences_regex",
      "of",
      "offset",
      "old",
      "omit",
      "on",
      "one",
      "only",
      "open",
      "or",
      "order",
      "out",
      "outer",
      "over",
      "overlaps",
      "overlay",
      "parameter",
      "partition",
      "pattern",
      "per",
      "percent",
      "percent_rank",
      "percentile_cont",
      "percentile_disc",
      "period",
      "portion",
      "position",
      "position_regex",
      "power",
      "precedes",
      "precision",
      "prepare",
      "primary",
      "procedure",
      "ptf",
      "range",
      "rank",
      "reads",
      "real",
      "recursive",
      "ref",
      "references",
      "referencing",
      "regr_avgx",
      "regr_avgy",
      "regr_count",
      "regr_intercept",
      "regr_r2",
      "regr_slope",
      "regr_sxx",
      "regr_sxy",
      "regr_syy",
      "release",
      "result",
      "return",
      "returns",
      "revoke",
      "right",
      "rollback",
      "rollup",
      "row",
      "row_number",
      "rows",
      "running",
      "savepoint",
      "scope",
      "scroll",
      "search",
      "second",
      "seek",
      "select",
      "sensitive",
      "session_user",
      "set",
      "show",
      "similar",
      "sin",
      "sinh",
      "skip",
      "smallint",
      "some",
      "specific",
      "specifictype",
      "sql",
      "sqlexception",
      "sqlstate",
      "sqlwarning",
      "sqrt",
      "start",
      "static",
      "stddev_pop",
      "stddev_samp",
      "submultiset",
      "subset",
      "substring",
      "substring_regex",
      "succeeds",
      "sum",
      "symmetric",
      "system",
      "system_time",
      "system_user",
      "table",
      "tablesample",
      "tan",
      "tanh",
      "then",
      "time",
      "timestamp",
      "timezone_hour",
      "timezone_minute",
      "to",
      "trailing",
      "translate",
      "translate_regex",
      "translation",
      "treat",
      "trigger",
      "trim",
      "trim_array",
      "true",
      "truncate",
      "uescape",
      "union",
      "unique",
      "unknown",
      "unnest",
      // no surrounding whitespace: the keyword $pattern below only matches
      // word characters and dots, so a padded entry could never match
      "update",
      "upper",
      "user",
      "using",
      "value",
      "values",
      "value_of",
      "var_pop",
      "var_samp",
      "varbinary",
      "varchar",
      "varying",
      "versioning",
      "when",
      "whenever",
      "where",
      "width_bucket",
      "window",
      "with",
      "within",
      "without",
      "year",
    ];

    // these are reserved words we have identified to be functions
    // and should only be highlighted in a dispatch-like context
    // ie, array_agg(...), etc.
    const RESERVED_FUNCTIONS = [
      "abs",
      "acos",
      "array_agg",
      "asin",
      "atan",
      "avg",
      "cast",
      "ceil",
      "ceiling",
      "coalesce",
      "corr",
      "cos",
      "cosh",
      "count",
      "covar_pop",
      "covar_samp",
      "cume_dist",
      "dense_rank",
      "deref",
      "element",
      "exp",
      "extract",
      "first_value",
      "floor",
      "json_array",
      "json_arrayagg",
      "json_exists",
      "json_object",
      "json_objectagg",
      "json_query",
      "json_table",
      "json_table_primitive",
      "json_value",
      "lag",
      "last_value",
      "lead",
      "listagg",
      "ln",
      "log",
      "log10",
      "lower",
      "max",
      "min",
      "mod",
      "nth_value",
      "ntile",
      "nullif",
      "percent_rank",
      "percentile_cont",
      "percentile_disc",
      "position",
      "position_regex",
      "power",
      "rank",
      "regr_avgx",
      "regr_avgy",
      "regr_count",
      "regr_intercept",
      "regr_r2",
      "regr_slope",
      "regr_sxx",
      "regr_sxy",
      "regr_syy",
      "row_number",
      "sin",
      "sinh",
      "sqrt",
      "stddev_pop",
      "stddev_samp",
      "substring",
      "substring_regex",
      "sum",
      "tan",
      "tanh",
      "translate",
      "translate_regex",
      "treat",
      "trim",
      "trim_array",
      "unnest",
      "upper",
      "value_of",
      "var_pop",
      "var_samp",
      "width_bucket",
    ];

    // these functions can (judging by the constant's name) appear without
    // parentheses, so they are highlighted as built-ins wherever they occur
    const POSSIBLE_WITHOUT_PARENS = [
      "current_catalog",
      "current_date",
      "current_default_transform_group",
      "current_path",
      "current_role",
      "current_schema",
      "current_transform_group_for_type",
      "current_user",
      "session_user",
      "system_time",
      "system_user",
      "current_time",
      "localtime",
      "current_timestamp",
      "localtimestamp"
    ];

    // those exist to boost relevance making these very
    // "SQL like" keyword combos worth +1 extra relevance
    const COMBOS = [
      "create table",
      "insert into",
      "primary key",
      "foreign key",
      "not null",
      "alter table",
      "add constraint",
      "grouping sets",
      "on overflow",
      "character set",
      "respect nulls",
      "ignore nulls",
      "nulls first",
      "nulls last",
      "depth first",
      "breadth first"
    ];

    const FUNCTIONS = RESERVED_FUNCTIONS;

    // Plain keywords are every reserved / non-reserved word that is not one
    // of the function names; those are only highlighted by FUNCTION_CALL.
    const RESERVED_FUNCTION_SET = new Set(RESERVED_FUNCTIONS);
    const KEYWORDS = [...RESERVED_WORDS, ...NON_RESERVED_WORDS].filter(
      (keyword) => !RESERVED_FUNCTION_SET.has(keyword)
    );

    const VARIABLE = {
      className: "variable",
      begin: /@[a-z0-9]+/,
    };

    const OPERATOR = {
      className: "operator",
      begin: /[-+*/=%^~]|&&?|\|\|?|!=?|<(?:=>?|<|>)?|>[>=]?/,
      relevance: 0,
    };

    // A known function name followed by optional whitespace and `(`.
    const FUNCTION_CALL = {
      begin: concat(/\b/, either(...FUNCTIONS), /\s*\(/),
      keywords: {
        built_in: FUNCTIONS
      }
    };

    /**
     * Appends `|0` to every entry of `list` that `when` selects.
     *
     * Entries that already end in `|<digits>` and entries listed in
     * `exceptions` are returned unchanged. It is used below so that
     * keywords with less than 3 letters are reduced in relevancy.
     *
     * @param {string[]} list - keyword entries
     * @param {{exceptions?: string[], when?: (item: string) => boolean}} [options]
     *   `when` defaults to a predicate that selects nothing, so omitting it
     *   returns the entries unchanged instead of throwing
     * @returns {string[]} a new array with the adjusted entries
     */
    function reduceRelevancy(list, { exceptions, when } = {}) {
      const qualifyFn = when || (() => false);
      const skipped = exceptions || [];
      return list.map((item) => {
        if (/\|\d+$/.test(item) || skipped.includes(item)) {
          return item;
        }
        return qualifyFn(item) ? `${item}|0` : item;
      });
    }

    return {
      name: 'SQL',
      case_insensitive: true,
      // does not include {} or HTML tags `</`
      illegal: /[{}]|<\//,
      keywords: {
        $pattern: /\b[\w\.]+/,
        keyword:
          reduceRelevancy(KEYWORDS, { when: (x) => x.length < 3 }),
        literal: LITERALS,
        type: TYPES,
        built_in: POSSIBLE_WITHOUT_PARENS
      },
      contains: [
        {
          begin: either(...COMBOS),
          keywords: {
            $pattern: /[\w\.]+/,
            keyword: KEYWORDS.concat(COMBOS),
            literal: LITERALS,
            type: TYPES
          },
        },
        {
          className: "type",
          begin: either(...MULTI_WORD_TYPES)
        },
        FUNCTION_CALL,
        VARIABLE,
        STRING,
        QUOTED_IDENTIFIER,
        hljs.C_NUMBER_MODE,
        hljs.C_BLOCK_COMMENT_MODE,
        COMMENT_MODE,
        OPERATOR
      ]
    };
  }
  677. module.exports = sql;