diff options
Diffstat (limited to 'configs/shared/emacs/.emacs.d/elpa/haskell-mode-20180601.143/haskell-lexeme.el')
-rw-r--r-- | configs/shared/emacs/.emacs.d/elpa/haskell-mode-20180601.143/haskell-lexeme.el | 513 |
1 files changed, 513 insertions, 0 deletions
diff --git a/configs/shared/emacs/.emacs.d/elpa/haskell-mode-20180601.143/haskell-lexeme.el b/configs/shared/emacs/.emacs.d/elpa/haskell-mode-20180601.143/haskell-lexeme.el new file mode 100644 index 000000000000..877774e7756b --- /dev/null +++ b/configs/shared/emacs/.emacs.d/elpa/haskell-mode-20180601.143/haskell-lexeme.el @@ -0,0 +1,513 @@ +;;; haskell-lexeme.el --- haskell lexical tokens -*- coding: utf-8; lexical-binding: t -*- + +;; Copyright (C) 2015 Gracjan Polak + +;; This file is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: + +;;; Code: + +(require 'rx) + +(unless (category-docstring ?P) + (define-category ?P "Haskell symbol constituent characters") + (map-char-table + #'(lambda (key val) + (if (or + (and (consp key) (> (car key) 128)) + (and (numberp key) (> key 128))) + (if (member val '(Pc Pd Po Sm Sc Sk So)) + (modify-category-entry key ?P)))) + unicode-category-table) + + (dolist (key (string-to-list "!#$%&*+./<=>?@^|~\\-:")) + (modify-category-entry key ?P))) + +(defconst haskell-lexeme-modid + "[[:upper:]][[:alnum:]'_]*" + "Regexp matching a valid Haskell module identifier. + +Note that GHC accepts Unicode category UppercaseLetter as a first +character. Following letters are from Unicode categories +UppercaseLetter, LowercaseLetter, OtherLetter, TitlecaseLetter, +ModifierLetter, DecimalNumber, OtherNumber, backslash or +underscore.") + +(defconst haskell-lexeme-id + "[[:alpha:]_][[:alnum:]'_]*" + "Regexp matching a valid Haskell identifier. + +GHC accepts a string starting with any alphabetic character or +underscore followed by any alphanumeric character or underscore +or apostrophe.") + +(defconst haskell-lexeme-sym + "\\cP+" + "Regexp matching a valid Haskell variable or constructor symbol. + +GHC accepts a string of chars from the set +[:!#$%&*+./<=>?@^|~\\-] or Unicode category Symbol for chars with +codes larger than 128 only.") + +(defconst haskell-lexeme-idsym-first-char + "\\(?:[[:alpha:]_]\\|\\cP\\)" + "Regexp matching first character of a qualified or unqualified +identifier or symbol. + +Useful for `re-search-forward'.") + +(defconst haskell-lexeme-modid-opt-prefix + (concat "\\(?:" haskell-lexeme-modid "\\.\\)*") + "Regexp matching a valid Haskell module prefix, potentially empty. + +Module path prefix is separated by dots and finishes with a +dot. For path component syntax see `haskell-lexeme-modid'.") + +(defconst haskell-lexeme-qid-or-qsym + (rx-to-string `(: (regexp ,haskell-lexeme-modid-opt-prefix) + (group (| (regexp ,haskell-lexeme-id) (regexp ,haskell-lexeme-sym) + )))) + "Regexp matching a valid qualified identifier or symbol. + +Note that (match-string 1) returns the unqualified part.") + +(defun haskell-lexeme-looking-at-qidsym () + "Non-nil when point is just in front of an optionally qualified +identifier or symbol. + +Using this function is more efficient than matching against the +regexp `haskell-lexeme-qid-or-qsym'. + +Returns: + 'qid - if matched a qualified id: 'Data.Map' or 'Map' + 'qsym - if matched a qualified id: 'Monad.>>=' or '>>=' + 'qprefix - if matched only modid prefix: 'Data.' + +After successful 'qid or 'qsym match (match-string 1) will return +the unqualified part (if any)." + (let ((begin (point)) + (match-data-old (match-data))) + (save-excursion + (while (looking-at (concat haskell-lexeme-modid "\\.")) + (goto-char (match-end 0))) + (cond + ((looking-at haskell-lexeme-id) + (let ((beg (match-beginning 0)) + (end (match-end 0))) + + ;; check is MagicHash is present at the end of the token + (goto-char end) + (when (looking-at "#+") + (setq end (match-end 0))) + + (set-match-data + (list begin end + beg end))) + 'qid) + ((looking-at haskell-lexeme-sym) + (set-match-data + (list begin (match-end 0) + (match-beginning 0) (match-end 0))) + 'qsym) + ((equal begin (point)) + (set-match-data match-data-old) + nil) + (t + (set-match-data + (list begin (point) + nil nil)) + 'qprefix))))) + +(defun haskell-lexeme-looking-at-backtick () + "Non-nil when point is just in front of an identifier quoted with backticks. + +When match is successful, match-data will contain: + (match-text 1) - opening backtick + (match-text 2) - whole qualified identifier + (match-text 3) - unqualified part of identifier + (match-text 4) - closing backtick" + (let ((match-data-old (match-data)) + first-backtick-start + last-backtick-start + qid-start + id-start + id-end + result) + (save-excursion + (when (looking-at "`") + (setq first-backtick-start (match-beginning 0)) + (goto-char (match-end 0)) + (forward-comment (buffer-size)) + (when (haskell-lexeme-looking-at-qidsym) + (setq qid-start (match-beginning 0)) + (setq id-start (match-beginning 1)) + (setq id-end (match-end 1)) + (goto-char (match-end 0)) + (forward-comment (buffer-size)) + (when (looking-at "`") + (setq last-backtick-start (match-beginning 0)) + (set-match-data + (mapcar + (lambda (p) + (set-marker (make-marker) p)) + (list + first-backtick-start (1+ last-backtick-start) + first-backtick-start (1+ first-backtick-start) + qid-start id-end + id-start id-end + last-backtick-start (1+ last-backtick-start)))) + (setq result t))))) + (unless result + (set-match-data match-data-old)) + result)) + +(defconst haskell-lexeme-qid + (rx-to-string `(: (regexp "'*") + (regexp ,haskell-lexeme-modid-opt-prefix) + (group (regexp ,haskell-lexeme-id)))) + "Regexp matching a valid qualified identifier. + +Note that (match-string 1) returns the unqualified part.") + +(defconst haskell-lexeme-qsym + (rx-to-string `(: (regexp "'*") + (regexp ,haskell-lexeme-modid-opt-prefix) + (group (regexp ,haskell-lexeme-id)))) + "Regexp matching a valid qualified symbol. + +Note that (match-string 1) returns the unqualified part.") + +(defconst haskell-lexeme-number + (rx (| (: (regexp "[0-9]+\\.[0-9]+") (opt (regexp "[eE][-+]?[0-9]+"))) + (regexp "[0-9]+[eE][-+]?[0-9]+") + (regexp "0[xX][0-9a-fA-F]+") + (regexp "0[oO][0-7]+") + (regexp "[0-9]+"))) + "Regexp matching a floating point, decimal, octal or hexadecimal number. + +Note that negative sign char is not part of a number.") + +(defconst haskell-lexeme-char-literal-inside + (rx (| (not (any "\n'\\")) + (: "\\" + (| "a" "b" "f" "n" "r" "t" "v" "\\" "\"" "'" + "NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK" + "BEL" "BS" "HT" "LF" "VT" "FF" "CR" "SO" "SI" "DLE" + "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB" "CAN" + "EM" "SUB" "ESC" "FS" "GS" "RS" "US" "SP" "DEL" + (regexp "[0-9]+") + (: "x" (regexp "[0-9a-fA-F]+")) + (: "o" (regexp "[0-7]+")) + (: "^" (regexp "[]A-Z@^_\\[]")))))) + "Regexp matching an inside of a character literal. + +Note that `haskell-lexeme-char-literal-inside' matches strictly +only escape sequences defined in Haskell Report.") + +(defconst haskell-lexeme--char-literal-rx + (rx-to-string `(: (group "'") + (| (: (group (regexp "[[:alpha:]_([]")) (group "'")) ; exactly one char + (: (group (| (regexp "\\\\[^\n][^'\n]*") ; allow quote just after first backslash + (regexp "[^[:alpha:]_:(['\n][^'\n]*"))) + (| (group "'") "\n" (regexp "\\'")))))) + "Regexp matching a character literal lookalike. + +Note that `haskell-lexeme--char-literal-rx' matches more than +Haskell Report specifies because we want to support also code +under edit. + +Character literals end with a quote or a newline or end of +buffer. + +Regexp has subgroup expressions: + (match-text 1) matches the opening quote. + (match-text 2) matches the inside of the character literal. + (match-text 3) matches the closing quote or an empty string + at the end of line or the end buffer.") + +(defun haskell-lexeme-looking-at-char-literal () + "Non-nil when point is at a char literal lookalike. + +Note that this function matches more than Haskell Report +specifies because we want to support also code under edit. + +Char literals end with a quote or an unescaped newline or end +of buffer. + +After successful match: + (match-text 1) matches the opening quote. + (match-text 2) matches the inside of the char literla. + (match-text 3) matches the closing quote, or a closing + newline or is nil when at the end of the buffer." + (when (looking-at haskell-lexeme--char-literal-rx) + (set-match-data + (list (match-beginning 0) (match-end 0) + (match-beginning 1) (match-end 1) + (or (match-beginning 2) (match-beginning 4)) (or (match-end 2) (match-end 4)) + (or (match-beginning 3) (match-beginning 5)) (or (match-end 3) (match-end 5)))) + t)) + +(defconst haskell-lexeme-string-literal-inside-item + (rx (| (not (any "\n\"\\")) + (: "\\" + (| "a" "b" "f" "n" "r" "t" "v" "\\" "\"" "'" "&" + "NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK" + "BEL" "BS" "HT" "LF" "VT" "FF" "CR" "SO" "SI" "DLE" + "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB" "CAN" + "EM" "SUB" "ESC" "FS" "GS" "RS" "US" "SP" "DEL" + (regexp "[0-9]+") + (: "x" (regexp "[0-9a-fA-F]+")) + (: "o" (regexp "[0-7]+")) + (: "^" (regexp "[]A-Z@^_\\[]")) + (regexp "[ \t\n\r\v\f]*\\\\"))))) + "Regexp matching an item that is a single character or a single +escape sequence inside of a string literal. + +Note that `haskell-lexeme-string-literal-inside-item' matches +strictly only escape sequences defined in Haskell Report.") + +(defconst haskell-lexeme-string-literal + (rx (: (group "\"") + (group (* (| (regexp "\\\\[ \t\n\r\v\f]*\\\\") + (regexp "\\\\[ \t\n\r\v\f]+") + (regexp "\\\\[^ \t\n\r\v\f]") + (* (regexp "[^\"\n\\]"))))) + (group (| "\"" (regexp "$") (regexp "\\\\?\\'") + )))) + "Regexp matching a string literal lookalike. + +Note that `haskell-lexeme-string-literal' matches more than +Haskell Report specifies because we want to support also code +under edit. + +String literals end with double quote or unescaped newline or end +of buffer. + +Regexp has subgroup expressions: + (match-text 1) matches the opening double quote. + (match-text 2) matches the inside of the string. + (match-text 3) matches the closing double quote or an empty string + at the end of line or the end buffer.") + +(defun haskell-lexeme-looking-at-string-literal () + "Non-nil when point is at a string literal lookalike. + +Note that this function matches more than Haskell Report +specifies because we want to support also code under edit. + +String literals end with double quote or unescaped newline or end +of buffer. + +After successful match: + (match-text 1) matches the opening doublequote. + (match-text 2) matches the inside of the string. + (match-text 3) matches the closing quote, or a closing + newline or is nil when at the end of the buffer." + (when (looking-at "\"") + (save-excursion + (let ((begin (point))) + (goto-char (match-end 0)) + (let (finish) + (while (and (not finish) + (re-search-forward "[\"\n\\]" nil 'goto-eob)) + (cond + ((equal (match-string 0) "\\") + (if (looking-at "[ \t\n\r\v\f]+\\\\?") + (goto-char (match-end 0)) + (goto-char (1+ (point))))) + + ((equal (match-string 0) "\"") + (set-match-data + (list begin (match-end 0) + begin (1+ begin) + (1+ begin) (match-beginning 0) + (match-beginning 0) (match-end 0))) + (setq finish t)) + + ((equal (match-string 0) "\n") + (set-match-data + (list begin (match-beginning 0) + begin (1+ begin) + (1+ begin) (match-beginning 0) + nil nil)) + (setq finish t)))) + (unless finish + ;; string closed by end of buffer + (set-match-data + (list begin (point) + begin (1+ begin) + (1+ begin) (point) + nil nil)))))) + ;; there was a match + t)) + +(defun haskell-lexeme-looking-at-quasi-quote-literal () + "Non-nil when point is just in front of Template Haskell +quaisquote literal. + +Quasi quotes start with '[xxx|' or '[$xxx|' sequence and end with + '|]'. The 'xxx' is a quoter name. There is no escaping mechanism +provided for the ending sequence. + +Regexp has subgroup expressions: + (match-text 1) matches the quoter name (without $ sign if present). + (match-text 2) matches the opening vertical bar. + (match-text 3) matches the inside of the quoted string. + (match-text 4) matches the closing vertical bar + or nil if at the end of the buffer. + +Note that this function excludes 'e', 't', 'd', 'p' as quoter +names according to Template Haskell specification." + (let ((match-data-old (match-data))) + (if (and + (looking-at (rx-to-string `(: "[" (optional "$") + (group (regexp ,haskell-lexeme-id)) + (group "|")))) + (equal (haskell-lexeme-classify-by-first-char (char-after (match-beginning 1))) + 'varid) + (not (member (match-string 1) '("e" "t" "d" "p")))) + (save-excursion + ;; note that quasi quote syntax does not have any escaping + ;; mechanism and if not closed it will span til lthe end of buffer + (goto-char (match-end 0)) + (let ((match-data (match-data)) + (match-data-2 (and (re-search-forward "|]" nil t) + (match-data)))) + (if match-data-2 + (set-match-data + (list + (nth 0 match-data) (nth 1 match-data-2) ;; whole match + (nth 2 match-data) (nth 3 match-data) ;; quoter name + (nth 4 match-data) (nth 5 match-data) ;; opening bar + (nth 5 match-data) (nth 0 match-data-2) ;; inner string + (nth 0 match-data-2) (1+ (nth 0 match-data-2)))) ;; closing bar + + (set-match-data + (list + (nth 0 match-data) (point-max) ;; whole match + (nth 2 match-data) (nth 3 match-data) ;; quoter name + (nth 4 match-data) (nth 5 match-data) ;; opening bar + (nth 5 match-data) (point-max) ;; inner string + nil nil)) ;; closing bar + )) + t) + ;; restore old match data if not matched + (set-match-data match-data-old) + nil))) + +(defun haskell-lexeme-classify-by-first-char (char) + "Classify token by CHAR. + +CHAR is a chararacter that is assumed to be the first character +of a token." + (let ((category (get-char-code-property (or char ?\ ) 'general-category))) + + (cond + ((or (member char '(?! ?# ?$ ?% ?& ?* ?+ ?. ?/ ?< ?= ?> ?? ?@ ?^ ?| ?~ ?\\ ?-)) + (and (> char 127) + (member category '(Pc Pd Po Sm Sc Sk So)))) + 'varsym) + ((equal char ?:) + 'consym) + ((equal char ?\') + 'char) + ((equal char ?\") + 'string) + ((member category '(Lu Lt)) + 'conid) + ((or (equal char ?_) + (member category '(Ll Lo))) + 'varid) + ((and (>= char ?0) (<= char ?9)) + 'number) + ((member char '(?\] ?\[ ?\( ?\) ?\{ ?\} ?\` ?\, ?\;)) + 'special)))) + +(defun haskell-lexeme-looking-at-token (&rest flags) + "Like `looking-at' but understands Haskell lexemes. + +Moves point forward over whitespace. Returns a symbol describing +type of Haskell token recognized. Use `match-string', +`match-beginning' and `match-end' with argument 0 to query match +result. + +Possible results are: +- 'special: for chars [](){}`,; +- 'comment: for single line comments +- 'nested-comment: for multiline comments +- 'qsymid: for qualified identifiers or symbols +- 'string: for strings literals +- 'char: for char literals +- 'number: for decimal, float, hexadecimal and octal number literals +- 'template-haskell-quote: for a string of apostrophes for template haskell +- 'template-haskell-quasi-quote: for a string of apostrophes for template haskell + +Note that for qualified symbols (match-string 1) returns the +unqualified identifier or symbol. Further qualification for +symbol or identifier can be done with: + + (haskell-lexeme-classify-by-first-char (char-after (match-beginning 1))) + +See `haskell-lexeme-classify-by-first-char' for details." + (while + ;; Due to how unterminated strings terminate at newline, some + ;; newlines have syntax set to generic string delimeter. We want + ;; those to be treated as whitespace anyway + (or + (> (skip-syntax-forward "-") 0) + (and (not (member 'newline flags)) + (> (skip-chars-forward "\n") 0)))) + (let + ((case-fold-search nil) + (point (point-marker))) + (or + (and + (equal (string-to-syntax "<") + (get-char-property (point) 'syntax-table)) + (progn + (set-match-data (list point (set-marker (make-marker) (line-end-position)))) + 'literate-comment)) + (and (looking-at "\n") + 'newline) + (and (looking-at "{-") + (save-excursion + (forward-comment 1) + (set-match-data (list point (point-marker))) + 'nested-comment)) + (and (haskell-lexeme-looking-at-char-literal) + 'char) + (and (haskell-lexeme-looking-at-string-literal) + 'string) + (and (looking-at "[][(){}`,;]") + (if (haskell-lexeme-looking-at-quasi-quote-literal) + 'template-haskell-quasi-quote + 'special)) + (and (haskell-lexeme-looking-at-qidsym) + (if (save-match-data + (string-match "\\`---*\\'" (match-string-no-properties 0))) + (progn + (set-match-data (list point (set-marker (make-marker) (line-end-position)))) + 'comment) + 'qsymid)) + (and (looking-at haskell-lexeme-number) + 'number) + (and (looking-at "'+") + 'template-haskell-quote) + (and (looking-at ".") + 'illegal)))) + +(provide 'haskell-lexeme) + +;;; haskell-lexeme.el ends here |