1 | package lexers
|
---|
2 |
|
---|
3 | import (
|
---|
4 | "regexp"
|
---|
5 | "strings"
|
---|
6 | "unicode/utf8"
|
---|
7 |
|
---|
8 | "github.com/dlclark/regexp2"
|
---|
9 |
|
---|
10 | . "github.com/alecthomas/chroma/v2" // nolint
|
---|
11 | )
|
---|
12 |
|
---|
13 | // Raku lexer.
|
---|
14 | var Raku Lexer = Register(MustNewLexer(
|
---|
15 | &Config{
|
---|
16 | Name: "Raku",
|
---|
17 | Aliases: []string{"perl6", "pl6", "raku"},
|
---|
18 | Filenames: []string{
|
---|
19 | "*.pl", "*.pm", "*.nqp", "*.p6", "*.6pl", "*.p6l", "*.pl6", "*.6pm",
|
---|
20 | "*.p6m", "*.pm6", "*.t", "*.raku", "*.rakumod", "*.rakutest", "*.rakudoc",
|
---|
21 | },
|
---|
22 | MimeTypes: []string{
|
---|
23 | "text/x-perl6", "application/x-perl6",
|
---|
24 | "text/x-raku", "application/x-raku",
|
---|
25 | },
|
---|
26 | DotAll: true,
|
---|
27 | },
|
---|
28 | rakuRules,
|
---|
29 | ))
|
---|
30 |
|
---|
31 | func rakuRules() Rules {
|
---|
// RakuToken identifies which kind of delimited construct a mutator is
// working on, so that shared helpers can branch on it.
type RakuToken int

// Token classes, numbered consecutively from zero in declaration order.
const (
	// rakuQuote marks quoting constructs (heredocs included).
	rakuQuote RakuToken = iota
	// rakuNameAttribute marks a bracketed attribute attached to a name.
	rakuNameAttribute
	// rakuPod marks a delimited Pod block terminated by `=end <name>`.
	rakuPod
	// rakuPodFormatter marks a Pod formatting code.
	rakuPodFormatter
	// rakuPodDeclaration marks a Pod declarator.
	rakuPodDeclaration
	// rakuMultilineComment marks a bracket-delimited comment.
	rakuMultilineComment
	// rakuMatchRegex marks a match regex with custom delimiters.
	rakuMatchRegex
	// rakuSubstitutionRegex marks a substitution regex with custom delimiters.
	rakuSubstitutionRegex
)
44 |
|
---|
// Regex fragments shared by the rules below. They use lookaround and
// named-group syntax, so they are not valid for the standard regexp package.

// Brackets that may open / close the value of a colon pair.
const colonPairOpeningBrackets = `(?:<<|<|«|\(|\[|\{)`
const colonPairClosingBrackets = `(?:>>|>|»|\)|\]|\})`

// colonPairPattern matches `:key` followed by an opening bracket, capturing
// the pieces as `colon`, `key` and `opening_delimiters`.
const colonPairPattern = `(?<!:)(?<colon>:)(?<key>\w[\w'-]*)(?<opening_delimiters>` +
	colonPairOpeningBrackets +
	`)`

// colonPairLookahead opens a lookahead for an optional colon pair. The
// `(?=` group is deliberately left unclosed here.
// NOTE(review): users of this fragment presumably append the closing `)`.
const colonPairLookahead = `(?=(:['\w-]+` + colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `)?`

// namePattern matches an identifier (including `::` separators), stopping
// where a colon pair begins.
const namePattern = `(?:(?!` + colonPairPattern + `)(?:::|[\w':-]))+`

// Sigil(s), an optional twigil character, then a name.
const variablePattern = `[$@%&]+[.^:?=!~]?` + namePattern

// Sigil(s), a `*`, then a name.
const globalVariablePattern = `[$@%&]+\*` + namePattern
55 |
|
---|
56 | keywords := []string{
|
---|
57 | `BEGIN`, `CATCH`, `CHECK`, `CLOSE`, `CONTROL`, `DOC`, `END`, `ENTER`, `FIRST`, `INIT`,
|
---|
58 | `KEEP`, `LAST`, `LEAVE`, `NEXT`, `POST`, `PRE`, `QUIT`, `UNDO`, `anon`, `augment`, `but`,
|
---|
59 | `class`, `constant`, `default`, `does`, `else`, `elsif`, `enum`, `for`, `gather`, `given`,
|
---|
60 | `grammar`, `has`, `if`, `import`, `is`, `of`, `let`, `loop`, `made`, `make`, `method`,
|
---|
61 | `module`, `multi`, `my`, `need`, `orwith`, `our`, `proceed`, `proto`, `repeat`, `require`,
|
---|
62 | `where`, `return`, `return-rw`, `returns`, `->`, `-->`, `role`, `state`, `sub`, `no`,
|
---|
63 | `submethod`, `subset`, `succeed`, `supersede`, `try`, `unit`, `unless`, `until`,
|
---|
64 | `use`, `when`, `while`, `with`, `without`, `export`, `native`, `repr`, `required`, `rw`,
|
---|
65 | `symbol`, `default`, `cached`, `DEPRECATED`, `dynamic`, `hidden-from-backtrace`, `nodal`,
|
---|
66 | `pure`, `raw`, `start`, `react`, `supply`, `whenever`, `also`, `rule`, `token`, `regex`,
|
---|
67 | `dynamic-scope`, `built`, `temp`,
|
---|
68 | }
|
---|
69 |
|
---|
70 | keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...)
|
---|
71 |
|
---|
72 | wordOperators := []string{
|
---|
73 | `X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`,
|
---|
74 | `gt`, `le`, `leg`, `lt`, `mod`, `ne`, `or`, `orelse`, `x`, `xor`, `xx`, `gcd`, `lcm`,
|
---|
75 | `but`, `min`, `max`, `^fff`, `fff^`, `fff`, `^ff`, `ff^`, `ff`, `so`, `not`, `unicmp`,
|
---|
76 | `TR`, `o`, `(&)`, `(.)`, `(|)`, `(+)`, `(-)`, `(^)`, `coll`, `(elem)`, `(==)`,
|
---|
77 | `(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`,
|
---|
78 | }
|
---|
79 |
|
---|
80 | wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...)
|
---|
81 |
|
---|
82 | operators := []string{
|
---|
83 | `++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`,
|
---|
84 | `+<`, `+>`, `~&`, `~<`, `~>`, `?&`, `+|`, `+^`, `~|`, `~^`, `?`, `?|`, `?^`, `&`, `^`,
|
---|
85 | `<=>`, `^…^`, `^…`, `…^`, `…`, `...`, `...^`, `^...`, `^...^`, `..`, `..^`, `^..`, `^..^`,
|
---|
86 | `::=`, `:=`, `!=`, `==`, `<=`, `<`, `>=`, `>`, `~~`, `===`, `&&`, `||`, `|`, `^^`, `//`,
|
---|
87 | `??`, `!!`, `^fff^`, `^ff^`, `<==`, `==>`, `<<==`, `==>>`, `=>`, `=`, `<<`, `«`, `>>`, `»`,
|
---|
88 | `,`, `>>.`, `».`, `.&`, `.=`, `.^`, `.?`, `.+`, `.*`, `.`, `∘`, `∩`, `⊍`, `∪`, `⊎`, `∖`,
|
---|
89 | `⊖`, `≠`, `≤`, `≥`, `=:=`, `=~=`, `≅`, `∈`, `∉`, `≡`, `≢`, `∋`, `∌`, `⊂`, `⊄`, `⊆`, `⊈`,
|
---|
90 | `⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`,
|
---|
91 | }
|
---|
92 |
|
---|
93 | operatorsPattern := Words(``, ``, operators...)
|
---|
94 |
|
---|
95 | builtinTypes := []string{
|
---|
96 | `False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`,
|
---|
97 | `atomicint`, `Attribute`, `Backtrace`, `Backtrace::Frame`, `Bag`, `Baggy`, `BagHash`,
|
---|
98 | `Blob`, `Block`, `Bool`, `Buf`, `Callable`, `CallFrame`, `Cancellation`, `Capture`,
|
---|
99 | `CArray`, `Channel`, `Code`, `compiler`, `Complex`, `ComplexStr`, `CompUnit`,
|
---|
100 | `CompUnit::PrecompilationRepository`, `CompUnit::Repository`, `Empty`,
|
---|
101 | `CompUnit::Repository::FileSystem`, `CompUnit::Repository::Installation`, `Cool`,
|
---|
102 | `CurrentThreadScheduler`, `CX::Warn`, `CX::Take`, `CX::Succeed`, `CX::Return`, `CX::Redo`,
|
---|
103 | `CX::Proceed`, `CX::Next`, `CX::Last`, `CX::Emit`, `CX::Done`, `Cursor`, `Date`, `Dateish`,
|
---|
104 | `DateTime`, `Distribution`, `Distribution::Hash`, `Distribution::Locally`,
|
---|
105 | `Distribution::Path`, `Distribution::Resource`, `Distro`, `Duration`, `Encoding`,
|
---|
106 | `Encoding::GlobalLexerRegistry`, `Endian`, `Enumeration`, `Exception`, `Failure`, `FatRat`, `Grammar`,
|
---|
107 | `Hash`, `HyperWhatever`, `Instant`, `Int`, `int`, `int16`, `int32`, `int64`, `int8`, `str`,
|
---|
108 | `IntStr`, `IO`, `IO::ArgFiles`, `IO::CatHandle`, `IO::Handle`, `IO::Notification`,
|
---|
109 | `IO::Notification::Change`, `IO::Path`, `IO::Path::Cygwin`, `IO::Path::Parts`,
|
---|
110 | `IO::Path::QNX`, `IO::Path::Unix`, `IO::Path::Win32`, `IO::Pipe`, `IO::Socket`,
|
---|
111 | `IO::Socket::Async`, `IO::Socket::Async::ListenSocket`, `IO::Socket::INET`, `IO::Spec`,
|
---|
112 | `IO::Spec::Cygwin`, `IO::Spec::QNX`, `IO::Spec::Unix`, `IO::Spec::Win32`, `IO::Special`,
|
---|
113 | `Iterable`, `Iterator`, `Junction`, `Kernel`, `Label`, `List`, `Lock`, `Lock::Async`,
|
---|
114 | `Lock::ConditionVariable`, `long`, `longlong`, `Macro`, `Map`, `Match`,
|
---|
115 | `Metamodel::AttributeContainer`, `Metamodel::C3MRO`, `Metamodel::ClassHOW`,
|
---|
116 | `Metamodel::ConcreteRoleHOW`, `Metamodel::CurriedRoleHOW`, `Metamodel::DefiniteHOW`,
|
---|
117 | `Metamodel::Documenting`, `Metamodel::EnumHOW`, `Metamodel::Finalization`,
|
---|
118 | `Metamodel::MethodContainer`, `Metamodel::Mixins`, `Metamodel::MROBasedMethodDispatch`,
|
---|
119 | `Metamodel::MultipleInheritance`, `Metamodel::Naming`, `Metamodel::Primitives`,
|
---|
120 | `Metamodel::PrivateMethodContainer`, `Metamodel::RoleContainer`, `Metamodel::RolePunning`,
|
---|
121 | `Metamodel::Stashing`, `Metamodel::Trusting`, `Metamodel::Versioning`, `Method`, `Mix`,
|
---|
122 | `MixHash`, `Mixy`, `Mu`, `NFC`, `NFD`, `NFKC`, `NFKD`, `Nil`, `Num`, `num32`, `num64`,
|
---|
123 | `Numeric`, `NumStr`, `ObjAt`, `Order`, `Pair`, `Parameter`, `Perl`, `Pod::Block`,
|
---|
124 | `Pod::Block::Code`, `Pod::Block::Comment`, `Pod::Block::Declarator`, `Pod::Block::Named`,
|
---|
125 | `Pod::Block::Para`, `Pod::Block::Table`, `Pod::Heading`, `Pod::Item`, `Pointer`,
|
---|
126 | `Positional`, `PositionalBindFailover`, `Proc`, `Proc::Async`, `Promise`, `Proxy`,
|
---|
127 | `PseudoStash`, `QuantHash`, `RaceSeq`, `Raku`, `Range`, `Rat`, `Rational`, `RatStr`,
|
---|
128 | `Real`, `Regex`, `Routine`, `Routine::WrapHandle`, `Scalar`, `Scheduler`, `Semaphore`,
|
---|
129 | `Seq`, `Sequence`, `Set`, `SetHash`, `Setty`, `Signature`, `size_t`, `Slip`, `Stash`,
|
---|
130 | `Str`, `StrDistance`, `Stringy`, `Sub`, `Submethod`, `Supplier`, `Supplier::Preserving`,
|
---|
131 | `Supply`, `Systemic`, `Tap`, `Telemetry`, `Telemetry::Instrument::Thread`,
|
---|
132 | `Telemetry::Instrument::ThreadPool`, `Telemetry::Instrument::Usage`, `Telemetry::Period`,
|
---|
133 | `Telemetry::Sampler`, `Thread`, `Test`, `ThreadPoolScheduler`, `UInt`, `uint16`, `uint32`,
|
---|
134 | `uint64`, `uint8`, `Uni`, `utf8`, `ValueObjAt`, `Variable`, `Version`, `VM`, `Whatever`,
|
---|
135 | `WhateverCode`, `WrapHandle`, `NativeCall`,
|
---|
136 | // Pragmas
|
---|
137 | `precompilation`, `experimental`, `worries`, `MONKEY-TYPING`, `MONKEY-SEE-NO-EVAL`,
|
---|
138 | `MONKEY-GUTS`, `fatal`, `lib`, `isms`, `newline`, `nqp`, `soft`,
|
---|
139 | `strict`, `trace`, `variables`,
|
---|
140 | }
|
---|
141 |
|
---|
142 | builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...)
|
---|
143 |
|
---|
144 | builtinRoutines := []string{
|
---|
145 | `ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`,
|
---|
146 | `acosec`, `acosech`, `acosh`, `acotan`, `acotanh`, `acquire`, `act`, `action`, `actions`,
|
---|
147 | `add`, `add_attribute`, `add_enum_value`, `add_fallback`, `add_method`, `add_parent`,
|
---|
148 | `add_private_method`, `add_role`, `add_stash`, `add_trustee`, `addendum`, `adverb`, `after`,
|
---|
149 | `all`, `allocate`, `allof`, `allowed`, `alternative-names`, `annotations`, `antipair`,
|
---|
150 | `antipairs`, `any`, `anyof`, `api`, `app_lifetime`, `append`, `arch`, `archetypes`,
|
---|
151 | `archname`, `args`, `ARGS-TO-CAPTURE`, `arity`, `Array`, `asec`, `asech`, `asin`, `asinh`,
|
---|
152 | `ASSIGN-KEY`, `ASSIGN-POS`, `assuming`, `ast`, `at`, `atan`, `atan2`, `atanh`, `AT-KEY`,
|
---|
153 | `atomic-assign`, `atomic-dec-fetch`, `atomic-fetch`, `atomic-fetch-add`, `atomic-fetch-dec`,
|
---|
154 | `atomic-fetch-inc`, `atomic-fetch-sub`, `atomic-inc-fetch`, `AT-POS`, `attributes`, `auth`,
|
---|
155 | `await`, `backend`, `backtrace`, `Bag`, `bag`, `Baggy`, `BagHash`, `bail-out`, `base`,
|
---|
156 | `basename`, `base-repeating`, `base_type`, `batch`, `BIND-KEY`, `BIND-POS`, `bind-stderr`,
|
---|
157 | `bind-stdin`, `bind-stdout`, `bind-udp`, `bits`, `bless`, `block`, `Bool`, `bool-only`,
|
---|
158 | `bounds`, `break`, `Bridge`, `broken`, `BUILD`, `TWEAK`, `build-date`, `bytes`, `cache`,
|
---|
159 | `callframe`, `calling-package`, `CALL-ME`, `callsame`, `callwith`, `can`, `cancel`,
|
---|
160 | `candidates`, `cando`, `can-ok`, `canonpath`, `caps`, `caption`, `Capture`, `capture`,
|
---|
161 | `cas`, `catdir`, `categorize`, `categorize-list`, `catfile`, `catpath`, `cause`, `ceiling`,
|
---|
162 | `cglobal`, `changed`, `Channel`, `channel`, `chars`, `chdir`, `child`, `child-name`,
|
---|
163 | `child-typename`, `chmod`, `chomp`, `chop`, `chr`, `chrs`, `chunks`, `cis`, `classify`,
|
---|
164 | `classify-list`, `cleanup`, `clone`, `close`, `closed`, `close-stdin`, `cmp-ok`, `code`,
|
---|
165 | `codename`, `codes`, `coerce_type`, `coll`, `collate`, `column`, `comb`, `combinations`,
|
---|
166 | `command`, `comment`, `compiler`, `Complex`, `compose`, `composalizer`, `compose_type`,
|
---|
167 | `compose_values`, `composer`, `compute_mro`, `condition`, `config`, `configure_destroy`,
|
---|
168 | `configure_type_checking`, `conj`, `connect`, `constraints`, `construct`, `contains`,
|
---|
169 | `content`, `contents`, `copy`, `cos`, `cosec`, `cosech`, `cosh`, `cotan`, `cotanh`, `count`,
|
---|
170 | `count-only`, `cpu-cores`, `cpu-usage`, `CREATE`, `create_type`, `cross`, `cue`, `curdir`,
|
---|
171 | `curupdir`, `d`, `Date`, `DateTime`, `day`, `daycount`, `day-of-month`, `day-of-week`,
|
---|
172 | `day-of-year`, `days-in-month`, `dd-mm-yyyy`, `declaration`, `decode`, `decoder`, `deepmap`,
|
---|
173 | `default`, `defined`, `DEFINITE`, `definite`, `delayed`, `delete`, `delete-by-compiler`,
|
---|
174 | `DELETE-KEY`, `DELETE-POS`, `denominator`, `desc`, `DESTROY`, `destroyers`, `devnull`,
|
---|
175 | `diag`, `did-you-mean`, `die`, `dies-ok`, `dir`, `dirname`, `distribution`, `dir-sep`,
|
---|
176 | `DISTROnames`, `do`, `does`, `does-ok`, `done`, `done-testing`, `duckmap`, `dynamic`, `e`,
|
---|
177 | `eager`, `earlier`, `elems`, `emit`, `enclosing`, `encode`, `encoder`, `encoding`, `end`,
|
---|
178 | `endian`, `ends-with`, `enum_from_value`, `enum_value_list`, `enum_values`, `enums`, `EOF`,
|
---|
179 | `eof`, `EVAL`, `eval-dies-ok`, `EVALFILE`, `eval-lives-ok`, `event`, `exception`,
|
---|
180 | `excludes-max`, `excludes-min`, `EXISTS-KEY`, `EXISTS-POS`, `exit`, `exitcode`, `exp`,
|
---|
181 | `expected`, `explicitly-manage`, `expmod`, `export_callback`, `extension`, `f`, `fail`,
|
---|
182 | `FALLBACK`, `fails-like`, `fc`, `feature`, `file`, `filename`, `files`, `find`,
|
---|
183 | `find_method`, `find_method_qualified`, `finish`, `first`, `flat`, `first-date-in-month`,
|
---|
184 | `flatmap`, `flip`, `floor`, `flunk`, `flush`, `flush_cache`, `fmt`, `format`, `formatter`,
|
---|
185 | `free-memory`, `freeze`, `from`, `from-list`, `from-loop`, `from-posix`, `from-slurpy`,
|
---|
186 | `full`, `full-barrier`, `GENERATE-USAGE`, `generate_mixin`, `get`, `get_value`, `getc`,
|
---|
187 | `gist`, `got`, `grab`, `grabpairs`, `grep`, `handle`, `handled`, `handles`, `hardware`,
|
---|
188 | `has_accessor`, `Hash`, `hash`, `head`, `headers`, `hh-mm-ss`, `hidden`, `hides`, `hostname`,
|
---|
189 | `hour`, `how`, `hyper`, `id`, `illegal`, `im`, `in`, `in-timezone`, `indent`, `index`,
|
---|
190 | `indices`, `indir`, `infinite`, `infix`, `postcirumfix`, `cicumfix`, `install`,
|
---|
191 | `install_method_cache`, `Instant`, `instead`, `Int`, `int-bounds`, `interval`, `in-timezone`,
|
---|
192 | `invalid-str`, `invert`, `invocant`, `IO`, `IO::Notification.watch-path`, `is_trusted`,
|
---|
193 | `is_type`, `isa`, `is-absolute`, `isa-ok`, `is-approx`, `is-deeply`, `is-hidden`,
|
---|
194 | `is-initial-thread`, `is-int`, `is-lazy`, `is-leap-year`, `isNaN`, `isnt`, `is-prime`,
|
---|
195 | `is-relative`, `is-routine`, `is-setting`, `is-win`, `item`, `iterator`, `join`, `keep`,
|
---|
196 | `kept`, `KERNELnames`, `key`, `keyof`, `keys`, `kill`, `kv`, `kxxv`, `l`, `lang`, `last`,
|
---|
197 | `lastcall`, `later`, `lazy`, `lc`, `leading`, `level`, `like`, `line`, `lines`, `link`,
|
---|
198 | `List`, `list`, `listen`, `live`, `lives-ok`, `load`, `load-repo-id`, `load-unit`, `loaded`,
|
---|
199 | `loads`, `local`, `lock`, `log`, `log10`, `lookup`, `lsb`, `made`, `MAIN`, `make`, `Map`,
|
---|
200 | `map`, `match`, `max`, `maxpairs`, `merge`, `message`, `method`, `meta`, `method_table`,
|
---|
201 | `methods`, `migrate`, `min`, `minmax`, `minpairs`, `minute`, `misplaced`, `Mix`, `mix`,
|
---|
202 | `MixHash`, `mixin`, `mixin_attribute`, `Mixy`, `mkdir`, `mode`, `modified`, `month`, `move`,
|
---|
203 | `mro`, `msb`, `multi`, `multiness`, `name`, `named`, `named_names`, `narrow`,
|
---|
204 | `nativecast`, `native-descriptor`, `nativesizeof`, `need`, `new`, `new_type`,
|
---|
205 | `new-from-daycount`, `new-from-pairs`, `next`, `nextcallee`, `next-handle`, `nextsame`,
|
---|
206 | `nextwith`, `next-interesting-index`, `NFC`, `NFD`, `NFKC`, `NFKD`, `nice`, `nl-in`,
|
---|
207 | `nl-out`, `nodemap`, `nok`, `normalize`, `none`, `norm`, `not`, `note`, `now`, `nude`,
|
---|
208 | `Num`, `numerator`, `Numeric`, `of`, `offset`, `offset-in-hours`, `offset-in-minutes`,
|
---|
209 | `ok`, `old`, `on-close`, `one`, `on-switch`, `open`, `opened`, `operation`, `optional`,
|
---|
210 | `ord`, `ords`, `orig`, `os-error`, `osname`, `out-buffer`, `pack`, `package`, `package-kind`,
|
---|
211 | `package-name`, `packages`, `Pair`, `pair`, `pairs`, `pairup`, `parameter`, `params`,
|
---|
212 | `parent`, `parent-name`, `parents`, `parse`, `parse-base`, `parsefile`, `parse-names`,
|
---|
213 | `parts`, `pass`, `path`, `path-sep`, `payload`, `peer-host`, `peer-port`, `periods`, `perl`,
|
---|
214 | `permutations`, `phaser`, `pick`, `pickpairs`, `pid`, `placeholder`, `plan`, `plus`,
|
---|
215 | `polar`, `poll`, `polymod`, `pop`, `pos`, `positional`, `posix`, `postfix`, `postmatch`,
|
---|
216 | `precomp-ext`, `precomp-target`, `precompiled`, `pred`, `prefix`, `prematch`, `prepend`,
|
---|
217 | `primary`, `print`, `printf`, `print-nl`, `print-to`, `private`, `private_method_names`,
|
---|
218 | `private_method_table`, `proc`, `produce`, `Promise`, `promise`, `prompt`, `protect`,
|
---|
219 | `protect-or-queue-on-recursion`, `publish_method_cache`, `pull-one`, `push`, `push-all`,
|
---|
220 | `push-at-least`, `push-exactly`, `push-until-lazy`, `put`, `qualifier-type`, `quaternary`,
|
---|
221 | `quit`, `r`, `race`, `radix`, `raku`, `rand`, `Range`, `range`, `Rat`, `raw`, `re`, `read`,
|
---|
222 | `read-bits`, `read-int128`, `read-int16`, `read-int32`, `read-int64`, `read-int8`,
|
---|
223 | `read-num32`, `read-num64`, `read-ubits`, `read-uint128`, `read-uint16`, `read-uint32`,
|
---|
224 | `read-uint64`, `read-uint8`, `readchars`, `readonly`, `ready`, `Real`, `reallocate`,
|
---|
225 | `reals`, `reason`, `rebless`, `receive`, `recv`, `redispatcher`, `redo`, `reduce`,
|
---|
226 | `rel2abs`, `relative`, `release`, `remove`, `rename`, `repeated`, `replacement`,
|
---|
227 | `replace-with`, `repo`, `repo-id`, `report`, `required`, `reserved`, `resolve`, `restore`,
|
---|
228 | `result`, `resume`, `rethrow`, `return`, `return-rw`, `returns`, `reverse`, `right`,
|
---|
229 | `rindex`, `rmdir`, `role`, `roles_to_compose`, `rolish`, `roll`, `rootdir`, `roots`,
|
---|
230 | `rotate`, `rotor`, `round`, `roundrobin`, `routine-type`, `run`, `RUN-MAIN`, `rw`, `rwx`,
|
---|
231 | `samecase`, `samemark`, `samewith`, `say`, `schedule-on`, `scheduler`, `scope`, `sec`,
|
---|
232 | `sech`, `second`, `secondary`, `seek`, `self`, `send`, `Seq`, `Set`, `set`, `serial`,
|
---|
233 | `set_hidden`, `set_name`, `set_package`, `set_rw`, `set_value`, `set_api`, `set_auth`,
|
---|
234 | `set_composalizer`, `set_export_callback`, `set_is_mixin`, `set_mixin_attribute`,
|
---|
235 | `set_package`, `set_ver`, `set_why`, `SetHash`, `Setty`, `set-instruments`,
|
---|
236 | `setup_finalization`, `setup_mixin_cache`, `shape`, `share`, `shell`, `short-id`,
|
---|
237 | `short-name`, `shortname`, `shift`, `sibling`, `sigil`, `sign`, `signal`, `signals`,
|
---|
238 | `signature`, `sin`, `sinh`, `sink`, `sink-all`, `skip`, `skip-at-least`,
|
---|
239 | `skip-at-least-pull-one`, `skip-one`, `skip-rest`, `sleep`, `sleep-timer`, `sleep-until`,
|
---|
240 | `Slip`, `slip`, `slurp`, `slurp-rest`, `slurpy`, `snap`, `snapper`, `so`, `socket-host`,
|
---|
241 | `socket-port`, `sort`, `source`, `source-package`, `spawn`, `SPEC`, `splice`, `split`,
|
---|
242 | `splitdir`, `splitpath`, `sprintf`, `spurt`, `sqrt`, `squish`, `srand`, `stable`, `start`,
|
---|
243 | `started`, `starts-with`, `status`, `stderr`, `stdout`, `STORE`, `store-file`,
|
---|
244 | `store-repo-id`, `store-unit`, `Str`, `Stringy`, `sub_signature`, `subbuf`, `subbuf-rw`,
|
---|
245 | `subname`, `subparse`, `subst`, `subst-mutate`, `substr`, `substr-eq`, `substr-rw`,
|
---|
246 | `subtest`, `succ`, `sum`, `suffix`, `summary`, `Supply`, `symlink`, `T`, `t`, `tail`,
|
---|
247 | `take`, `take-rw`, `tan`, `tanh`, `tap`, `target`, `target-name`, `tc`, `tclc`, `tell`,
|
---|
248 | `term`, `tertiary`, `then`, `throttle`, `throw`, `throws-like`, `time`, `timezone`,
|
---|
249 | `tmpdir`, `to`, `today`, `todo`, `toggle`, `to-posix`, `total`, `total-memory`, `trailing`,
|
---|
250 | `trans`, `tree`, `trim`, `trim-leading`, `trim-trailing`, `truncate`, `truncated-to`,
|
---|
251 | `trusts`, `try_acquire`, `trying`, `twigil`, `type`, `type_captures`, `type_check`,
|
---|
252 | `typename`, `uc`, `udp`, `uncaught_handler`, `undefine`, `unimatch`, `unicmp`, `uniname`,
|
---|
253 | `uninames`, `uninstall`, `uniparse`, `uniprop`, `uniprops`, `unique`, `unival`, `univals`,
|
---|
254 | `unlike`, `unlink`, `unlock`, `unpack`, `unpolar`, `unset`, `unshift`, `unwrap`, `updir`,
|
---|
255 | `USAGE`, `usage-name`, `use-ok`, `utc`, `val`, `value`, `values`, `VAR`, `variable`, `ver`,
|
---|
256 | `verbose-config`, `Version`, `version`, `VMnames`, `volume`, `vow`, `w`, `wait`, `warn`,
|
---|
257 | `watch`, `watch-path`, `week`, `weekday-of-month`, `week-number`, `week-year`, `WHAT`,
|
---|
258 | `what`, `when`, `WHERE`, `WHEREFORE`, `WHICH`, `WHO`, `whole-second`, `WHY`, `why`,
|
---|
259 | `with-lock-hidden-from-recursion-check`, `wordcase`, `words`, `workaround`, `wrap`,
|
---|
260 | `write`, `write-bits`, `write-int128`, `write-int16`, `write-int32`, `write-int64`,
|
---|
261 | `write-int8`, `write-num32`, `write-num64`, `write-ubits`, `write-uint128`, `write-uint16`,
|
---|
262 | `write-uint32`, `write-uint64`, `write-uint8`, `write-to`, `x`, `yada`, `year`, `yield`,
|
---|
263 | `yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`,
|
---|
264 | }
|
---|
265 |
|
---|
266 | builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...)
|
---|
267 |
|
---|
268 | // A map of opening and closing brackets
|
---|
269 | brackets := map[rune]rune{
|
---|
270 | '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
|
---|
271 | '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
|
---|
272 | '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
|
---|
273 | '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
|
---|
274 | '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
|
---|
275 | '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
|
---|
276 | '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
|
---|
277 | '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
|
---|
278 | '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
|
---|
279 | '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
|
---|
280 | '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
|
---|
281 | '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
|
---|
282 | '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
|
---|
283 | '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
|
---|
284 | '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
|
---|
285 | '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
|
---|
286 | '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
|
---|
287 | '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
|
---|
288 | '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
|
---|
289 | '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
|
---|
290 | '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
|
---|
291 | '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
|
---|
292 | '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
|
---|
293 | '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
|
---|
294 | '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
|
---|
295 | '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
|
---|
296 | '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
|
---|
297 | '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
|
---|
298 | '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
|
---|
299 | '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
|
---|
300 | '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
|
---|
301 | '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
|
---|
302 | '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
|
---|
303 | '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
|
---|
304 | '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
|
---|
305 | '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
|
---|
306 | '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
|
---|
307 | '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
|
---|
308 | '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
|
---|
309 | '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
|
---|
310 | '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
|
---|
311 | '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
|
---|
312 | '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
|
---|
313 | '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
|
---|
314 | '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
|
---|
315 | '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
|
---|
316 | '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
|
---|
317 | '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
|
---|
318 | '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
|
---|
319 | '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
|
---|
320 | '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
|
---|
321 | '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
|
---|
322 | '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
|
---|
323 | '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
|
---|
324 | '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
|
---|
325 | '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
|
---|
326 | '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
|
---|
327 | '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
|
---|
328 | '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
|
---|
329 | '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
|
---|
330 | '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
|
---|
331 | '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
|
---|
332 | '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
|
---|
333 | '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
|
---|
334 | }
|
---|
335 |
|
---|
336 | bracketsPattern := `[` + regexp.QuoteMeta(joinRuneMap(brackets)) + `]`
|
---|
337 |
|
---|
338 | // Finds opening brackets and their closing counterparts (including pod and heredoc)
|
---|
339 | // and modifies state groups and position accordingly
|
---|
340 | findBrackets := func(tokenClass RakuToken) MutatorFunc {
|
---|
341 | return func(state *LexerState) error {
|
---|
342 | var openingChars []rune
|
---|
343 | var adverbs []rune
|
---|
344 | switch tokenClass {
|
---|
345 | case rakuPod:
|
---|
346 | openingChars = []rune(strings.Join(state.Groups[1:5], ``))
|
---|
347 | default:
|
---|
348 | adverbs = []rune(state.NamedGroups[`adverbs`])
|
---|
349 | openingChars = []rune(state.NamedGroups[`opening_delimiters`])
|
---|
350 | }
|
---|
351 |
|
---|
352 | openingChar := openingChars[0]
|
---|
353 |
|
---|
354 | nChars := len(openingChars)
|
---|
355 |
|
---|
356 | var closingChar rune
|
---|
357 | var closingCharExists bool
|
---|
358 | var closingChars []rune
|
---|
359 |
|
---|
360 | switch tokenClass {
|
---|
361 | case rakuPod:
|
---|
362 | closingCharExists = true
|
---|
363 | default:
|
---|
364 | closingChar, closingCharExists = brackets[openingChar]
|
---|
365 | }
|
---|
366 |
|
---|
367 | switch tokenClass {
|
---|
368 | case rakuPodFormatter:
|
---|
369 | formatter := StringOther
|
---|
370 |
|
---|
371 | switch state.NamedGroups[`keyword`] {
|
---|
372 | case "B":
|
---|
373 | formatter = GenericStrong
|
---|
374 | case "I":
|
---|
375 | formatter = GenericEmph
|
---|
376 | case "U":
|
---|
377 | formatter = GenericUnderline
|
---|
378 | }
|
---|
379 |
|
---|
380 | formatterRule := ruleReplacingConfig{
|
---|
381 | pattern: `.+?`,
|
---|
382 | tokenType: formatter,
|
---|
383 | mutator: nil,
|
---|
384 | stateName: `pod-formatter`,
|
---|
385 | rulePosition: bottomRule,
|
---|
386 | }
|
---|
387 |
|
---|
388 | err := replaceRule(formatterRule)(state)
|
---|
389 | if err != nil {
|
---|
390 | panic(err)
|
---|
391 | }
|
---|
392 |
|
---|
393 | err = replaceRule(ruleReplacingConfig{
|
---|
394 | delimiter: []rune{closingChar},
|
---|
395 | tokenType: Punctuation,
|
---|
396 | stateName: `pod-formatter`,
|
---|
397 | pushState: true,
|
---|
398 | numberOfDelimiterChars: nChars,
|
---|
399 | appendMutator: popRule(formatterRule),
|
---|
400 | })(state)
|
---|
401 | if err != nil {
|
---|
402 | panic(err)
|
---|
403 | }
|
---|
404 |
|
---|
405 | return nil
|
---|
406 | case rakuMatchRegex:
|
---|
407 | var delimiter []rune
|
---|
408 | if closingCharExists {
|
---|
409 | delimiter = []rune{closingChar}
|
---|
410 | } else {
|
---|
411 | delimiter = openingChars
|
---|
412 | }
|
---|
413 |
|
---|
414 | err := replaceRule(ruleReplacingConfig{
|
---|
415 | delimiter: delimiter,
|
---|
416 | tokenType: Punctuation,
|
---|
417 | stateName: `regex`,
|
---|
418 | popState: true,
|
---|
419 | pushState: true,
|
---|
420 | })(state)
|
---|
421 | if err != nil {
|
---|
422 | panic(err)
|
---|
423 | }
|
---|
424 |
|
---|
425 | return nil
|
---|
426 | case rakuSubstitutionRegex:
|
---|
427 | delimiter := regexp2.Escape(string(openingChars))
|
---|
428 |
|
---|
429 | err := replaceRule(ruleReplacingConfig{
|
---|
430 | pattern: `(` + delimiter + `)` + `((?:\\\\|\\/|.)*?)` + `(` + delimiter + `)`,
|
---|
431 | tokenType: ByGroups(Punctuation, UsingSelf(`qq`), Punctuation),
|
---|
432 | rulePosition: topRule,
|
---|
433 | stateName: `regex`,
|
---|
434 | popState: true,
|
---|
435 | pushState: true,
|
---|
436 | })(state)
|
---|
437 | if err != nil {
|
---|
438 | panic(err)
|
---|
439 | }
|
---|
440 |
|
---|
441 | return nil
|
---|
442 | }
|
---|
443 |
|
---|
444 | text := state.Text
|
---|
445 |
|
---|
446 | var endPos int
|
---|
447 |
|
---|
448 | var nonMirroredOpeningCharPosition int
|
---|
449 |
|
---|
450 | if !closingCharExists {
|
---|
451 | // it's not a mirrored character, which means we
|
---|
452 | // just need to look for the next occurrence
|
---|
453 | closingChars = openingChars
|
---|
454 | nonMirroredOpeningCharPosition = indexAt(text, closingChars, state.Pos)
|
---|
455 | endPos = nonMirroredOpeningCharPosition
|
---|
456 | } else {
|
---|
457 | var podRegex *regexp2.Regexp
|
---|
458 | if tokenClass == rakuPod {
|
---|
459 | podRegex = regexp2.MustCompile(
|
---|
460 | state.NamedGroups[`ws`]+`=end`+`\s+`+regexp2.Escape(state.NamedGroups[`name`]),
|
---|
461 | 0,
|
---|
462 | )
|
---|
463 | } else {
|
---|
464 | closingChars = []rune(strings.Repeat(string(closingChar), nChars))
|
---|
465 | }
|
---|
466 |
|
---|
467 | // we need to look for the corresponding closing character,
|
---|
468 | // keep nesting in mind
|
---|
469 | nestingLevel := 1
|
---|
470 |
|
---|
471 | searchPos := state.Pos - nChars
|
---|
472 |
|
---|
473 | var nextClosePos int
|
---|
474 |
|
---|
475 | for nestingLevel > 0 {
|
---|
476 | if tokenClass == rakuPod {
|
---|
477 | match, err := podRegex.FindRunesMatchStartingAt(text, searchPos+nChars)
|
---|
478 | if err == nil {
|
---|
479 | closingChars = match.Runes()
|
---|
480 | nextClosePos = match.Index
|
---|
481 | } else {
|
---|
482 | nextClosePos = -1
|
---|
483 | }
|
---|
484 | } else {
|
---|
485 | nextClosePos = indexAt(text, closingChars, searchPos+nChars)
|
---|
486 | }
|
---|
487 |
|
---|
488 | nextOpenPos := indexAt(text, openingChars, searchPos+nChars)
|
---|
489 |
|
---|
490 | switch {
|
---|
491 | case nextClosePos == -1:
|
---|
492 | nextClosePos = len(text)
|
---|
493 | nestingLevel = 0
|
---|
494 | case nextOpenPos != -1 && nextOpenPos < nextClosePos:
|
---|
495 | nestingLevel++
|
---|
496 | nChars = len(openingChars)
|
---|
497 | searchPos = nextOpenPos
|
---|
498 | default: // next_close_pos < next_open_pos
|
---|
499 | nestingLevel--
|
---|
500 | nChars = len(closingChars)
|
---|
501 | searchPos = nextClosePos
|
---|
502 | }
|
---|
503 | }
|
---|
504 |
|
---|
505 | endPos = nextClosePos
|
---|
506 | }
|
---|
507 |
|
---|
508 | if endPos < 0 {
|
---|
509 | // if we didn't find a closer, just highlight the
|
---|
510 | // rest of the text in this class
|
---|
511 | endPos = len(text)
|
---|
512 | }
|
---|
513 |
|
---|
514 | adverbre := regexp.MustCompile(`:to\b|:heredoc\b`)
|
---|
515 | var heredocTerminator []rune
|
---|
516 | var endHeredocPos int
|
---|
517 | if adverbre.MatchString(string(adverbs)) {
|
---|
518 | if endPos != len(text) {
|
---|
519 | heredocTerminator = text[state.Pos:endPos]
|
---|
520 | nChars = len(heredocTerminator)
|
---|
521 | } else {
|
---|
522 | endPos = state.Pos + 1
|
---|
523 | heredocTerminator = []rune{}
|
---|
524 | nChars = 0
|
---|
525 | }
|
---|
526 |
|
---|
527 | if nChars > 0 {
|
---|
528 | endHeredocPos = indexAt(text[endPos:], heredocTerminator, 0)
|
---|
529 | if endHeredocPos > -1 {
|
---|
530 | endPos += endHeredocPos
|
---|
531 | } else {
|
---|
532 | endPos = len(text)
|
---|
533 | }
|
---|
534 | }
|
---|
535 | }
|
---|
536 |
|
---|
537 | textBetweenBrackets := string(text[state.Pos:endPos])
|
---|
538 | switch tokenClass {
|
---|
539 | case rakuPod, rakuPodDeclaration, rakuNameAttribute:
|
---|
540 | state.NamedGroups[`value`] = textBetweenBrackets
|
---|
541 | state.NamedGroups[`closing_delimiters`] = string(closingChars)
|
---|
542 | case rakuQuote:
|
---|
543 | if len(heredocTerminator) > 0 {
|
---|
544 | // Length of heredoc terminator + closing chars + `;`
|
---|
545 | heredocFristPunctuationLen := nChars + len(openingChars) + 1
|
---|
546 |
|
---|
547 | state.NamedGroups[`opening_delimiters`] = string(openingChars) +
|
---|
548 | string(text[state.Pos:state.Pos+heredocFristPunctuationLen])
|
---|
549 |
|
---|
550 | state.NamedGroups[`value`] =
|
---|
551 | string(text[state.Pos+heredocFristPunctuationLen : endPos])
|
---|
552 |
|
---|
553 | if endHeredocPos > -1 {
|
---|
554 | state.NamedGroups[`closing_delimiters`] = string(heredocTerminator)
|
---|
555 | }
|
---|
556 | } else {
|
---|
557 | state.NamedGroups[`value`] = textBetweenBrackets
|
---|
558 | if nChars > 0 {
|
---|
559 | state.NamedGroups[`closing_delimiters`] = string(closingChars)
|
---|
560 | }
|
---|
561 | }
|
---|
562 | default:
|
---|
563 | state.Groups = []string{state.Groups[0] + string(text[state.Pos:endPos+nChars])}
|
---|
564 | }
|
---|
565 |
|
---|
566 | state.Pos = endPos + nChars
|
---|
567 |
|
---|
568 | return nil
|
---|
569 | }
|
---|
570 | }
|
---|
571 |
|
---|
572 | // Raku rules
|
---|
573 | // Empty capture groups are placeholders and will be replaced by mutators
|
---|
574 | // DO NOT REMOVE THEM!
|
---|
575 | return Rules{
|
---|
576 | "root": {
|
---|
577 | // Placeholder, will be overwritten by mutators, DO NOT REMOVE!
|
---|
578 | {`\A\z`, nil, nil},
|
---|
579 | Include("common"),
|
---|
580 | {`{`, Punctuation, Push(`root`)},
|
---|
581 | {`\(`, Punctuation, Push(`root`)},
|
---|
582 | {`[)}]`, Punctuation, Pop(1)},
|
---|
583 | {`;`, Punctuation, nil},
|
---|
584 | {`\[|\]`, Operator, nil},
|
---|
585 | {`.+?`, Text, nil},
|
---|
586 | },
|
---|
587 | "common": {
|
---|
588 | {`^#![^\n]*$`, CommentHashbang, nil},
|
---|
589 | Include("pod"),
|
---|
590 | // Multi-line, Embedded comment
|
---|
591 | {
|
---|
592 | "#`(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)`,
|
---|
593 | CommentMultiline,
|
---|
594 | findBrackets(rakuMultilineComment),
|
---|
595 | },
|
---|
596 | {`#[^\n]*$`, CommentSingle, nil},
|
---|
597 | // /regex/
|
---|
598 | {
|
---|
599 | `(?<=(?:^|\(|=|:|~~|\[|{|,|=>)\s*)(/)(?!\]|\))((?:\\\\|\\/|.)*?)((?<!(?<!\\)\\)/(?!'|"))`,
|
---|
600 | ByGroups(Punctuation, UsingSelf("regex"), Punctuation),
|
---|
601 | nil,
|
---|
602 | },
|
---|
603 | Include("variable"),
|
---|
604 | // ::?VARIABLE
|
---|
605 | {`::\?\w+(?::[_UD])?`, NameVariableGlobal, nil},
|
---|
606 | // Version
|
---|
607 | {
|
---|
608 | `\b(v)(\d+)((?:\.(?:\*|[\d\w]+))*)(\+)?`,
|
---|
609 | ByGroups(Keyword, NumberInteger, NameEntity, Operator),
|
---|
610 | nil,
|
---|
611 | },
|
---|
612 | Include("number"),
|
---|
613 | // Hyperoperator | »*«
|
---|
614 | {`(>>)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
|
---|
615 | {`(»)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
|
---|
616 | // Hyperoperator | «*«
|
---|
617 | {`(<<)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
|
---|
618 | {`(«)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
|
---|
619 | // Hyperoperator | »*»
|
---|
620 | {`(>>)(\S+?)(>>)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
|
---|
621 | {`(»)(\S+?)(»)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
|
---|
622 | // <<quoted words>>
|
---|
623 | {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<<)(?!(?:(?!>>)[^\n])+?[},;] *\n)(?!(?:(?!>>).)+?>>\S+?>>)`, Punctuation, Push("<<")},
|
---|
624 | // «quoted words»
|
---|
625 | {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(«)(?![^»]+?[},;] *\n)(?![^»]+?»\S+?»)`, Punctuation, Push("«")},
|
---|
626 | // [<]
|
---|
627 | {`(?<=\[\\?)<(?=\])`, Operator, nil},
|
---|
628 | // < and > operators | something < onething > something
|
---|
629 | {
|
---|
630 | `(?<=[$@%&]?\w[\w':-]* +)(<=?)( *[^ ]+? *)(>=?)(?= *[$@%&]?\w[\w':-]*)`,
|
---|
631 | ByGroups(Operator, UsingSelf("root"), Operator),
|
---|
632 | nil,
|
---|
633 | },
|
---|
634 | // <quoted words>
|
---|
635 | {
|
---|
636 | `(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<)((?:(?![,;)}] *(?:#[^\n]+)?\n)[^<>])+?)(>)(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?\w[\w':-]*[^(]|\s+\[))`,
|
---|
637 | ByGroups(Punctuation, String, Punctuation),
|
---|
638 | nil,
|
---|
639 | },
|
---|
640 | {`C?X::['\w:-]+`, NameException, nil},
|
---|
641 | Include("metaoperator"),
|
---|
642 | // Pair | key => value
|
---|
643 | {
|
---|
644 | `(\w[\w'-]*)(\s*)(=>)`,
|
---|
645 | ByGroups(String, Text, Operator),
|
---|
646 | nil,
|
---|
647 | },
|
---|
648 | Include("colon-pair"),
|
---|
649 | // Token
|
---|
650 | {
|
---|
651 | `(?<=(?:^|\s)(?:regex|token|rule)(\s+))` + namePattern + colonPairLookahead + `\s*[({])`,
|
---|
652 | NameFunction,
|
---|
653 | Push("token", "name-adverb"),
|
---|
654 | },
|
---|
655 | // Substitution
|
---|
656 | {`(?<=^|\b|\s)(?<!\.)(ss|S|s|TR|tr)\b(\s*)`, ByGroups(Keyword, Text), Push("substitution")},
|
---|
657 | {keywordsPattern, Keyword, nil},
|
---|
658 | {builtinTypesPattern, NameBuiltin, nil},
|
---|
659 | {builtinRoutinesPattern, NameBuiltin, nil},
|
---|
660 | // Class name
|
---|
661 | {
|
---|
662 | `(?<=(?:^|\s)(?:class|grammar|role|does|but|is|subset|of)\s+)` + namePattern,
|
---|
663 | NameClass,
|
---|
664 | Push("name-adverb"),
|
---|
665 | },
|
---|
666 | // Routine
|
---|
667 | {
|
---|
668 | `(?<=(?:^|\s)(?:sub|method|multi sub|multi)\s+)!?` + namePattern + colonPairLookahead + `\s*[({])`,
|
---|
669 | NameFunction,
|
---|
670 | Push("name-adverb"),
|
---|
671 | },
|
---|
672 | // Constant
|
---|
673 | {`(?<=\bconstant\s+)` + namePattern, NameConstant, Push("name-adverb")},
|
---|
674 | // Namespace
|
---|
675 | {`(?<=\b(?:use|module|package)\s+)` + namePattern, NameNamespace, Push("name-adverb")},
|
---|
676 | Include("operator"),
|
---|
677 | Include("single-quote"),
|
---|
678 | {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
|
---|
679 | // m,rx regex
|
---|
680 | {`(?<=^|\b|\s)(ms|m|rx)\b(\s*)`, ByGroups(Keyword, Text), Push("rx")},
|
---|
681 | // Quote constructs
|
---|
682 | {
|
---|
683 | `(?<=^|\b|\s)(?<keyword>(?:qq|q|Q))(?<adverbs>(?::?(?:heredoc|to|qq|ww|q|w|s|a|h|f|c|b|to|v|x))*)(?<ws>\s*)(?<opening_delimiters>(?<delimiter>[^0-9a-zA-Z:\s])\k<delimiter>*)`,
|
---|
684 | EmitterFunc(quote),
|
---|
685 | findBrackets(rakuQuote),
|
---|
686 | },
|
---|
687 | // Function
|
---|
688 | {
|
---|
689 | `\b` + namePattern + colonPairLookahead + `\()`,
|
---|
690 | NameFunction,
|
---|
691 | Push("name-adverb"),
|
---|
692 | },
|
---|
693 | // Method
|
---|
694 | {
|
---|
695 | `(?<!\.\.[?^*+]?)(?<=(?:\.[?^*+&]?)|self!)` + namePattern + colonPairLookahead + `\b)`,
|
---|
696 | NameFunction,
|
---|
697 | Push("name-adverb"),
|
---|
698 | },
|
---|
699 | // Indirect invocant
|
---|
700 | {namePattern + `(?=\s+\W?['\w:-]+:\W)`, NameFunction, Push("name-adverb")},
|
---|
701 | {`(?<=\W)(?:∅|i|e|𝑒|tau|τ|pi|π|Inf|∞)(?=\W)`, NameConstant, nil},
|
---|
702 | {`(「)([^」]*)(」)`, ByGroups(Punctuation, String, Punctuation), nil},
|
---|
703 | {`(?<=^ *)\b` + namePattern + `(?=:\s*(?:for|while|loop))`, NameLabel, nil},
|
---|
704 | // Sigilless variable
|
---|
705 | {
|
---|
706 | `(?<=\b(?:my|our|constant|let|temp)\s+)\\` + namePattern,
|
---|
707 | NameVariable,
|
---|
708 | Push("name-adverb"),
|
---|
709 | },
|
---|
710 | {namePattern, Name, Push("name-adverb")},
|
---|
711 | },
|
---|
712 | "rx": {
|
---|
713 | Include("colon-pair-attribute"),
|
---|
714 | {
|
---|
715 | `(?<opening_delimiters>(?<delimiter>[^\w:\s])\k<delimiter>*)`,
|
---|
716 | ByGroupNames(
|
---|
717 | map[string]Emitter{
|
---|
718 | `opening_delimiters`: Punctuation,
|
---|
719 | `delimiter`: nil,
|
---|
720 | },
|
---|
721 | ),
|
---|
722 | findBrackets(rakuMatchRegex),
|
---|
723 | },
|
---|
724 | },
|
---|
725 | "substitution": {
|
---|
726 | Include("colon-pair-attribute"),
|
---|
727 | // Substitution | s{regex} = value
|
---|
728 | {
|
---|
729 | `(?<opening_delimiters>(?<delimiter>` + bracketsPattern + `)\k<delimiter>*)`,
|
---|
730 | ByGroupNames(map[string]Emitter{
|
---|
731 | `opening_delimiters`: Punctuation,
|
---|
732 | `delimiter`: nil,
|
---|
733 | }),
|
---|
734 | findBrackets(rakuMatchRegex),
|
---|
735 | },
|
---|
736 | // Substitution | s/regex/string/
|
---|
737 | {
|
---|
738 | `(?<opening_delimiters>[^\w:\s])`,
|
---|
739 | Punctuation,
|
---|
740 | findBrackets(rakuSubstitutionRegex),
|
---|
741 | },
|
---|
742 | },
|
---|
743 | "number": {
|
---|
744 | {`0_?[0-7]+(_[0-7]+)*`, LiteralNumberOct, nil},
|
---|
745 | {`0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*`, LiteralNumberHex, nil},
|
---|
746 | {`0b[01]+(_[01]+)*`, LiteralNumberBin, nil},
|
---|
747 | {
|
---|
748 | `(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?`,
|
---|
749 | LiteralNumberFloat,
|
---|
750 | nil,
|
---|
751 | },
|
---|
752 | {`(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*`, LiteralNumberFloat, nil},
|
---|
753 | {`(?<=\d+)i`, NameConstant, nil},
|
---|
754 | {`\d+(_\d+)*`, LiteralNumberInteger, nil},
|
---|
755 | },
|
---|
756 | "name-adverb": {
|
---|
757 | Include("colon-pair-attribute-keyvalue"),
|
---|
758 | Default(Pop(1)),
|
---|
759 | },
|
---|
760 | "colon-pair": {
|
---|
761 | // :key(value)
|
---|
762 | {colonPairPattern, colonPair(String), findBrackets(rakuNameAttribute)},
|
---|
763 | // :123abc
|
---|
764 | {
|
---|
765 | `(:)(\d+)(\w[\w'-]*)`,
|
---|
766 | ByGroups(Punctuation, UsingSelf("number"), String),
|
---|
767 | nil,
|
---|
768 | },
|
---|
769 | // :key
|
---|
770 | {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, String), nil},
|
---|
771 | {`\s+`, Text, nil},
|
---|
772 | },
|
---|
773 | "colon-pair-attribute": {
|
---|
774 | // :key(value)
|
---|
775 | {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
|
---|
776 | // :123abc
|
---|
777 | {
|
---|
778 | `(:)(\d+)(\w[\w'-]*)`,
|
---|
779 | ByGroups(Punctuation, UsingSelf("number"), NameAttribute),
|
---|
780 | nil,
|
---|
781 | },
|
---|
782 | // :key
|
---|
783 | {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, NameAttribute), nil},
|
---|
784 | {`\s+`, Text, nil},
|
---|
785 | },
|
---|
786 | "colon-pair-attribute-keyvalue": {
|
---|
787 | // :key(value)
|
---|
788 | {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
|
---|
789 | },
|
---|
790 | "escape-qq": {
|
---|
791 | {
|
---|
792 | `(?<!(?<!\\)\\)(\\qq)(\[)(.+?)(\])`,
|
---|
793 | ByGroups(StringEscape, Punctuation, UsingSelf("qq"), Punctuation),
|
---|
794 | nil,
|
---|
795 | },
|
---|
796 | },
|
---|
797 | `escape-char`: {
|
---|
798 | {`(?<!(?<!\\)\\)(\\[abfrnrt])`, StringEscape, nil},
|
---|
799 | },
|
---|
800 | `escape-single-quote`: {
|
---|
801 | {`(?<!(?<!\\)\\)(\\)(['\\])`, ByGroups(StringEscape, StringSingle), nil},
|
---|
802 | },
|
---|
803 | "escape-c-name": {
|
---|
804 | {
|
---|
805 | `(?<!(?<!\\)\\)(\\[c|C])(\[)(.+?)(\])`,
|
---|
806 | ByGroups(StringEscape, Punctuation, String, Punctuation),
|
---|
807 | nil,
|
---|
808 | },
|
---|
809 | },
|
---|
810 | "escape-hexadecimal": {
|
---|
811 | {
|
---|
812 | `(?<!(?<!\\)\\)(\\[x|X])(\[)([0-9a-fA-F]+)(\])`,
|
---|
813 | ByGroups(StringEscape, Punctuation, NumberHex, Punctuation),
|
---|
814 | nil,
|
---|
815 | },
|
---|
816 | {`(\\[x|X])([0-9a-fA-F]+)`, ByGroups(StringEscape, NumberHex), nil},
|
---|
817 | },
|
---|
818 | "regex": {
|
---|
819 | // Placeholder, will be overwritten by mutators, DO NOT REMOVE!
|
---|
820 | {`\A\z`, nil, nil},
|
---|
821 | Include("regex-escape-class"),
|
---|
822 | Include(`regex-character-escape`),
|
---|
823 | // $(code)
|
---|
824 | {
|
---|
825 | `([$@])((?<!(?<!\\)\\)\()`,
|
---|
826 | ByGroups(Keyword, Punctuation),
|
---|
827 | replaceRule(ruleReplacingConfig{
|
---|
828 | delimiter: []rune(`)`),
|
---|
829 | tokenType: Punctuation,
|
---|
830 | stateName: `root`,
|
---|
831 | pushState: true,
|
---|
832 | }),
|
---|
833 | },
|
---|
834 | // Exclude $/ from variables, because we can't get out of the end of the slash regex: $/;
|
---|
835 | {`\$(?=/)`, NameEntity, nil},
|
---|
836 | // Exclude $ from variables
|
---|
837 | {`\$(?=\z|\s|[^<(\w*!.])`, NameEntity, nil},
|
---|
838 | Include("variable"),
|
---|
839 | Include("escape-c-name"),
|
---|
840 | Include("escape-hexadecimal"),
|
---|
841 | Include("number"),
|
---|
842 | Include("single-quote"),
|
---|
843 | // :my variable code ...
|
---|
844 | {
|
---|
845 | `(?<!(?<!\\)\\)(:)(my|our|state|constant|temp|let)`,
|
---|
846 | ByGroups(Operator, KeywordDeclaration),
|
---|
847 | replaceRule(ruleReplacingConfig{
|
---|
848 | delimiter: []rune(`;`),
|
---|
849 | tokenType: Punctuation,
|
---|
850 | stateName: `root`,
|
---|
851 | pushState: true,
|
---|
852 | }),
|
---|
853 | },
|
---|
854 | // <{code}>
|
---|
855 | {
|
---|
856 | `(?<!(?<!\\)\\)(<)([?!.]*)((?<!(?<!\\)\\){)`,
|
---|
857 | ByGroups(Punctuation, Operator, Punctuation),
|
---|
858 | replaceRule(ruleReplacingConfig{
|
---|
859 | delimiter: []rune(`}>`),
|
---|
860 | tokenType: Punctuation,
|
---|
861 | stateName: `root`,
|
---|
862 | pushState: true,
|
---|
863 | }),
|
---|
864 | },
|
---|
865 | // {code}
|
---|
866 | Include(`closure`),
|
---|
867 | // Properties
|
---|
868 | {`(:)(\w+)`, ByGroups(Punctuation, NameAttribute), nil},
|
---|
869 | // Operator
|
---|
870 | {`\|\||\||&&|&|\.\.|\*\*|%%|%|:|!|<<|«|>>|»|\+|\*\*|\*|\?|=|~|<~~>`, Operator, nil},
|
---|
871 | // Anchors
|
---|
872 | {`\^\^|\^|\$\$|\$`, NameEntity, nil},
|
---|
873 | {`\.`, NameEntity, nil},
|
---|
874 | {`#[^\n]*\n`, CommentSingle, nil},
|
---|
875 | // Lookaround
|
---|
876 | {
|
---|
877 | `(?<!(?<!\\)\\)(<)(\s*)([?!.]+)(\s*)(after|before)`,
|
---|
878 | ByGroups(Punctuation, Text, Operator, Text, OperatorWord),
|
---|
879 | replaceRule(ruleReplacingConfig{
|
---|
880 | delimiter: []rune(`>`),
|
---|
881 | tokenType: Punctuation,
|
---|
882 | stateName: `regex`,
|
---|
883 | pushState: true,
|
---|
884 | }),
|
---|
885 | },
|
---|
886 | {
|
---|
887 | `(?<!(?<!\\)\\)(<)([|!?.]*)(wb|ww|ws|w)(>)`,
|
---|
888 | ByGroups(Punctuation, Operator, OperatorWord, Punctuation),
|
---|
889 | nil,
|
---|
890 | },
|
---|
891 | // <$variable>
|
---|
892 | {
|
---|
893 | `(?<!(?<!\\)\\)(<)([?!.]*)([$@]\w[\w:-]*)(>)`,
|
---|
894 | ByGroups(Punctuation, Operator, NameVariable, Punctuation),
|
---|
895 | nil,
|
---|
896 | },
|
---|
897 | // Capture markers
|
---|
898 | {`(?<!(?<!\\)\\)<\(|\)>`, Operator, nil},
|
---|
899 | {
|
---|
900 | `(?<!(?<!\\)\\)(<)(\w[\w:-]*)(=\.?)`,
|
---|
901 | ByGroups(Punctuation, NameVariable, Operator),
|
---|
902 | Push(`regex-variable`),
|
---|
903 | },
|
---|
904 | {
|
---|
905 | `(?<!(?<!\\)\\)(<)([|!?.&]*)(\w(?:(?!:\s)[\w':-])*)`,
|
---|
906 | ByGroups(Punctuation, Operator, NameFunction),
|
---|
907 | Push(`regex-function`),
|
---|
908 | },
|
---|
909 | {`(?<!(?<!\\)\\)<`, Punctuation, Push("regex-property")},
|
---|
910 | {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
|
---|
911 | {`(?<!(?<!\\)\\)(?:\]|\))`, Punctuation, Pop(1)},
|
---|
912 | {`(?<!(?<!\\)\\)(?:\[|\()`, Punctuation, Push("regex")},
|
---|
913 | {`.+?`, StringRegex, nil},
|
---|
914 | },
|
---|
915 | "regex-class-builtin": {
|
---|
916 | {
|
---|
917 | `\b(?:alnum|alpha|blank|cntrl|digit|graph|lower|print|punct|space|upper|xdigit|same|ident)\b`,
|
---|
918 | NameBuiltin,
|
---|
919 | nil,
|
---|
920 | },
|
---|
921 | },
|
---|
922 | "regex-function": {
|
---|
923 | // <function>
|
---|
924 | {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
|
---|
925 | // <function(parameter)>
|
---|
926 | {
|
---|
927 | `\(`,
|
---|
928 | Punctuation,
|
---|
929 | replaceRule(ruleReplacingConfig{
|
---|
930 | delimiter: []rune(`)>`),
|
---|
931 | tokenType: Punctuation,
|
---|
932 | stateName: `root`,
|
---|
933 | popState: true,
|
---|
934 | pushState: true,
|
---|
935 | }),
|
---|
936 | },
|
---|
937 | // <function value>
|
---|
938 | {
|
---|
939 | `\s+`,
|
---|
940 | StringRegex,
|
---|
941 | replaceRule(ruleReplacingConfig{
|
---|
942 | delimiter: []rune(`>`),
|
---|
943 | tokenType: Punctuation,
|
---|
944 | stateName: `regex`,
|
---|
945 | popState: true,
|
---|
946 | pushState: true,
|
---|
947 | }),
|
---|
948 | },
|
---|
949 | // <function: value>
|
---|
950 | {
|
---|
951 | `:`,
|
---|
952 | Punctuation,
|
---|
953 | replaceRule(ruleReplacingConfig{
|
---|
954 | delimiter: []rune(`>`),
|
---|
955 | tokenType: Punctuation,
|
---|
956 | stateName: `root`,
|
---|
957 | popState: true,
|
---|
958 | pushState: true,
|
---|
959 | }),
|
---|
960 | },
|
---|
961 | },
|
---|
962 | "regex-variable": {
|
---|
963 | Include(`regex-starting-operators`),
|
---|
964 | // <var=function(
|
---|
965 | {
|
---|
966 | `(&)?(\w(?:(?!:\s)[\w':-])*)(?=\()`,
|
---|
967 | ByGroups(Operator, NameFunction),
|
---|
968 | Mutators(Pop(1), Push(`regex-function`)),
|
---|
969 | },
|
---|
970 | // <var=function>
|
---|
971 | {`(&)?(\w[\w':-]*)(>)`, ByGroups(Operator, NameFunction, Punctuation), Pop(1)},
|
---|
972 | // <var=
|
---|
973 | Default(Pop(1), Push(`regex-property`)),
|
---|
974 | },
|
---|
975 | "regex-property": {
|
---|
976 | {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
|
---|
977 | Include("regex-class-builtin"),
|
---|
978 | Include("variable"),
|
---|
979 | Include(`regex-starting-operators`),
|
---|
980 | Include("colon-pair-attribute"),
|
---|
981 | {`(?<!(?<!\\)\\)\[`, Punctuation, Push("regex-character-class")},
|
---|
982 | {`\+|\-`, Operator, nil},
|
---|
983 | {`@[\w':-]+`, NameVariable, nil},
|
---|
984 | {`.+?`, StringRegex, nil},
|
---|
985 | },
|
---|
986 | `regex-starting-operators`: {
|
---|
987 | {`(?<=<)[|!?.]+`, Operator, nil},
|
---|
988 | },
|
---|
989 | "regex-escape-class": {
|
---|
990 | {`(?i)\\n|\\t|\\h|\\v|\\s|\\d|\\w`, StringEscape, nil},
|
---|
991 | },
|
---|
992 | `regex-character-escape`: {
|
---|
993 | {`(?<!(?<!\\)\\)(\\)(.)`, ByGroups(StringEscape, StringRegex), nil},
|
---|
994 | },
|
---|
995 | "regex-character-class": {
|
---|
996 | {`(?<!(?<!\\)\\)\]`, Punctuation, Pop(1)},
|
---|
997 | Include("regex-escape-class"),
|
---|
998 | Include("escape-c-name"),
|
---|
999 | Include("escape-hexadecimal"),
|
---|
1000 | Include(`regex-character-escape`),
|
---|
1001 | Include("number"),
|
---|
1002 | {`\.\.`, Operator, nil},
|
---|
1003 | {`.+?`, StringRegex, nil},
|
---|
1004 | },
|
---|
1005 | "metaoperator": {
|
---|
1006 | // Z[=>]
|
---|
1007 | {
|
---|
1008 | `\b([RZX]+)\b(\[)([^\s\]]+?)(\])`,
|
---|
1009 | ByGroups(OperatorWord, Punctuation, UsingSelf("root"), Punctuation),
|
---|
1010 | nil,
|
---|
1011 | },
|
---|
1012 | // Z=>
|
---|
1013 | {`\b([RZX]+)\b([^\s\]]+)`, ByGroups(OperatorWord, UsingSelf("operator")), nil},
|
---|
1014 | },
|
---|
1015 | "operator": {
|
---|
1016 | // Word Operator
|
---|
1017 | {wordOperatorsPattern, OperatorWord, nil},
|
---|
1018 | // Operator
|
---|
1019 | {operatorsPattern, Operator, nil},
|
---|
1020 | },
|
---|
1021 | "pod": {
|
---|
1022 | // Single-line pod declaration
|
---|
1023 | {`(#[|=])\s`, Keyword, Push("pod-single")},
|
---|
1024 | // Multi-line pod declaration
|
---|
1025 | {
|
---|
1026 | "(?<keyword>#[|=])(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)(?<value>)(?<closing_delimiters>)`,
|
---|
1027 | ByGroupNames(
|
---|
1028 | map[string]Emitter{
|
---|
1029 | `keyword`: Keyword,
|
---|
1030 | `opening_delimiters`: Punctuation,
|
---|
1031 | `delimiter`: nil,
|
---|
1032 | `value`: UsingSelf("pod-declaration"),
|
---|
1033 | `closing_delimiters`: Punctuation,
|
---|
1034 | }),
|
---|
1035 | findBrackets(rakuPodDeclaration),
|
---|
1036 | },
|
---|
1037 | Include("pod-blocks"),
|
---|
1038 | },
|
---|
1039 | "pod-blocks": {
|
---|
1040 | // =begin code
|
---|
1041 | {
|
---|
1042 | `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?<name>code)(?<config>[^\n]*)(?<value>.*?)(?<ws3>^\k<ws>)(?<end_keyword>=end)(?<ws4> +)\k<name>`,
|
---|
1043 | EmitterFunc(podCode),
|
---|
1044 | nil,
|
---|
1045 | },
|
---|
1046 | // =begin
|
---|
1047 | {
|
---|
1048 | `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?!code)(?<name>\w[\w'-]*)(?<config>[^\n]*)(?<value>)(?<closing_delimiters>)`,
|
---|
1049 | ByGroupNames(
|
---|
1050 | map[string]Emitter{
|
---|
1051 | `ws`: Comment,
|
---|
1052 | `keyword`: Keyword,
|
---|
1053 | `ws2`: StringDoc,
|
---|
1054 | `name`: Keyword,
|
---|
1055 | `config`: EmitterFunc(podConfig),
|
---|
1056 | `value`: UsingSelf("pod-begin"),
|
---|
1057 | `closing_delimiters`: Keyword,
|
---|
1058 | }),
|
---|
1059 | findBrackets(rakuPod),
|
---|
1060 | },
|
---|
1061 | // =for ...
|
---|
1062 | {
|
---|
1063 | `(?<=^ *)(?<ws> *)(?<keyword>=(?:for|defn))(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
|
---|
1064 | ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
|
---|
1065 | Push("pod-paragraph"),
|
---|
1066 | },
|
---|
1067 | // =config
|
---|
1068 | {
|
---|
1069 | `(?<=^ *)(?<ws> *)(?<keyword>=config)(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
|
---|
1070 | ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
|
---|
1071 | nil,
|
---|
1072 | },
|
---|
1073 | // =alias
|
---|
1074 | {
|
---|
1075 | `(?<=^ *)(?<ws> *)(?<keyword>=alias)(?<ws2> +)(?<name>\w[\w'-]*)(?<value>[^\n]*\n)`,
|
---|
1076 | ByGroups(Comment, Keyword, StringDoc, Keyword, StringDoc),
|
---|
1077 | nil,
|
---|
1078 | },
|
---|
1079 | // =encoding
|
---|
1080 | {
|
---|
1081 | `(?<=^ *)(?<ws> *)(?<keyword>=encoding)(?<ws2> +)(?<name>[^\n]+)`,
|
---|
1082 | ByGroups(Comment, Keyword, StringDoc, Name),
|
---|
1083 | nil,
|
---|
1084 | },
|
---|
1085 | // =para ...
|
---|
1086 | {
|
---|
1087 | `(?<=^ *)(?<ws> *)(?<keyword>=(?:para|table|pod))(?<config>(?<!\n\s*)[^\n]*\n)`,
|
---|
1088 | ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
|
---|
1089 | Push("pod-paragraph"),
|
---|
1090 | },
|
---|
1091 | // =head1 ...
|
---|
1092 | {
|
---|
1093 | `(?<=^ *)(?<ws> *)(?<keyword>=head\d+)(?<ws2> *)(?<config>#?)`,
|
---|
1094 | ByGroups(Comment, Keyword, GenericHeading, Keyword),
|
---|
1095 | Push("pod-heading"),
|
---|
1096 | },
|
---|
1097 | // =item ...
|
---|
1098 | {
|
---|
1099 | `(?<=^ *)(?<ws> *)(?<keyword>=(?:item\d*|comment|data|[A-Z]+))(?<ws2> *)(?<config>#?)`,
|
---|
1100 | ByGroups(Comment, Keyword, StringDoc, Keyword),
|
---|
1101 | Push("pod-paragraph"),
|
---|
1102 | },
|
---|
1103 | {
|
---|
1104 | `(?<=^ *)(?<ws> *)(?<keyword>=finish)(?<config>[^\n]*)`,
|
---|
1105 | ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
|
---|
1106 | Push("pod-finish"),
|
---|
1107 | },
|
---|
1108 | // ={custom} ...
|
---|
1109 | {
|
---|
1110 | `(?<=^ *)(?<ws> *)(?<name>=\w[\w'-]*)(?<ws2> *)(?<config>#?)`,
|
---|
1111 | ByGroups(Comment, Name, StringDoc, Keyword),
|
---|
1112 | Push("pod-paragraph"),
|
---|
1113 | },
|
---|
1114 | // = podconfig
|
---|
1115 | {
|
---|
1116 | `(?<=^ *)(?<keyword> *=)(?<ws> *)(?<config>(?::\w[\w'-]*(?:` + colonPairOpeningBrackets + `.+?` +
|
---|
1117 | colonPairClosingBrackets + `) *)*\n)`,
|
---|
1118 | ByGroups(Keyword, StringDoc, EmitterFunc(podConfig)),
|
---|
1119 | nil,
|
---|
1120 | },
|
---|
1121 | },
|
---|
1122 | "pod-begin": {
|
---|
1123 | Include("pod-blocks"),
|
---|
1124 | Include("pre-pod-formatter"),
|
---|
1125 | {`.+?`, StringDoc, nil},
|
---|
1126 | },
|
---|
1127 | "pod-declaration": {
|
---|
1128 | Include("pre-pod-formatter"),
|
---|
1129 | {`.+?`, StringDoc, nil},
|
---|
1130 | },
|
---|
1131 | "pod-paragraph": {
|
---|
1132 | {`\n *\n|\n(?=^ *=)`, StringDoc, Pop(1)},
|
---|
1133 | Include("pre-pod-formatter"),
|
---|
1134 | {`.+?`, StringDoc, nil},
|
---|
1135 | },
|
---|
1136 | "pod-single": {
|
---|
1137 | {`\n`, StringDoc, Pop(1)},
|
---|
1138 | Include("pre-pod-formatter"),
|
---|
1139 | {`.+?`, StringDoc, nil},
|
---|
1140 | },
|
---|
1141 | "pod-heading": {
|
---|
1142 | {`\n *\n|\n(?=^ *=)`, GenericHeading, Pop(1)},
|
---|
1143 | Include("pre-pod-formatter"),
|
---|
1144 | {`.+?`, GenericHeading, nil},
|
---|
1145 | },
|
---|
1146 | "pod-finish": {
|
---|
1147 | {`\z`, nil, Pop(1)},
|
---|
1148 | Include("pre-pod-formatter"),
|
---|
1149 | {`.+?`, StringDoc, nil},
|
---|
1150 | },
|
---|
1151 | "pre-pod-formatter": {
|
---|
1152 | // C<code>, B<bold>, ...
|
---|
1153 | {
|
---|
1154 | `(?<keyword>[CBIUDTKRPAELZVMSXN])(?<opening_delimiters><+|«)`,
|
---|
1155 | ByGroups(Keyword, Punctuation),
|
---|
1156 | findBrackets(rakuPodFormatter),
|
---|
1157 | },
|
---|
1158 | },
|
---|
1159 | "pod-formatter": {
|
---|
1160 | // Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
|
---|
1161 | {`>`, Punctuation, Pop(1)},
|
---|
1162 | Include("pre-pod-formatter"),
|
---|
1163 | // Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
|
---|
1164 | {`.+?`, StringOther, nil},
|
---|
1165 | },
|
---|
1166 | "variable": {
|
---|
1167 | {variablePattern, NameVariable, Push("name-adverb")},
|
---|
1168 | {globalVariablePattern, NameVariableGlobal, Push("name-adverb")},
|
---|
1169 | {`[$@]<[^>]+>`, NameVariable, nil},
|
---|
1170 | {`\$[/!¢]`, NameVariable, nil},
|
---|
1171 | {`[$@%]`, NameVariable, nil},
|
---|
1172 | },
|
---|
1173 | "single-quote": {
|
---|
1174 | {`(?<!(?<!\\)\\)'`, Punctuation, Push("single-quote-inner")},
|
---|
1175 | },
|
---|
1176 | "single-quote-inner": {
|
---|
1177 | {`(?<!(?<!(?<!\\)\\)\\)'`, Punctuation, Pop(1)},
|
---|
1178 | Include("escape-single-quote"),
|
---|
1179 | Include("escape-qq"),
|
---|
1180 | {`(?:\\\\|\\[^\\]|[^'\\])+?`, StringSingle, nil},
|
---|
1181 | },
|
---|
1182 | "double-quotes": {
|
---|
1183 | {`(?<!(?<!\\)\\)"`, Punctuation, Pop(1)},
|
---|
1184 | Include("qq"),
|
---|
1185 | },
|
---|
1186 | "<<": {
|
---|
1187 | {`>>(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
|
---|
1188 | Include("ww"),
|
---|
1189 | },
|
---|
1190 | "«": {
|
---|
1191 | {`»(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
|
---|
1192 | Include("ww"),
|
---|
1193 | },
|
---|
1194 | "ww": {
|
---|
1195 | Include("single-quote"),
|
---|
1196 | Include("qq"),
|
---|
1197 | },
|
---|
1198 | "qq": {
|
---|
1199 | Include("qq-variable"),
|
---|
1200 | Include("closure"),
|
---|
1201 | Include(`escape-char`),
|
---|
1202 | Include("escape-hexadecimal"),
|
---|
1203 | Include("escape-c-name"),
|
---|
1204 | Include("escape-qq"),
|
---|
1205 | {`.+?`, StringDouble, nil},
|
---|
1206 | },
|
---|
1207 | "qq-variable": {
|
---|
1208 | {
|
---|
1209 | `(?<!(?<!\\)\\)(?:` + variablePattern + `|` + globalVariablePattern + `)` + colonPairLookahead + `)`,
|
---|
1210 | NameVariable,
|
---|
1211 | Push("qq-variable-extras", "name-adverb"),
|
---|
1212 | },
|
---|
1213 | },
|
---|
1214 | "qq-variable-extras": {
|
---|
1215 | // Method
|
---|
1216 | {
|
---|
1217 | `(?<operator>\.)(?<method_name>` + namePattern + `)` + colonPairLookahead + `\()`,
|
---|
1218 | ByGroupNames(map[string]Emitter{
|
---|
1219 | `operator`: Operator,
|
---|
1220 | `method_name`: NameFunction,
|
---|
1221 | }),
|
---|
1222 | Push(`name-adverb`),
|
---|
1223 | },
|
---|
1224 | // Function/Signature
|
---|
1225 | {
|
---|
1226 | `\(`, Punctuation, replaceRule(
|
---|
1227 | ruleReplacingConfig{
|
---|
1228 | delimiter: []rune(`)`),
|
---|
1229 | tokenType: Punctuation,
|
---|
1230 | stateName: `root`,
|
---|
1231 | pushState: true,
|
---|
1232 | }),
|
---|
1233 | },
|
---|
1234 | Default(Pop(1)),
|
---|
1235 | },
|
---|
1236 | "Q": {
|
---|
1237 | Include("escape-qq"),
|
---|
1238 | {`.+?`, String, nil},
|
---|
1239 | },
|
---|
1240 | "Q-closure": {
|
---|
1241 | Include("escape-qq"),
|
---|
1242 | Include("closure"),
|
---|
1243 | {`.+?`, String, nil},
|
---|
1244 | },
|
---|
1245 | "Q-variable": {
|
---|
1246 | Include("escape-qq"),
|
---|
1247 | Include("qq-variable"),
|
---|
1248 | {`.+?`, String, nil},
|
---|
1249 | },
|
---|
1250 | "closure": {
|
---|
1251 | {`(?<!(?<!\\)\\){`, Punctuation, replaceRule(
|
---|
1252 | ruleReplacingConfig{
|
---|
1253 | delimiter: []rune(`}`),
|
---|
1254 | tokenType: Punctuation,
|
---|
1255 | stateName: `root`,
|
---|
1256 | pushState: true,
|
---|
1257 | }),
|
---|
1258 | },
|
---|
1259 | },
|
---|
1260 | "token": {
|
---|
1261 | // Token signature
|
---|
1262 | {`\(`, Punctuation, replaceRule(
|
---|
1263 | ruleReplacingConfig{
|
---|
1264 | delimiter: []rune(`)`),
|
---|
1265 | tokenType: Punctuation,
|
---|
1266 | stateName: `root`,
|
---|
1267 | pushState: true,
|
---|
1268 | }),
|
---|
1269 | },
|
---|
1270 | {`{`, Punctuation, replaceRule(
|
---|
1271 | ruleReplacingConfig{
|
---|
1272 | delimiter: []rune(`}`),
|
---|
1273 | tokenType: Punctuation,
|
---|
1274 | stateName: `regex`,
|
---|
1275 | popState: true,
|
---|
1276 | pushState: true,
|
---|
1277 | }),
|
---|
1278 | },
|
---|
1279 | {`\s*`, Text, nil},
|
---|
1280 | Default(Pop(1)),
|
---|
1281 | },
|
---|
1282 | }
|
---|
1283 | }
|
---|
1284 |
|
---|
// joinRuneMap concatenates every key of m into a single string.
// The values of m are ignored. Because Go map iteration order is
// unspecified, the order of the runes in the result is unspecified too.
func joinRuneMap(m map[rune]rune) string {
	keys := make([]rune, len(m))

	next := 0
	for key := range m {
		keys[next] = key
		next++
	}

	return string(keys)
}
|
---|
1294 |
|
---|
// indexAt returns the rune index in str of the first occurrence of substr
// at or after rune position pos that is not escaped with a backslash, or -1
// if there is none (or if pos lies outside str).
//
// An occurrence counts as escaped when it is directly preceded by a single
// backslash that is not itself preceded by another backslash. Only runes
// inside the current search window are inspected: the window starts at pos
// and, after an escaped occurrence, restarts one rune past that occurrence.
// A match at the very start of a window is therefore never treated as
// escaped.
func indexAt(str []rune, substr []rune, pos int) int {
	if pos < 0 || pos > len(str) {
		return -1
	}

	needle := string(substr)

	for start := pos; ; {
		window := str[start:]
		text := string(window)

		byteIdx := strings.Index(text, needle)
		if byteIdx < 0 {
			return -1
		}

		// strings.Index reports a byte offset; convert it to a rune offset
		// so it can index into window and be added to start.
		idx := utf8.RuneCountInString(text[:byteIdx])

		escaped := (idx > 1 && window[idx-1] == '\\' && window[idx-2] != '\\') ||
			(idx == 1 && window[idx-1] == '\\')
		if !escaped {
			return start + idx
		}

		// Skip the escaped occurrence and keep searching after it.
		start += idx + 1
	}
}
|
---|
1320 |
|
---|
// contains reports whether e is an element of s, using exact string
// equality. A nil or empty slice contains nothing.
func contains(s []string, e string) bool {
	found := false
	for i := 0; i < len(s) && !found; i++ {
		found = s[i] == e
	}

	return found
}
|
---|
1330 |
|
---|
// rulePosition identifies a position within a state's rule list.
type rulePosition int

// topRule is the rulePosition with value 0.
const topRule rulePosition = 0

// bottomRule has the value -1. It is declared without an explicit type, so
// it is an untyped constant that converts to rulePosition where one is
// required.
const bottomRule = -1
|
---|
1337 |
|
---|
1338 | type ruleMakingConfig struct {
|
---|
1339 | delimiter []rune
|
---|
1340 | pattern string
|
---|
1341 | tokenType Emitter
|
---|
1342 | mutator Mutator
|
---|
1343 | numberOfDelimiterChars int
|
---|
1344 | }
|
---|
1345 |
|
---|
1346 | type ruleReplacingConfig struct {
|
---|
1347 | delimiter []rune
|
---|
1348 | pattern string
|
---|
1349 | tokenType Emitter
|
---|
1350 | numberOfDelimiterChars int
|
---|
1351 | mutator Mutator
|
---|
1352 | appendMutator Mutator
|
---|
1353 | rulePosition rulePosition
|
---|
1354 | stateName string
|
---|
1355 | pop bool
|
---|
1356 | popState bool
|
---|
1357 | pushState bool
|
---|
1358 | }
|
---|
1359 |
|
---|
1360 | // Pops rule from state-stack and replaces the rule with the previous rule
|
---|
1361 | func popRule(rule ruleReplacingConfig) MutatorFunc {
|
---|
1362 | return func(state *LexerState) error {
|
---|
1363 | stackName := genStackName(rule.stateName, rule.rulePosition)
|
---|
1364 |
|
---|
1365 | stack, ok := state.Get(stackName).([]ruleReplacingConfig)
|
---|
1366 |
|
---|
1367 | if ok && len(stack) > 0 {
|
---|
1368 | // Pop from stack
|
---|
1369 | stack = stack[:len(stack)-1]
|
---|
1370 | lastRule := stack[len(stack)-1]
|
---|
1371 | lastRule.pushState = false
|
---|
1372 | lastRule.popState = false
|
---|
1373 | lastRule.pop = true
|
---|
1374 | state.Set(stackName, stack)
|
---|
1375 |
|
---|
1376 | // Call replaceRule to use the last rule
|
---|
1377 | err := replaceRule(lastRule)(state)
|
---|
1378 | if err != nil {
|
---|
1379 | panic(err)
|
---|
1380 | }
|
---|
1381 | }
|
---|
1382 |
|
---|
1383 | return nil
|
---|
1384 | }
|
---|
1385 | }
|
---|
1386 |
|
---|
1387 | // Replaces a state's rule based on the rule config and position
|
---|
1388 | func replaceRule(rule ruleReplacingConfig) MutatorFunc {
|
---|
1389 | return func(state *LexerState) error {
|
---|
1390 | stateName := rule.stateName
|
---|
1391 | stackName := genStackName(rule.stateName, rule.rulePosition)
|
---|
1392 |
|
---|
1393 | stack, ok := state.Get(stackName).([]ruleReplacingConfig)
|
---|
1394 | if !ok {
|
---|
1395 | stack = []ruleReplacingConfig{}
|
---|
1396 | }
|
---|
1397 |
|
---|
1398 | // If state-stack is empty fill it with the placeholder rule
|
---|
1399 | if len(stack) == 0 {
|
---|
1400 | stack = []ruleReplacingConfig{
|
---|
1401 | {
|
---|
1402 | // Placeholder, will be overwritten by mutators, DO NOT REMOVE!
|
---|
1403 | pattern: `\A\z`,
|
---|
1404 | tokenType: nil,
|
---|
1405 | mutator: nil,
|
---|
1406 | stateName: stateName,
|
---|
1407 | rulePosition: rule.rulePosition,
|
---|
1408 | },
|
---|
1409 | }
|
---|
1410 | state.Set(stackName, stack)
|
---|
1411 | }
|
---|
1412 |
|
---|
1413 | var mutator Mutator
|
---|
1414 | mutators := []Mutator{}
|
---|
1415 |
|
---|
1416 | switch {
|
---|
1417 | case rule.rulePosition == topRule && rule.mutator == nil:
|
---|
1418 | // Default mutator for top rule
|
---|
1419 | mutators = []Mutator{Pop(1), popRule(rule)}
|
---|
1420 | case rule.rulePosition == topRule && rule.mutator != nil:
|
---|
1421 | // Default mutator for top rule, when rule.mutator is set
|
---|
1422 | mutators = []Mutator{rule.mutator, popRule(rule)}
|
---|
1423 | case rule.mutator != nil:
|
---|
1424 | mutators = []Mutator{rule.mutator}
|
---|
1425 | }
|
---|
1426 |
|
---|
1427 | if rule.appendMutator != nil {
|
---|
1428 | mutators = append(mutators, rule.appendMutator)
|
---|
1429 | }
|
---|
1430 |
|
---|
1431 | if len(mutators) > 0 {
|
---|
1432 | mutator = Mutators(mutators...)
|
---|
1433 | } else {
|
---|
1434 | mutator = nil
|
---|
1435 | }
|
---|
1436 |
|
---|
1437 | ruleConfig := ruleMakingConfig{
|
---|
1438 | pattern: rule.pattern,
|
---|
1439 | delimiter: rule.delimiter,
|
---|
1440 | numberOfDelimiterChars: rule.numberOfDelimiterChars,
|
---|
1441 | tokenType: rule.tokenType,
|
---|
1442 | mutator: mutator,
|
---|
1443 | }
|
---|
1444 |
|
---|
1445 | cRule := makeRule(ruleConfig)
|
---|
1446 |
|
---|
1447 | switch rule.rulePosition {
|
---|
1448 | case topRule:
|
---|
1449 | state.Rules[stateName][0] = cRule
|
---|
1450 | case bottomRule:
|
---|
1451 | state.Rules[stateName][len(state.Rules[stateName])-1] = cRule
|
---|
1452 | }
|
---|
1453 |
|
---|
1454 | // Pop state name from stack if asked. State should be popped first before Pushing
|
---|
1455 | if rule.popState {
|
---|
1456 | err := Pop(1).Mutate(state)
|
---|
1457 | if err != nil {
|
---|
1458 | panic(err)
|
---|
1459 | }
|
---|
1460 | }
|
---|
1461 |
|
---|
1462 | // Push state name to stack if asked
|
---|
1463 | if rule.pushState {
|
---|
1464 | err := Push(stateName).Mutate(state)
|
---|
1465 | if err != nil {
|
---|
1466 | panic(err)
|
---|
1467 | }
|
---|
1468 | }
|
---|
1469 |
|
---|
1470 | if !rule.pop {
|
---|
1471 | state.Set(stackName, append(stack, rule))
|
---|
1472 | }
|
---|
1473 |
|
---|
1474 | return nil
|
---|
1475 | }
|
---|
1476 | }
|
---|
1477 |
|
---|
1478 | // Generates rule replacing stack using state name and rule position
|
---|
1479 | func genStackName(stateName string, rulePosition rulePosition) (stackName string) {
|
---|
1480 | switch rulePosition {
|
---|
1481 | case topRule:
|
---|
1482 | stackName = stateName + `-top-stack`
|
---|
1483 | case bottomRule:
|
---|
1484 | stackName = stateName + `-bottom-stack`
|
---|
1485 | }
|
---|
1486 | return
|
---|
1487 | }
|
---|
1488 |
|
---|
1489 | // Makes a compiled rule and returns it
|
---|
1490 | func makeRule(config ruleMakingConfig) *CompiledRule {
|
---|
1491 | var rePattern string
|
---|
1492 |
|
---|
1493 | if len(config.delimiter) > 0 {
|
---|
1494 | delimiter := string(config.delimiter)
|
---|
1495 |
|
---|
1496 | if config.numberOfDelimiterChars > 1 {
|
---|
1497 | delimiter = strings.Repeat(delimiter, config.numberOfDelimiterChars)
|
---|
1498 | }
|
---|
1499 |
|
---|
1500 | rePattern = `(?<!(?<!\\)\\)` + regexp2.Escape(delimiter)
|
---|
1501 | } else {
|
---|
1502 | rePattern = config.pattern
|
---|
1503 | }
|
---|
1504 |
|
---|
1505 | regex := regexp2.MustCompile(rePattern, regexp2.None)
|
---|
1506 |
|
---|
1507 | cRule := &CompiledRule{
|
---|
1508 | Rule: Rule{rePattern, config.tokenType, config.mutator},
|
---|
1509 | Regexp: regex,
|
---|
1510 | }
|
---|
1511 |
|
---|
1512 | return cRule
|
---|
1513 | }
|
---|
1514 |
|
---|
1515 | // Emitter for colon pairs, changes token state based on key and brackets
|
---|
1516 | func colonPair(tokenClass TokenType) Emitter {
|
---|
1517 | return EmitterFunc(func(groups []string, state *LexerState) Iterator {
|
---|
1518 | iterators := []Iterator{}
|
---|
1519 | tokens := []Token{
|
---|
1520 | {Punctuation, state.NamedGroups[`colon`]},
|
---|
1521 | {Punctuation, state.NamedGroups[`opening_delimiters`]},
|
---|
1522 | {Punctuation, state.NamedGroups[`closing_delimiters`]},
|
---|
1523 | }
|
---|
1524 |
|
---|
1525 | // Append colon
|
---|
1526 | iterators = append(iterators, Literator(tokens[0]))
|
---|
1527 |
|
---|
1528 | if tokenClass == NameAttribute {
|
---|
1529 | iterators = append(iterators, Literator(Token{NameAttribute, state.NamedGroups[`key`]}))
|
---|
1530 | } else {
|
---|
1531 | var keyTokenState string
|
---|
1532 | keyre := regexp.MustCompile(`^\d+$`)
|
---|
1533 | if keyre.MatchString(state.NamedGroups[`key`]) {
|
---|
1534 | keyTokenState = "common"
|
---|
1535 | } else {
|
---|
1536 | keyTokenState = "Q"
|
---|
1537 | }
|
---|
1538 |
|
---|
1539 | // Use token state to Tokenise key
|
---|
1540 | if keyTokenState != "" {
|
---|
1541 | iterator, err := state.Lexer.Tokenise(
|
---|
1542 | &TokeniseOptions{
|
---|
1543 | State: keyTokenState,
|
---|
1544 | Nested: true,
|
---|
1545 | }, state.NamedGroups[`key`])
|
---|
1546 |
|
---|
1547 | if err != nil {
|
---|
1548 | panic(err)
|
---|
1549 | } else {
|
---|
1550 | // Append key
|
---|
1551 | iterators = append(iterators, iterator)
|
---|
1552 | }
|
---|
1553 | }
|
---|
1554 | }
|
---|
1555 |
|
---|
1556 | // Append punctuation
|
---|
1557 | iterators = append(iterators, Literator(tokens[1]))
|
---|
1558 |
|
---|
1559 | var valueTokenState string
|
---|
1560 |
|
---|
1561 | switch state.NamedGroups[`opening_delimiters`] {
|
---|
1562 | case "(", "{", "[":
|
---|
1563 | valueTokenState = "root"
|
---|
1564 | case "<<", "«":
|
---|
1565 | valueTokenState = "ww"
|
---|
1566 | case "<":
|
---|
1567 | valueTokenState = "Q"
|
---|
1568 | }
|
---|
1569 |
|
---|
1570 | // Use token state to Tokenise value
|
---|
1571 | if valueTokenState != "" {
|
---|
1572 | iterator, err := state.Lexer.Tokenise(
|
---|
1573 | &TokeniseOptions{
|
---|
1574 | State: valueTokenState,
|
---|
1575 | Nested: true,
|
---|
1576 | }, state.NamedGroups[`value`])
|
---|
1577 |
|
---|
1578 | if err != nil {
|
---|
1579 | panic(err)
|
---|
1580 | } else {
|
---|
1581 | // Append value
|
---|
1582 | iterators = append(iterators, iterator)
|
---|
1583 | }
|
---|
1584 | }
|
---|
1585 | // Append last punctuation
|
---|
1586 | iterators = append(iterators, Literator(tokens[2]))
|
---|
1587 |
|
---|
1588 | return Concaterator(iterators...)
|
---|
1589 | })
|
---|
1590 | }
|
---|
1591 |
|
---|
1592 | // Emitter for quoting constructs, changes token state based on quote name and adverbs
|
---|
1593 | func quote(groups []string, state *LexerState) Iterator {
|
---|
1594 | keyword := state.NamedGroups[`keyword`]
|
---|
1595 | adverbsStr := state.NamedGroups[`adverbs`]
|
---|
1596 | iterators := []Iterator{}
|
---|
1597 | tokens := []Token{
|
---|
1598 | {Keyword, keyword},
|
---|
1599 | {StringAffix, adverbsStr},
|
---|
1600 | {Text, state.NamedGroups[`ws`]},
|
---|
1601 | {Punctuation, state.NamedGroups[`opening_delimiters`]},
|
---|
1602 | {Punctuation, state.NamedGroups[`closing_delimiters`]},
|
---|
1603 | }
|
---|
1604 |
|
---|
1605 | // Append all tokens before dealing with the main string
|
---|
1606 | iterators = append(iterators, Literator(tokens[:4]...))
|
---|
1607 |
|
---|
1608 | var tokenStates []string
|
---|
1609 |
|
---|
1610 | // Set tokenStates based on adverbs
|
---|
1611 | adverbs := strings.Split(adverbsStr, ":")
|
---|
1612 | for _, adverb := range adverbs {
|
---|
1613 | switch adverb {
|
---|
1614 | case "c", "closure":
|
---|
1615 | tokenStates = append(tokenStates, "Q-closure")
|
---|
1616 | case "qq":
|
---|
1617 | tokenStates = append(tokenStates, "qq")
|
---|
1618 | case "ww":
|
---|
1619 | tokenStates = append(tokenStates, "ww")
|
---|
1620 | case "s", "scalar", "a", "array", "h", "hash", "f", "function":
|
---|
1621 | tokenStates = append(tokenStates, "Q-variable")
|
---|
1622 | }
|
---|
1623 | }
|
---|
1624 |
|
---|
1625 | var tokenState string
|
---|
1626 |
|
---|
1627 | switch {
|
---|
1628 | case keyword == "qq" || contains(tokenStates, "qq"):
|
---|
1629 | tokenState = "qq"
|
---|
1630 | case adverbsStr == "ww" || contains(tokenStates, "ww"):
|
---|
1631 | tokenState = "ww"
|
---|
1632 | case contains(tokenStates, "Q-closure") && contains(tokenStates, "Q-variable"):
|
---|
1633 | tokenState = "qq"
|
---|
1634 | case contains(tokenStates, "Q-closure"):
|
---|
1635 | tokenState = "Q-closure"
|
---|
1636 | case contains(tokenStates, "Q-variable"):
|
---|
1637 | tokenState = "Q-variable"
|
---|
1638 | default:
|
---|
1639 | tokenState = "Q"
|
---|
1640 | }
|
---|
1641 |
|
---|
1642 | iterator, err := state.Lexer.Tokenise(
|
---|
1643 | &TokeniseOptions{
|
---|
1644 | State: tokenState,
|
---|
1645 | Nested: true,
|
---|
1646 | }, state.NamedGroups[`value`])
|
---|
1647 |
|
---|
1648 | if err != nil {
|
---|
1649 | panic(err)
|
---|
1650 | } else {
|
---|
1651 | iterators = append(iterators, iterator)
|
---|
1652 | }
|
---|
1653 |
|
---|
1654 | // Append the last punctuation
|
---|
1655 | iterators = append(iterators, Literator(tokens[4]))
|
---|
1656 |
|
---|
1657 | return Concaterator(iterators...)
|
---|
1658 | }
|
---|
1659 |
|
---|
1660 | // Emitter for pod config, tokenises the properties with "colon-pair-attribute" state
|
---|
1661 | func podConfig(groups []string, state *LexerState) Iterator {
|
---|
1662 | // Tokenise pod config
|
---|
1663 | iterator, err := state.Lexer.Tokenise(
|
---|
1664 | &TokeniseOptions{
|
---|
1665 | State: "colon-pair-attribute",
|
---|
1666 | Nested: true,
|
---|
1667 | }, groups[0])
|
---|
1668 |
|
---|
1669 | if err != nil {
|
---|
1670 | panic(err)
|
---|
1671 | } else {
|
---|
1672 | return iterator
|
---|
1673 | }
|
---|
1674 | }
|
---|
1675 |
|
---|
1676 | // Emitter for pod code, tokenises the code based on the lang specified
|
---|
1677 | func podCode(groups []string, state *LexerState) Iterator {
|
---|
1678 | iterators := []Iterator{}
|
---|
1679 | tokens := []Token{
|
---|
1680 | {Comment, state.NamedGroups[`ws`]},
|
---|
1681 | {Keyword, state.NamedGroups[`keyword`]},
|
---|
1682 | {Keyword, state.NamedGroups[`ws2`]},
|
---|
1683 | {Keyword, state.NamedGroups[`name`]},
|
---|
1684 | {StringDoc, state.NamedGroups[`value`]},
|
---|
1685 | {Comment, state.NamedGroups[`ws3`]},
|
---|
1686 | {Keyword, state.NamedGroups[`end_keyword`]},
|
---|
1687 | {Keyword, state.NamedGroups[`ws4`]},
|
---|
1688 | {Keyword, state.NamedGroups[`name`]},
|
---|
1689 | }
|
---|
1690 |
|
---|
1691 | // Append all tokens before dealing with the pod config
|
---|
1692 | iterators = append(iterators, Literator(tokens[:4]...))
|
---|
1693 |
|
---|
1694 | // Tokenise pod config
|
---|
1695 | iterators = append(iterators, podConfig([]string{state.NamedGroups[`config`]}, state))
|
---|
1696 |
|
---|
1697 | langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(state.NamedGroups[`config`])
|
---|
1698 | var lang string
|
---|
1699 | if len(langMatch) > 1 {
|
---|
1700 | lang = langMatch[1]
|
---|
1701 | }
|
---|
1702 |
|
---|
1703 | // Tokenise code based on lang property
|
---|
1704 | sublexer := Get(lang)
|
---|
1705 | if sublexer != nil {
|
---|
1706 | iterator, err := sublexer.Tokenise(nil, state.NamedGroups[`value`])
|
---|
1707 |
|
---|
1708 | if err != nil {
|
---|
1709 | panic(err)
|
---|
1710 | } else {
|
---|
1711 | iterators = append(iterators, iterator)
|
---|
1712 | }
|
---|
1713 | } else {
|
---|
1714 | iterators = append(iterators, Literator(tokens[4]))
|
---|
1715 | }
|
---|
1716 |
|
---|
1717 | // Append the rest of the tokens
|
---|
1718 | iterators = append(iterators, Literator(tokens[5:]...))
|
---|
1719 |
|
---|
1720 | return Concaterator(iterators...)
|
---|
1721 | }
|
---|