source: code/trunk/vendor/github.com/alecthomas/chroma/v2/lexers/raku.go@ 67

Last change on this file since 67 was 67, checked in by Izuru Yakumo, 23 months ago

Use vendored modules

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 58.9 KB
Line 
1package lexers
2
3import (
4 "regexp"
5 "strings"
6 "unicode/utf8"
7
8 "github.com/dlclark/regexp2"
9
10 . "github.com/alecthomas/chroma/v2" // nolint
11)
12
13// Raku lexer.
14var Raku Lexer = Register(MustNewLexer(
15 &Config{
16 Name: "Raku",
17 Aliases: []string{"perl6", "pl6", "raku"},
18 Filenames: []string{
19 "*.pl", "*.pm", "*.nqp", "*.p6", "*.6pl", "*.p6l", "*.pl6", "*.6pm",
20 "*.p6m", "*.pm6", "*.t", "*.raku", "*.rakumod", "*.rakutest", "*.rakudoc",
21 },
22 MimeTypes: []string{
23 "text/x-perl6", "application/x-perl6",
24 "text/x-raku", "application/x-raku",
25 },
26 DotAll: true,
27 },
28 rakuRules,
29))
30
31func rakuRules() Rules {
32 type RakuToken int
33
34 const (
35 rakuQuote RakuToken = iota
36 rakuNameAttribute
37 rakuPod
38 rakuPodFormatter
39 rakuPodDeclaration
40 rakuMultilineComment
41 rakuMatchRegex
42 rakuSubstitutionRegex
43 )
44
45 const (
46 colonPairOpeningBrackets = `(?:<<|<|«|\(|\[|\{)`
47 colonPairClosingBrackets = `(?:>>|>|»|\)|\]|\})`
48 colonPairPattern = `(?<!:)(?<colon>:)(?<key>\w[\w'-]*)(?<opening_delimiters>` + colonPairOpeningBrackets + `)`
49 colonPairLookahead = `(?=(:['\w-]+` +
50 colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `)?`
51 namePattern = `(?:(?!` + colonPairPattern + `)(?:::|[\w':-]))+`
52 variablePattern = `[$@%&]+[.^:?=!~]?` + namePattern
53 globalVariablePattern = `[$@%&]+\*` + namePattern
54 )
55
56 keywords := []string{
57 `BEGIN`, `CATCH`, `CHECK`, `CLOSE`, `CONTROL`, `DOC`, `END`, `ENTER`, `FIRST`, `INIT`,
58 `KEEP`, `LAST`, `LEAVE`, `NEXT`, `POST`, `PRE`, `QUIT`, `UNDO`, `anon`, `augment`, `but`,
59 `class`, `constant`, `default`, `does`, `else`, `elsif`, `enum`, `for`, `gather`, `given`,
60 `grammar`, `has`, `if`, `import`, `is`, `of`, `let`, `loop`, `made`, `make`, `method`,
61 `module`, `multi`, `my`, `need`, `orwith`, `our`, `proceed`, `proto`, `repeat`, `require`,
62 `where`, `return`, `return-rw`, `returns`, `->`, `-->`, `role`, `state`, `sub`, `no`,
63 `submethod`, `subset`, `succeed`, `supersede`, `try`, `unit`, `unless`, `until`,
64 `use`, `when`, `while`, `with`, `without`, `export`, `native`, `repr`, `required`, `rw`,
65 `symbol`, `default`, `cached`, `DEPRECATED`, `dynamic`, `hidden-from-backtrace`, `nodal`,
66 `pure`, `raw`, `start`, `react`, `supply`, `whenever`, `also`, `rule`, `token`, `regex`,
67 `dynamic-scope`, `built`, `temp`,
68 }
69
70 keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...)
71
72 wordOperators := []string{
73 `X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`,
74 `gt`, `le`, `leg`, `lt`, `mod`, `ne`, `or`, `orelse`, `x`, `xor`, `xx`, `gcd`, `lcm`,
75 `but`, `min`, `max`, `^fff`, `fff^`, `fff`, `^ff`, `ff^`, `ff`, `so`, `not`, `unicmp`,
76 `TR`, `o`, `(&)`, `(.)`, `(|)`, `(+)`, `(-)`, `(^)`, `coll`, `(elem)`, `(==)`,
77 `(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`,
78 }
79
80 wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...)
81
82 operators := []string{
83 `++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`,
84 `+<`, `+>`, `~&`, `~<`, `~>`, `?&`, `+|`, `+^`, `~|`, `~^`, `?`, `?|`, `?^`, `&`, `^`,
85 `<=>`, `^…^`, `^…`, `…^`, `…`, `...`, `...^`, `^...`, `^...^`, `..`, `..^`, `^..`, `^..^`,
86 `::=`, `:=`, `!=`, `==`, `<=`, `<`, `>=`, `>`, `~~`, `===`, `&&`, `||`, `|`, `^^`, `//`,
87 `??`, `!!`, `^fff^`, `^ff^`, `<==`, `==>`, `<<==`, `==>>`, `=>`, `=`, `<<`, `«`, `>>`, `»`,
88 `,`, `>>.`, `».`, `.&`, `.=`, `.^`, `.?`, `.+`, `.*`, `.`, `∘`, `∩`, `⊍`, `∪`, `⊎`, `∖`,
89 `⊖`, `≠`, `≤`, `≥`, `=:=`, `=~=`, `≅`, `∈`, `∉`, `≡`, `≢`, `∋`, `∌`, `⊂`, `⊄`, `⊆`, `⊈`,
90 `⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`,
91 }
92
93 operatorsPattern := Words(``, ``, operators...)
94
95 builtinTypes := []string{
96 `False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`,
97 `atomicint`, `Attribute`, `Backtrace`, `Backtrace::Frame`, `Bag`, `Baggy`, `BagHash`,
98 `Blob`, `Block`, `Bool`, `Buf`, `Callable`, `CallFrame`, `Cancellation`, `Capture`,
99 `CArray`, `Channel`, `Code`, `compiler`, `Complex`, `ComplexStr`, `CompUnit`,
100 `CompUnit::PrecompilationRepository`, `CompUnit::Repository`, `Empty`,
101 `CompUnit::Repository::FileSystem`, `CompUnit::Repository::Installation`, `Cool`,
102 `CurrentThreadScheduler`, `CX::Warn`, `CX::Take`, `CX::Succeed`, `CX::Return`, `CX::Redo`,
103 `CX::Proceed`, `CX::Next`, `CX::Last`, `CX::Emit`, `CX::Done`, `Cursor`, `Date`, `Dateish`,
104 `DateTime`, `Distribution`, `Distribution::Hash`, `Distribution::Locally`,
105 `Distribution::Path`, `Distribution::Resource`, `Distro`, `Duration`, `Encoding`,
106 `Encoding::GlobalLexerRegistry`, `Endian`, `Enumeration`, `Exception`, `Failure`, `FatRat`, `Grammar`,
107 `Hash`, `HyperWhatever`, `Instant`, `Int`, `int`, `int16`, `int32`, `int64`, `int8`, `str`,
108 `IntStr`, `IO`, `IO::ArgFiles`, `IO::CatHandle`, `IO::Handle`, `IO::Notification`,
109 `IO::Notification::Change`, `IO::Path`, `IO::Path::Cygwin`, `IO::Path::Parts`,
110 `IO::Path::QNX`, `IO::Path::Unix`, `IO::Path::Win32`, `IO::Pipe`, `IO::Socket`,
111 `IO::Socket::Async`, `IO::Socket::Async::ListenSocket`, `IO::Socket::INET`, `IO::Spec`,
112 `IO::Spec::Cygwin`, `IO::Spec::QNX`, `IO::Spec::Unix`, `IO::Spec::Win32`, `IO::Special`,
113 `Iterable`, `Iterator`, `Junction`, `Kernel`, `Label`, `List`, `Lock`, `Lock::Async`,
114 `Lock::ConditionVariable`, `long`, `longlong`, `Macro`, `Map`, `Match`,
115 `Metamodel::AttributeContainer`, `Metamodel::C3MRO`, `Metamodel::ClassHOW`,
116 `Metamodel::ConcreteRoleHOW`, `Metamodel::CurriedRoleHOW`, `Metamodel::DefiniteHOW`,
117 `Metamodel::Documenting`, `Metamodel::EnumHOW`, `Metamodel::Finalization`,
118 `Metamodel::MethodContainer`, `Metamodel::Mixins`, `Metamodel::MROBasedMethodDispatch`,
119 `Metamodel::MultipleInheritance`, `Metamodel::Naming`, `Metamodel::Primitives`,
120 `Metamodel::PrivateMethodContainer`, `Metamodel::RoleContainer`, `Metamodel::RolePunning`,
121 `Metamodel::Stashing`, `Metamodel::Trusting`, `Metamodel::Versioning`, `Method`, `Mix`,
122 `MixHash`, `Mixy`, `Mu`, `NFC`, `NFD`, `NFKC`, `NFKD`, `Nil`, `Num`, `num32`, `num64`,
123 `Numeric`, `NumStr`, `ObjAt`, `Order`, `Pair`, `Parameter`, `Perl`, `Pod::Block`,
124 `Pod::Block::Code`, `Pod::Block::Comment`, `Pod::Block::Declarator`, `Pod::Block::Named`,
125 `Pod::Block::Para`, `Pod::Block::Table`, `Pod::Heading`, `Pod::Item`, `Pointer`,
126 `Positional`, `PositionalBindFailover`, `Proc`, `Proc::Async`, `Promise`, `Proxy`,
127 `PseudoStash`, `QuantHash`, `RaceSeq`, `Raku`, `Range`, `Rat`, `Rational`, `RatStr`,
128 `Real`, `Regex`, `Routine`, `Routine::WrapHandle`, `Scalar`, `Scheduler`, `Semaphore`,
129 `Seq`, `Sequence`, `Set`, `SetHash`, `Setty`, `Signature`, `size_t`, `Slip`, `Stash`,
130 `Str`, `StrDistance`, `Stringy`, `Sub`, `Submethod`, `Supplier`, `Supplier::Preserving`,
131 `Supply`, `Systemic`, `Tap`, `Telemetry`, `Telemetry::Instrument::Thread`,
132 `Telemetry::Instrument::ThreadPool`, `Telemetry::Instrument::Usage`, `Telemetry::Period`,
133 `Telemetry::Sampler`, `Thread`, `Test`, `ThreadPoolScheduler`, `UInt`, `uint16`, `uint32`,
134 `uint64`, `uint8`, `Uni`, `utf8`, `ValueObjAt`, `Variable`, `Version`, `VM`, `Whatever`,
135 `WhateverCode`, `WrapHandle`, `NativeCall`,
136 // Pragmas
137 `precompilation`, `experimental`, `worries`, `MONKEY-TYPING`, `MONKEY-SEE-NO-EVAL`,
138 `MONKEY-GUTS`, `fatal`, `lib`, `isms`, `newline`, `nqp`, `soft`,
139 `strict`, `trace`, `variables`,
140 }
141
142 builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...)
143
144 builtinRoutines := []string{
145 `ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`,
146 `acosec`, `acosech`, `acosh`, `acotan`, `acotanh`, `acquire`, `act`, `action`, `actions`,
147 `add`, `add_attribute`, `add_enum_value`, `add_fallback`, `add_method`, `add_parent`,
148 `add_private_method`, `add_role`, `add_stash`, `add_trustee`, `addendum`, `adverb`, `after`,
149 `all`, `allocate`, `allof`, `allowed`, `alternative-names`, `annotations`, `antipair`,
150 `antipairs`, `any`, `anyof`, `api`, `app_lifetime`, `append`, `arch`, `archetypes`,
151 `archname`, `args`, `ARGS-TO-CAPTURE`, `arity`, `Array`, `asec`, `asech`, `asin`, `asinh`,
152 `ASSIGN-KEY`, `ASSIGN-POS`, `assuming`, `ast`, `at`, `atan`, `atan2`, `atanh`, `AT-KEY`,
153 `atomic-assign`, `atomic-dec-fetch`, `atomic-fetch`, `atomic-fetch-add`, `atomic-fetch-dec`,
154 `atomic-fetch-inc`, `atomic-fetch-sub`, `atomic-inc-fetch`, `AT-POS`, `attributes`, `auth`,
155 `await`, `backend`, `backtrace`, `Bag`, `bag`, `Baggy`, `BagHash`, `bail-out`, `base`,
156 `basename`, `base-repeating`, `base_type`, `batch`, `BIND-KEY`, `BIND-POS`, `bind-stderr`,
157 `bind-stdin`, `bind-stdout`, `bind-udp`, `bits`, `bless`, `block`, `Bool`, `bool-only`,
158 `bounds`, `break`, `Bridge`, `broken`, `BUILD`, `TWEAK`, `build-date`, `bytes`, `cache`,
159 `callframe`, `calling-package`, `CALL-ME`, `callsame`, `callwith`, `can`, `cancel`,
160 `candidates`, `cando`, `can-ok`, `canonpath`, `caps`, `caption`, `Capture`, `capture`,
161 `cas`, `catdir`, `categorize`, `categorize-list`, `catfile`, `catpath`, `cause`, `ceiling`,
162 `cglobal`, `changed`, `Channel`, `channel`, `chars`, `chdir`, `child`, `child-name`,
163 `child-typename`, `chmod`, `chomp`, `chop`, `chr`, `chrs`, `chunks`, `cis`, `classify`,
164 `classify-list`, `cleanup`, `clone`, `close`, `closed`, `close-stdin`, `cmp-ok`, `code`,
165 `codename`, `codes`, `coerce_type`, `coll`, `collate`, `column`, `comb`, `combinations`,
166 `command`, `comment`, `compiler`, `Complex`, `compose`, `composalizer`, `compose_type`,
167 `compose_values`, `composer`, `compute_mro`, `condition`, `config`, `configure_destroy`,
168 `configure_type_checking`, `conj`, `connect`, `constraints`, `construct`, `contains`,
169 `content`, `contents`, `copy`, `cos`, `cosec`, `cosech`, `cosh`, `cotan`, `cotanh`, `count`,
170 `count-only`, `cpu-cores`, `cpu-usage`, `CREATE`, `create_type`, `cross`, `cue`, `curdir`,
171 `curupdir`, `d`, `Date`, `DateTime`, `day`, `daycount`, `day-of-month`, `day-of-week`,
172 `day-of-year`, `days-in-month`, `dd-mm-yyyy`, `declaration`, `decode`, `decoder`, `deepmap`,
173 `default`, `defined`, `DEFINITE`, `definite`, `delayed`, `delete`, `delete-by-compiler`,
174 `DELETE-KEY`, `DELETE-POS`, `denominator`, `desc`, `DESTROY`, `destroyers`, `devnull`,
175 `diag`, `did-you-mean`, `die`, `dies-ok`, `dir`, `dirname`, `distribution`, `dir-sep`,
176 `DISTROnames`, `do`, `does`, `does-ok`, `done`, `done-testing`, `duckmap`, `dynamic`, `e`,
177 `eager`, `earlier`, `elems`, `emit`, `enclosing`, `encode`, `encoder`, `encoding`, `end`,
178 `endian`, `ends-with`, `enum_from_value`, `enum_value_list`, `enum_values`, `enums`, `EOF`,
179 `eof`, `EVAL`, `eval-dies-ok`, `EVALFILE`, `eval-lives-ok`, `event`, `exception`,
180 `excludes-max`, `excludes-min`, `EXISTS-KEY`, `EXISTS-POS`, `exit`, `exitcode`, `exp`,
181 `expected`, `explicitly-manage`, `expmod`, `export_callback`, `extension`, `f`, `fail`,
182 `FALLBACK`, `fails-like`, `fc`, `feature`, `file`, `filename`, `files`, `find`,
183 `find_method`, `find_method_qualified`, `finish`, `first`, `flat`, `first-date-in-month`,
184 `flatmap`, `flip`, `floor`, `flunk`, `flush`, `flush_cache`, `fmt`, `format`, `formatter`,
185 `free-memory`, `freeze`, `from`, `from-list`, `from-loop`, `from-posix`, `from-slurpy`,
186 `full`, `full-barrier`, `GENERATE-USAGE`, `generate_mixin`, `get`, `get_value`, `getc`,
187 `gist`, `got`, `grab`, `grabpairs`, `grep`, `handle`, `handled`, `handles`, `hardware`,
188 `has_accessor`, `Hash`, `hash`, `head`, `headers`, `hh-mm-ss`, `hidden`, `hides`, `hostname`,
189 `hour`, `how`, `hyper`, `id`, `illegal`, `im`, `in`, `in-timezone`, `indent`, `index`,
190 `indices`, `indir`, `infinite`, `infix`, `postcirumfix`, `cicumfix`, `install`,
191 `install_method_cache`, `Instant`, `instead`, `Int`, `int-bounds`, `interval`, `in-timezone`,
192 `invalid-str`, `invert`, `invocant`, `IO`, `IO::Notification.watch-path`, `is_trusted`,
193 `is_type`, `isa`, `is-absolute`, `isa-ok`, `is-approx`, `is-deeply`, `is-hidden`,
194 `is-initial-thread`, `is-int`, `is-lazy`, `is-leap-year`, `isNaN`, `isnt`, `is-prime`,
195 `is-relative`, `is-routine`, `is-setting`, `is-win`, `item`, `iterator`, `join`, `keep`,
196 `kept`, `KERNELnames`, `key`, `keyof`, `keys`, `kill`, `kv`, `kxxv`, `l`, `lang`, `last`,
197 `lastcall`, `later`, `lazy`, `lc`, `leading`, `level`, `like`, `line`, `lines`, `link`,
198 `List`, `list`, `listen`, `live`, `lives-ok`, `load`, `load-repo-id`, `load-unit`, `loaded`,
199 `loads`, `local`, `lock`, `log`, `log10`, `lookup`, `lsb`, `made`, `MAIN`, `make`, `Map`,
200 `map`, `match`, `max`, `maxpairs`, `merge`, `message`, `method`, `meta`, `method_table`,
201 `methods`, `migrate`, `min`, `minmax`, `minpairs`, `minute`, `misplaced`, `Mix`, `mix`,
202 `MixHash`, `mixin`, `mixin_attribute`, `Mixy`, `mkdir`, `mode`, `modified`, `month`, `move`,
203 `mro`, `msb`, `multi`, `multiness`, `name`, `named`, `named_names`, `narrow`,
204 `nativecast`, `native-descriptor`, `nativesizeof`, `need`, `new`, `new_type`,
205 `new-from-daycount`, `new-from-pairs`, `next`, `nextcallee`, `next-handle`, `nextsame`,
206 `nextwith`, `next-interesting-index`, `NFC`, `NFD`, `NFKC`, `NFKD`, `nice`, `nl-in`,
207 `nl-out`, `nodemap`, `nok`, `normalize`, `none`, `norm`, `not`, `note`, `now`, `nude`,
208 `Num`, `numerator`, `Numeric`, `of`, `offset`, `offset-in-hours`, `offset-in-minutes`,
209 `ok`, `old`, `on-close`, `one`, `on-switch`, `open`, `opened`, `operation`, `optional`,
210 `ord`, `ords`, `orig`, `os-error`, `osname`, `out-buffer`, `pack`, `package`, `package-kind`,
211 `package-name`, `packages`, `Pair`, `pair`, `pairs`, `pairup`, `parameter`, `params`,
212 `parent`, `parent-name`, `parents`, `parse`, `parse-base`, `parsefile`, `parse-names`,
213 `parts`, `pass`, `path`, `path-sep`, `payload`, `peer-host`, `peer-port`, `periods`, `perl`,
214 `permutations`, `phaser`, `pick`, `pickpairs`, `pid`, `placeholder`, `plan`, `plus`,
215 `polar`, `poll`, `polymod`, `pop`, `pos`, `positional`, `posix`, `postfix`, `postmatch`,
216 `precomp-ext`, `precomp-target`, `precompiled`, `pred`, `prefix`, `prematch`, `prepend`,
217 `primary`, `print`, `printf`, `print-nl`, `print-to`, `private`, `private_method_names`,
218 `private_method_table`, `proc`, `produce`, `Promise`, `promise`, `prompt`, `protect`,
219 `protect-or-queue-on-recursion`, `publish_method_cache`, `pull-one`, `push`, `push-all`,
220 `push-at-least`, `push-exactly`, `push-until-lazy`, `put`, `qualifier-type`, `quaternary`,
221 `quit`, `r`, `race`, `radix`, `raku`, `rand`, `Range`, `range`, `Rat`, `raw`, `re`, `read`,
222 `read-bits`, `read-int128`, `read-int16`, `read-int32`, `read-int64`, `read-int8`,
223 `read-num32`, `read-num64`, `read-ubits`, `read-uint128`, `read-uint16`, `read-uint32`,
224 `read-uint64`, `read-uint8`, `readchars`, `readonly`, `ready`, `Real`, `reallocate`,
225 `reals`, `reason`, `rebless`, `receive`, `recv`, `redispatcher`, `redo`, `reduce`,
226 `rel2abs`, `relative`, `release`, `remove`, `rename`, `repeated`, `replacement`,
227 `replace-with`, `repo`, `repo-id`, `report`, `required`, `reserved`, `resolve`, `restore`,
228 `result`, `resume`, `rethrow`, `return`, `return-rw`, `returns`, `reverse`, `right`,
229 `rindex`, `rmdir`, `role`, `roles_to_compose`, `rolish`, `roll`, `rootdir`, `roots`,
230 `rotate`, `rotor`, `round`, `roundrobin`, `routine-type`, `run`, `RUN-MAIN`, `rw`, `rwx`,
231 `samecase`, `samemark`, `samewith`, `say`, `schedule-on`, `scheduler`, `scope`, `sec`,
232 `sech`, `second`, `secondary`, `seek`, `self`, `send`, `Seq`, `Set`, `set`, `serial`,
233 `set_hidden`, `set_name`, `set_package`, `set_rw`, `set_value`, `set_api`, `set_auth`,
234 `set_composalizer`, `set_export_callback`, `set_is_mixin`, `set_mixin_attribute`,
235 `set_package`, `set_ver`, `set_why`, `SetHash`, `Setty`, `set-instruments`,
236 `setup_finalization`, `setup_mixin_cache`, `shape`, `share`, `shell`, `short-id`,
237 `short-name`, `shortname`, `shift`, `sibling`, `sigil`, `sign`, `signal`, `signals`,
238 `signature`, `sin`, `sinh`, `sink`, `sink-all`, `skip`, `skip-at-least`,
239 `skip-at-least-pull-one`, `skip-one`, `skip-rest`, `sleep`, `sleep-timer`, `sleep-until`,
240 `Slip`, `slip`, `slurp`, `slurp-rest`, `slurpy`, `snap`, `snapper`, `so`, `socket-host`,
241 `socket-port`, `sort`, `source`, `source-package`, `spawn`, `SPEC`, `splice`, `split`,
242 `splitdir`, `splitpath`, `sprintf`, `spurt`, `sqrt`, `squish`, `srand`, `stable`, `start`,
243 `started`, `starts-with`, `status`, `stderr`, `stdout`, `STORE`, `store-file`,
244 `store-repo-id`, `store-unit`, `Str`, `Stringy`, `sub_signature`, `subbuf`, `subbuf-rw`,
245 `subname`, `subparse`, `subst`, `subst-mutate`, `substr`, `substr-eq`, `substr-rw`,
246 `subtest`, `succ`, `sum`, `suffix`, `summary`, `Supply`, `symlink`, `T`, `t`, `tail`,
247 `take`, `take-rw`, `tan`, `tanh`, `tap`, `target`, `target-name`, `tc`, `tclc`, `tell`,
248 `term`, `tertiary`, `then`, `throttle`, `throw`, `throws-like`, `time`, `timezone`,
249 `tmpdir`, `to`, `today`, `todo`, `toggle`, `to-posix`, `total`, `total-memory`, `trailing`,
250 `trans`, `tree`, `trim`, `trim-leading`, `trim-trailing`, `truncate`, `truncated-to`,
251 `trusts`, `try_acquire`, `trying`, `twigil`, `type`, `type_captures`, `type_check`,
252 `typename`, `uc`, `udp`, `uncaught_handler`, `undefine`, `unimatch`, `unicmp`, `uniname`,
253 `uninames`, `uninstall`, `uniparse`, `uniprop`, `uniprops`, `unique`, `unival`, `univals`,
254 `unlike`, `unlink`, `unlock`, `unpack`, `unpolar`, `unset`, `unshift`, `unwrap`, `updir`,
255 `USAGE`, `usage-name`, `use-ok`, `utc`, `val`, `value`, `values`, `VAR`, `variable`, `ver`,
256 `verbose-config`, `Version`, `version`, `VMnames`, `volume`, `vow`, `w`, `wait`, `warn`,
257 `watch`, `watch-path`, `week`, `weekday-of-month`, `week-number`, `week-year`, `WHAT`,
258 `what`, `when`, `WHERE`, `WHEREFORE`, `WHICH`, `WHO`, `whole-second`, `WHY`, `why`,
259 `with-lock-hidden-from-recursion-check`, `wordcase`, `words`, `workaround`, `wrap`,
260 `write`, `write-bits`, `write-int128`, `write-int16`, `write-int32`, `write-int64`,
261 `write-int8`, `write-num32`, `write-num64`, `write-ubits`, `write-uint128`, `write-uint16`,
262 `write-uint32`, `write-uint64`, `write-uint8`, `write-to`, `x`, `yada`, `year`, `yield`,
263 `yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`,
264 }
265
266 builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...)
267
268 // A map of opening and closing brackets
269 brackets := map[rune]rune{
270 '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
271 '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
272 '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
273 '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
274 '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
275 '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
276 '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
277 '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
278 '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
279 '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
280 '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
281 '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
282 '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
283 '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
284 '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
285 '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
286 '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
287 '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
288 '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
289 '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
290 '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
291 '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
292 '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
293 '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
294 '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
295 '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
296 '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
297 '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
298 '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
299 '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
300 '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
301 '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
302 '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
303 '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
304 '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
305 '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
306 '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
307 '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
308 '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
309 '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
310 '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
311 '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
312 '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
313 '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
314 '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
315 '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
316 '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
317 '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
318 '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
319 '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
320 '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
321 '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
322 '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
323 '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
324 '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
325 '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
326 '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
327 '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
328 '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
329 '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
330 '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
331 '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
332 '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
333 '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
334 }
335
336 bracketsPattern := `[` + regexp.QuoteMeta(joinRuneMap(brackets)) + `]`
337
338 // Finds opening brackets and their closing counterparts (including pod and heredoc)
339 // and modifies state groups and position accordingly
340 findBrackets := func(tokenClass RakuToken) MutatorFunc {
341 return func(state *LexerState) error {
342 var openingChars []rune
343 var adverbs []rune
344 switch tokenClass {
345 case rakuPod:
346 openingChars = []rune(strings.Join(state.Groups[1:5], ``))
347 default:
348 adverbs = []rune(state.NamedGroups[`adverbs`])
349 openingChars = []rune(state.NamedGroups[`opening_delimiters`])
350 }
351
352 openingChar := openingChars[0]
353
354 nChars := len(openingChars)
355
356 var closingChar rune
357 var closingCharExists bool
358 var closingChars []rune
359
360 switch tokenClass {
361 case rakuPod:
362 closingCharExists = true
363 default:
364 closingChar, closingCharExists = brackets[openingChar]
365 }
366
367 switch tokenClass {
368 case rakuPodFormatter:
369 formatter := StringOther
370
371 switch state.NamedGroups[`keyword`] {
372 case "B":
373 formatter = GenericStrong
374 case "I":
375 formatter = GenericEmph
376 case "U":
377 formatter = GenericUnderline
378 }
379
380 formatterRule := ruleReplacingConfig{
381 pattern: `.+?`,
382 tokenType: formatter,
383 mutator: nil,
384 stateName: `pod-formatter`,
385 rulePosition: bottomRule,
386 }
387
388 err := replaceRule(formatterRule)(state)
389 if err != nil {
390 panic(err)
391 }
392
393 err = replaceRule(ruleReplacingConfig{
394 delimiter: []rune{closingChar},
395 tokenType: Punctuation,
396 stateName: `pod-formatter`,
397 pushState: true,
398 numberOfDelimiterChars: nChars,
399 appendMutator: popRule(formatterRule),
400 })(state)
401 if err != nil {
402 panic(err)
403 }
404
405 return nil
406 case rakuMatchRegex:
407 var delimiter []rune
408 if closingCharExists {
409 delimiter = []rune{closingChar}
410 } else {
411 delimiter = openingChars
412 }
413
414 err := replaceRule(ruleReplacingConfig{
415 delimiter: delimiter,
416 tokenType: Punctuation,
417 stateName: `regex`,
418 popState: true,
419 pushState: true,
420 })(state)
421 if err != nil {
422 panic(err)
423 }
424
425 return nil
426 case rakuSubstitutionRegex:
427 delimiter := regexp2.Escape(string(openingChars))
428
429 err := replaceRule(ruleReplacingConfig{
430 pattern: `(` + delimiter + `)` + `((?:\\\\|\\/|.)*?)` + `(` + delimiter + `)`,
431 tokenType: ByGroups(Punctuation, UsingSelf(`qq`), Punctuation),
432 rulePosition: topRule,
433 stateName: `regex`,
434 popState: true,
435 pushState: true,
436 })(state)
437 if err != nil {
438 panic(err)
439 }
440
441 return nil
442 }
443
444 text := state.Text
445
446 var endPos int
447
448 var nonMirroredOpeningCharPosition int
449
450 if !closingCharExists {
451 // it's not a mirrored character, which means we
452 // just need to look for the next occurrence
453 closingChars = openingChars
454 nonMirroredOpeningCharPosition = indexAt(text, closingChars, state.Pos)
455 endPos = nonMirroredOpeningCharPosition
456 } else {
457 var podRegex *regexp2.Regexp
458 if tokenClass == rakuPod {
459 podRegex = regexp2.MustCompile(
460 state.NamedGroups[`ws`]+`=end`+`\s+`+regexp2.Escape(state.NamedGroups[`name`]),
461 0,
462 )
463 } else {
464 closingChars = []rune(strings.Repeat(string(closingChar), nChars))
465 }
466
467 // we need to look for the corresponding closing character,
468 // keep nesting in mind
469 nestingLevel := 1
470
471 searchPos := state.Pos - nChars
472
473 var nextClosePos int
474
475 for nestingLevel > 0 {
476 if tokenClass == rakuPod {
477 match, err := podRegex.FindRunesMatchStartingAt(text, searchPos+nChars)
478 if err == nil {
479 closingChars = match.Runes()
480 nextClosePos = match.Index
481 } else {
482 nextClosePos = -1
483 }
484 } else {
485 nextClosePos = indexAt(text, closingChars, searchPos+nChars)
486 }
487
488 nextOpenPos := indexAt(text, openingChars, searchPos+nChars)
489
490 switch {
491 case nextClosePos == -1:
492 nextClosePos = len(text)
493 nestingLevel = 0
494 case nextOpenPos != -1 && nextOpenPos < nextClosePos:
495 nestingLevel++
496 nChars = len(openingChars)
497 searchPos = nextOpenPos
498 default: // next_close_pos < next_open_pos
499 nestingLevel--
500 nChars = len(closingChars)
501 searchPos = nextClosePos
502 }
503 }
504
505 endPos = nextClosePos
506 }
507
508 if endPos < 0 {
509 // if we didn't find a closer, just highlight the
510 // rest of the text in this class
511 endPos = len(text)
512 }
513
514 adverbre := regexp.MustCompile(`:to\b|:heredoc\b`)
515 var heredocTerminator []rune
516 var endHeredocPos int
517 if adverbre.MatchString(string(adverbs)) {
518 if endPos != len(text) {
519 heredocTerminator = text[state.Pos:endPos]
520 nChars = len(heredocTerminator)
521 } else {
522 endPos = state.Pos + 1
523 heredocTerminator = []rune{}
524 nChars = 0
525 }
526
527 if nChars > 0 {
528 endHeredocPos = indexAt(text[endPos:], heredocTerminator, 0)
529 if endHeredocPos > -1 {
530 endPos += endHeredocPos
531 } else {
532 endPos = len(text)
533 }
534 }
535 }
536
537 textBetweenBrackets := string(text[state.Pos:endPos])
538 switch tokenClass {
539 case rakuPod, rakuPodDeclaration, rakuNameAttribute:
540 state.NamedGroups[`value`] = textBetweenBrackets
541 state.NamedGroups[`closing_delimiters`] = string(closingChars)
542 case rakuQuote:
543 if len(heredocTerminator) > 0 {
544 // Length of heredoc terminator + closing chars + `;`
545 heredocFristPunctuationLen := nChars + len(openingChars) + 1
546
547 state.NamedGroups[`opening_delimiters`] = string(openingChars) +
548 string(text[state.Pos:state.Pos+heredocFristPunctuationLen])
549
550 state.NamedGroups[`value`] =
551 string(text[state.Pos+heredocFristPunctuationLen : endPos])
552
553 if endHeredocPos > -1 {
554 state.NamedGroups[`closing_delimiters`] = string(heredocTerminator)
555 }
556 } else {
557 state.NamedGroups[`value`] = textBetweenBrackets
558 if nChars > 0 {
559 state.NamedGroups[`closing_delimiters`] = string(closingChars)
560 }
561 }
562 default:
563 state.Groups = []string{state.Groups[0] + string(text[state.Pos:endPos+nChars])}
564 }
565
566 state.Pos = endPos + nChars
567
568 return nil
569 }
570 }
571
572 // Raku rules
573 // Empty capture groups are placeholders and will be replaced by mutators
574 // DO NOT REMOVE THEM!
575 return Rules{
576 "root": {
577 // Placeholder, will be overwritten by mutators, DO NOT REMOVE!
578 {`\A\z`, nil, nil},
579 Include("common"),
580 {`{`, Punctuation, Push(`root`)},
581 {`\(`, Punctuation, Push(`root`)},
582 {`[)}]`, Punctuation, Pop(1)},
583 {`;`, Punctuation, nil},
584 {`\[|\]`, Operator, nil},
585 {`.+?`, Text, nil},
586 },
587 "common": {
588 {`^#![^\n]*$`, CommentHashbang, nil},
589 Include("pod"),
590 // Multi-line, Embedded comment
591 {
592 "#`(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)`,
593 CommentMultiline,
594 findBrackets(rakuMultilineComment),
595 },
596 {`#[^\n]*$`, CommentSingle, nil},
597 // /regex/
598 {
599 `(?<=(?:^|\(|=|:|~~|\[|{|,|=>)\s*)(/)(?!\]|\))((?:\\\\|\\/|.)*?)((?<!(?<!\\)\\)/(?!'|"))`,
600 ByGroups(Punctuation, UsingSelf("regex"), Punctuation),
601 nil,
602 },
603 Include("variable"),
604 // ::?VARIABLE
605 {`::\?\w+(?::[_UD])?`, NameVariableGlobal, nil},
606 // Version
607 {
608 `\b(v)(\d+)((?:\.(?:\*|[\d\w]+))*)(\+)?`,
609 ByGroups(Keyword, NumberInteger, NameEntity, Operator),
610 nil,
611 },
612 Include("number"),
613 // Hyperoperator | »*«
614 {`(>>)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
615 {`(»)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
616 // Hyperoperator | «*«
617 {`(<<)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
618 {`(«)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
619 // Hyperoperator | »*»
620 {`(>>)(\S+?)(>>)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
621 {`(»)(\S+?)(»)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
622 // <<quoted words>>
623 {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<<)(?!(?:(?!>>)[^\n])+?[},;] *\n)(?!(?:(?!>>).)+?>>\S+?>>)`, Punctuation, Push("<<")},
624 // «quoted words»
625 {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(«)(?![^»]+?[},;] *\n)(?![^»]+?»\S+?»)`, Punctuation, Push("«")},
626 // [<]
627 {`(?<=\[\\?)<(?=\])`, Operator, nil},
628 // < and > operators | something < onething > something
629 {
630 `(?<=[$@%&]?\w[\w':-]* +)(<=?)( *[^ ]+? *)(>=?)(?= *[$@%&]?\w[\w':-]*)`,
631 ByGroups(Operator, UsingSelf("root"), Operator),
632 nil,
633 },
634 // <quoted words>
635 {
636 `(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<)((?:(?![,;)}] *(?:#[^\n]+)?\n)[^<>])+?)(>)(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?\w[\w':-]*[^(]|\s+\[))`,
637 ByGroups(Punctuation, String, Punctuation),
638 nil,
639 },
640 {`C?X::['\w:-]+`, NameException, nil},
641 Include("metaoperator"),
642 // Pair | key => value
643 {
644 `(\w[\w'-]*)(\s*)(=>)`,
645 ByGroups(String, Text, Operator),
646 nil,
647 },
648 Include("colon-pair"),
649 // Token
650 {
651 `(?<=(?:^|\s)(?:regex|token|rule)(\s+))` + namePattern + colonPairLookahead + `\s*[({])`,
652 NameFunction,
653 Push("token", "name-adverb"),
654 },
655 // Substitution
656 {`(?<=^|\b|\s)(?<!\.)(ss|S|s|TR|tr)\b(\s*)`, ByGroups(Keyword, Text), Push("substitution")},
657 {keywordsPattern, Keyword, nil},
658 {builtinTypesPattern, NameBuiltin, nil},
659 {builtinRoutinesPattern, NameBuiltin, nil},
660 // Class name
661 {
662 `(?<=(?:^|\s)(?:class|grammar|role|does|but|is|subset|of)\s+)` + namePattern,
663 NameClass,
664 Push("name-adverb"),
665 },
666 // Routine
667 {
668 `(?<=(?:^|\s)(?:sub|method|multi sub|multi)\s+)!?` + namePattern + colonPairLookahead + `\s*[({])`,
669 NameFunction,
670 Push("name-adverb"),
671 },
672 // Constant
673 {`(?<=\bconstant\s+)` + namePattern, NameConstant, Push("name-adverb")},
674 // Namespace
675 {`(?<=\b(?:use|module|package)\s+)` + namePattern, NameNamespace, Push("name-adverb")},
676 Include("operator"),
677 Include("single-quote"),
678 {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
679 // m,rx regex
680 {`(?<=^|\b|\s)(ms|m|rx)\b(\s*)`, ByGroups(Keyword, Text), Push("rx")},
681 // Quote constructs
682 {
683 `(?<=^|\b|\s)(?<keyword>(?:qq|q|Q))(?<adverbs>(?::?(?:heredoc|to|qq|ww|q|w|s|a|h|f|c|b|to|v|x))*)(?<ws>\s*)(?<opening_delimiters>(?<delimiter>[^0-9a-zA-Z:\s])\k<delimiter>*)`,
684 EmitterFunc(quote),
685 findBrackets(rakuQuote),
686 },
687 // Function
688 {
689 `\b` + namePattern + colonPairLookahead + `\()`,
690 NameFunction,
691 Push("name-adverb"),
692 },
693 // Method
694 {
695 `(?<!\.\.[?^*+]?)(?<=(?:\.[?^*+&]?)|self!)` + namePattern + colonPairLookahead + `\b)`,
696 NameFunction,
697 Push("name-adverb"),
698 },
699 // Indirect invocant
700 {namePattern + `(?=\s+\W?['\w:-]+:\W)`, NameFunction, Push("name-adverb")},
701 {`(?<=\W)(?:∅|i|e|𝑒|tau|τ|pi|π|Inf|∞)(?=\W)`, NameConstant, nil},
702 {`(「)([^」]*)(」)`, ByGroups(Punctuation, String, Punctuation), nil},
703 {`(?<=^ *)\b` + namePattern + `(?=:\s*(?:for|while|loop))`, NameLabel, nil},
704 // Sigilless variable
705 {
706 `(?<=\b(?:my|our|constant|let|temp)\s+)\\` + namePattern,
707 NameVariable,
708 Push("name-adverb"),
709 },
710 {namePattern, Name, Push("name-adverb")},
711 },
712 "rx": {
713 Include("colon-pair-attribute"),
714 {
715 `(?<opening_delimiters>(?<delimiter>[^\w:\s])\k<delimiter>*)`,
716 ByGroupNames(
717 map[string]Emitter{
718 `opening_delimiters`: Punctuation,
719 `delimiter`: nil,
720 },
721 ),
722 findBrackets(rakuMatchRegex),
723 },
724 },
725 "substitution": {
726 Include("colon-pair-attribute"),
727 // Substitution | s{regex} = value
728 {
729 `(?<opening_delimiters>(?<delimiter>` + bracketsPattern + `)\k<delimiter>*)`,
730 ByGroupNames(map[string]Emitter{
731 `opening_delimiters`: Punctuation,
732 `delimiter`: nil,
733 }),
734 findBrackets(rakuMatchRegex),
735 },
736 // Substitution | s/regex/string/
737 {
738 `(?<opening_delimiters>[^\w:\s])`,
739 Punctuation,
740 findBrackets(rakuSubstitutionRegex),
741 },
742 },
743 "number": {
744 {`0_?[0-7]+(_[0-7]+)*`, LiteralNumberOct, nil},
745 {`0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*`, LiteralNumberHex, nil},
746 {`0b[01]+(_[01]+)*`, LiteralNumberBin, nil},
747 {
748 `(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?`,
749 LiteralNumberFloat,
750 nil,
751 },
752 {`(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*`, LiteralNumberFloat, nil},
753 {`(?<=\d+)i`, NameConstant, nil},
754 {`\d+(_\d+)*`, LiteralNumberInteger, nil},
755 },
756 "name-adverb": {
757 Include("colon-pair-attribute-keyvalue"),
758 Default(Pop(1)),
759 },
760 "colon-pair": {
761 // :key(value)
762 {colonPairPattern, colonPair(String), findBrackets(rakuNameAttribute)},
763 // :123abc
764 {
765 `(:)(\d+)(\w[\w'-]*)`,
766 ByGroups(Punctuation, UsingSelf("number"), String),
767 nil,
768 },
769 // :key
770 {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, String), nil},
771 {`\s+`, Text, nil},
772 },
773 "colon-pair-attribute": {
774 // :key(value)
775 {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
776 // :123abc
777 {
778 `(:)(\d+)(\w[\w'-]*)`,
779 ByGroups(Punctuation, UsingSelf("number"), NameAttribute),
780 nil,
781 },
782 // :key
783 {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, NameAttribute), nil},
784 {`\s+`, Text, nil},
785 },
786 "colon-pair-attribute-keyvalue": {
787 // :key(value)
788 {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
789 },
790 "escape-qq": {
791 {
792 `(?<!(?<!\\)\\)(\\qq)(\[)(.+?)(\])`,
793 ByGroups(StringEscape, Punctuation, UsingSelf("qq"), Punctuation),
794 nil,
795 },
796 },
797 `escape-char`: {
798 {`(?<!(?<!\\)\\)(\\[abfrnrt])`, StringEscape, nil},
799 },
800 `escape-single-quote`: {
801 {`(?<!(?<!\\)\\)(\\)(['\\])`, ByGroups(StringEscape, StringSingle), nil},
802 },
803 "escape-c-name": {
804 {
805 `(?<!(?<!\\)\\)(\\[c|C])(\[)(.+?)(\])`,
806 ByGroups(StringEscape, Punctuation, String, Punctuation),
807 nil,
808 },
809 },
810 "escape-hexadecimal": {
811 {
812 `(?<!(?<!\\)\\)(\\[x|X])(\[)([0-9a-fA-F]+)(\])`,
813 ByGroups(StringEscape, Punctuation, NumberHex, Punctuation),
814 nil,
815 },
816 {`(\\[x|X])([0-9a-fA-F]+)`, ByGroups(StringEscape, NumberHex), nil},
817 },
818 "regex": {
819 // Placeholder, will be overwritten by mutators, DO NOT REMOVE!
820 {`\A\z`, nil, nil},
821 Include("regex-escape-class"),
822 Include(`regex-character-escape`),
823 // $(code)
824 {
825 `([$@])((?<!(?<!\\)\\)\()`,
826 ByGroups(Keyword, Punctuation),
827 replaceRule(ruleReplacingConfig{
828 delimiter: []rune(`)`),
829 tokenType: Punctuation,
830 stateName: `root`,
831 pushState: true,
832 }),
833 },
834 // Exclude $/ from variables, because we can't get out of the end of the slash regex: $/;
835 {`\$(?=/)`, NameEntity, nil},
836 // Exclude $ from variables
837 {`\$(?=\z|\s|[^<(\w*!.])`, NameEntity, nil},
838 Include("variable"),
839 Include("escape-c-name"),
840 Include("escape-hexadecimal"),
841 Include("number"),
842 Include("single-quote"),
843 // :my variable code ...
844 {
845 `(?<!(?<!\\)\\)(:)(my|our|state|constant|temp|let)`,
846 ByGroups(Operator, KeywordDeclaration),
847 replaceRule(ruleReplacingConfig{
848 delimiter: []rune(`;`),
849 tokenType: Punctuation,
850 stateName: `root`,
851 pushState: true,
852 }),
853 },
854 // <{code}>
855 {
856 `(?<!(?<!\\)\\)(<)([?!.]*)((?<!(?<!\\)\\){)`,
857 ByGroups(Punctuation, Operator, Punctuation),
858 replaceRule(ruleReplacingConfig{
859 delimiter: []rune(`}>`),
860 tokenType: Punctuation,
861 stateName: `root`,
862 pushState: true,
863 }),
864 },
865 // {code}
866 Include(`closure`),
867 // Properties
868 {`(:)(\w+)`, ByGroups(Punctuation, NameAttribute), nil},
869 // Operator
870 {`\|\||\||&&|&|\.\.|\*\*|%%|%|:|!|<<|«|>>|»|\+|\*\*|\*|\?|=|~|<~~>`, Operator, nil},
871 // Anchors
872 {`\^\^|\^|\$\$|\$`, NameEntity, nil},
873 {`\.`, NameEntity, nil},
874 {`#[^\n]*\n`, CommentSingle, nil},
875 // Lookaround
876 {
877 `(?<!(?<!\\)\\)(<)(\s*)([?!.]+)(\s*)(after|before)`,
878 ByGroups(Punctuation, Text, Operator, Text, OperatorWord),
879 replaceRule(ruleReplacingConfig{
880 delimiter: []rune(`>`),
881 tokenType: Punctuation,
882 stateName: `regex`,
883 pushState: true,
884 }),
885 },
886 {
887 `(?<!(?<!\\)\\)(<)([|!?.]*)(wb|ww|ws|w)(>)`,
888 ByGroups(Punctuation, Operator, OperatorWord, Punctuation),
889 nil,
890 },
891 // <$variable>
892 {
893 `(?<!(?<!\\)\\)(<)([?!.]*)([$@]\w[\w:-]*)(>)`,
894 ByGroups(Punctuation, Operator, NameVariable, Punctuation),
895 nil,
896 },
897 // Capture markers
898 {`(?<!(?<!\\)\\)<\(|\)>`, Operator, nil},
899 {
900 `(?<!(?<!\\)\\)(<)(\w[\w:-]*)(=\.?)`,
901 ByGroups(Punctuation, NameVariable, Operator),
902 Push(`regex-variable`),
903 },
904 {
905 `(?<!(?<!\\)\\)(<)([|!?.&]*)(\w(?:(?!:\s)[\w':-])*)`,
906 ByGroups(Punctuation, Operator, NameFunction),
907 Push(`regex-function`),
908 },
909 {`(?<!(?<!\\)\\)<`, Punctuation, Push("regex-property")},
910 {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
911 {`(?<!(?<!\\)\\)(?:\]|\))`, Punctuation, Pop(1)},
912 {`(?<!(?<!\\)\\)(?:\[|\()`, Punctuation, Push("regex")},
913 {`.+?`, StringRegex, nil},
914 },
915 "regex-class-builtin": {
916 {
917 `\b(?:alnum|alpha|blank|cntrl|digit|graph|lower|print|punct|space|upper|xdigit|same|ident)\b`,
918 NameBuiltin,
919 nil,
920 },
921 },
922 "regex-function": {
923 // <function>
924 {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
925 // <function(parameter)>
926 {
927 `\(`,
928 Punctuation,
929 replaceRule(ruleReplacingConfig{
930 delimiter: []rune(`)>`),
931 tokenType: Punctuation,
932 stateName: `root`,
933 popState: true,
934 pushState: true,
935 }),
936 },
937 // <function value>
938 {
939 `\s+`,
940 StringRegex,
941 replaceRule(ruleReplacingConfig{
942 delimiter: []rune(`>`),
943 tokenType: Punctuation,
944 stateName: `regex`,
945 popState: true,
946 pushState: true,
947 }),
948 },
949 // <function: value>
950 {
951 `:`,
952 Punctuation,
953 replaceRule(ruleReplacingConfig{
954 delimiter: []rune(`>`),
955 tokenType: Punctuation,
956 stateName: `root`,
957 popState: true,
958 pushState: true,
959 }),
960 },
961 },
962 "regex-variable": {
963 Include(`regex-starting-operators`),
964 // <var=function(
965 {
966 `(&)?(\w(?:(?!:\s)[\w':-])*)(?=\()`,
967 ByGroups(Operator, NameFunction),
968 Mutators(Pop(1), Push(`regex-function`)),
969 },
970 // <var=function>
971 {`(&)?(\w[\w':-]*)(>)`, ByGroups(Operator, NameFunction, Punctuation), Pop(1)},
972 // <var=
973 Default(Pop(1), Push(`regex-property`)),
974 },
975 "regex-property": {
976 {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
977 Include("regex-class-builtin"),
978 Include("variable"),
979 Include(`regex-starting-operators`),
980 Include("colon-pair-attribute"),
981 {`(?<!(?<!\\)\\)\[`, Punctuation, Push("regex-character-class")},
982 {`\+|\-`, Operator, nil},
983 {`@[\w':-]+`, NameVariable, nil},
984 {`.+?`, StringRegex, nil},
985 },
986 `regex-starting-operators`: {
987 {`(?<=<)[|!?.]+`, Operator, nil},
988 },
989 "regex-escape-class": {
990 {`(?i)\\n|\\t|\\h|\\v|\\s|\\d|\\w`, StringEscape, nil},
991 },
992 `regex-character-escape`: {
993 {`(?<!(?<!\\)\\)(\\)(.)`, ByGroups(StringEscape, StringRegex), nil},
994 },
995 "regex-character-class": {
996 {`(?<!(?<!\\)\\)\]`, Punctuation, Pop(1)},
997 Include("regex-escape-class"),
998 Include("escape-c-name"),
999 Include("escape-hexadecimal"),
1000 Include(`regex-character-escape`),
1001 Include("number"),
1002 {`\.\.`, Operator, nil},
1003 {`.+?`, StringRegex, nil},
1004 },
1005 "metaoperator": {
1006 // Z[=>]
1007 {
1008 `\b([RZX]+)\b(\[)([^\s\]]+?)(\])`,
1009 ByGroups(OperatorWord, Punctuation, UsingSelf("root"), Punctuation),
1010 nil,
1011 },
1012 // Z=>
1013 {`\b([RZX]+)\b([^\s\]]+)`, ByGroups(OperatorWord, UsingSelf("operator")), nil},
1014 },
1015 "operator": {
1016 // Word Operator
1017 {wordOperatorsPattern, OperatorWord, nil},
1018 // Operator
1019 {operatorsPattern, Operator, nil},
1020 },
1021 "pod": {
1022 // Single-line pod declaration
1023 {`(#[|=])\s`, Keyword, Push("pod-single")},
1024 // Multi-line pod declaration
1025 {
1026 "(?<keyword>#[|=])(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)(?<value>)(?<closing_delimiters>)`,
1027 ByGroupNames(
1028 map[string]Emitter{
1029 `keyword`: Keyword,
1030 `opening_delimiters`: Punctuation,
1031 `delimiter`: nil,
1032 `value`: UsingSelf("pod-declaration"),
1033 `closing_delimiters`: Punctuation,
1034 }),
1035 findBrackets(rakuPodDeclaration),
1036 },
1037 Include("pod-blocks"),
1038 },
1039 "pod-blocks": {
1040 // =begin code
1041 {
1042 `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?<name>code)(?<config>[^\n]*)(?<value>.*?)(?<ws3>^\k<ws>)(?<end_keyword>=end)(?<ws4> +)\k<name>`,
1043 EmitterFunc(podCode),
1044 nil,
1045 },
1046 // =begin
1047 {
1048 `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?!code)(?<name>\w[\w'-]*)(?<config>[^\n]*)(?<value>)(?<closing_delimiters>)`,
1049 ByGroupNames(
1050 map[string]Emitter{
1051 `ws`: Comment,
1052 `keyword`: Keyword,
1053 `ws2`: StringDoc,
1054 `name`: Keyword,
1055 `config`: EmitterFunc(podConfig),
1056 `value`: UsingSelf("pod-begin"),
1057 `closing_delimiters`: Keyword,
1058 }),
1059 findBrackets(rakuPod),
1060 },
1061 // =for ...
1062 {
1063 `(?<=^ *)(?<ws> *)(?<keyword>=(?:for|defn))(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
1064 ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
1065 Push("pod-paragraph"),
1066 },
1067 // =config
1068 {
1069 `(?<=^ *)(?<ws> *)(?<keyword>=config)(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
1070 ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
1071 nil,
1072 },
1073 // =alias
1074 {
1075 `(?<=^ *)(?<ws> *)(?<keyword>=alias)(?<ws2> +)(?<name>\w[\w'-]*)(?<value>[^\n]*\n)`,
1076 ByGroups(Comment, Keyword, StringDoc, Keyword, StringDoc),
1077 nil,
1078 },
1079 // =encoding
1080 {
1081 `(?<=^ *)(?<ws> *)(?<keyword>=encoding)(?<ws2> +)(?<name>[^\n]+)`,
1082 ByGroups(Comment, Keyword, StringDoc, Name),
1083 nil,
1084 },
1085 // =para ...
1086 {
1087 `(?<=^ *)(?<ws> *)(?<keyword>=(?:para|table|pod))(?<config>(?<!\n\s*)[^\n]*\n)`,
1088 ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
1089 Push("pod-paragraph"),
1090 },
1091 // =head1 ...
1092 {
1093 `(?<=^ *)(?<ws> *)(?<keyword>=head\d+)(?<ws2> *)(?<config>#?)`,
1094 ByGroups(Comment, Keyword, GenericHeading, Keyword),
1095 Push("pod-heading"),
1096 },
1097 // =item ...
1098 {
1099 `(?<=^ *)(?<ws> *)(?<keyword>=(?:item\d*|comment|data|[A-Z]+))(?<ws2> *)(?<config>#?)`,
1100 ByGroups(Comment, Keyword, StringDoc, Keyword),
1101 Push("pod-paragraph"),
1102 },
1103 {
1104 `(?<=^ *)(?<ws> *)(?<keyword>=finish)(?<config>[^\n]*)`,
1105 ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
1106 Push("pod-finish"),
1107 },
1108 // ={custom} ...
1109 {
1110 `(?<=^ *)(?<ws> *)(?<name>=\w[\w'-]*)(?<ws2> *)(?<config>#?)`,
1111 ByGroups(Comment, Name, StringDoc, Keyword),
1112 Push("pod-paragraph"),
1113 },
1114 // = podconfig
1115 {
1116 `(?<=^ *)(?<keyword> *=)(?<ws> *)(?<config>(?::\w[\w'-]*(?:` + colonPairOpeningBrackets + `.+?` +
1117 colonPairClosingBrackets + `) *)*\n)`,
1118 ByGroups(Keyword, StringDoc, EmitterFunc(podConfig)),
1119 nil,
1120 },
1121 },
1122 "pod-begin": {
1123 Include("pod-blocks"),
1124 Include("pre-pod-formatter"),
1125 {`.+?`, StringDoc, nil},
1126 },
1127 "pod-declaration": {
1128 Include("pre-pod-formatter"),
1129 {`.+?`, StringDoc, nil},
1130 },
1131 "pod-paragraph": {
1132 {`\n *\n|\n(?=^ *=)`, StringDoc, Pop(1)},
1133 Include("pre-pod-formatter"),
1134 {`.+?`, StringDoc, nil},
1135 },
1136 "pod-single": {
1137 {`\n`, StringDoc, Pop(1)},
1138 Include("pre-pod-formatter"),
1139 {`.+?`, StringDoc, nil},
1140 },
1141 "pod-heading": {
1142 {`\n *\n|\n(?=^ *=)`, GenericHeading, Pop(1)},
1143 Include("pre-pod-formatter"),
1144 {`.+?`, GenericHeading, nil},
1145 },
1146 "pod-finish": {
1147 {`\z`, nil, Pop(1)},
1148 Include("pre-pod-formatter"),
1149 {`.+?`, StringDoc, nil},
1150 },
1151 "pre-pod-formatter": {
1152 // C<code>, B<bold>, ...
1153 {
1154 `(?<keyword>[CBIUDTKRPAELZVMSXN])(?<opening_delimiters><+|«)`,
1155 ByGroups(Keyword, Punctuation),
1156 findBrackets(rakuPodFormatter),
1157 },
1158 },
1159 "pod-formatter": {
1160 // Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
1161 {`>`, Punctuation, Pop(1)},
1162 Include("pre-pod-formatter"),
1163 // Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
1164 {`.+?`, StringOther, nil},
1165 },
1166 "variable": {
1167 {variablePattern, NameVariable, Push("name-adverb")},
1168 {globalVariablePattern, NameVariableGlobal, Push("name-adverb")},
1169 {`[$@]<[^>]+>`, NameVariable, nil},
1170 {`\$[/!¢]`, NameVariable, nil},
1171 {`[$@%]`, NameVariable, nil},
1172 },
1173 "single-quote": {
1174 {`(?<!(?<!\\)\\)'`, Punctuation, Push("single-quote-inner")},
1175 },
1176 "single-quote-inner": {
1177 {`(?<!(?<!(?<!\\)\\)\\)'`, Punctuation, Pop(1)},
1178 Include("escape-single-quote"),
1179 Include("escape-qq"),
1180 {`(?:\\\\|\\[^\\]|[^'\\])+?`, StringSingle, nil},
1181 },
1182 "double-quotes": {
1183 {`(?<!(?<!\\)\\)"`, Punctuation, Pop(1)},
1184 Include("qq"),
1185 },
1186 "<<": {
1187 {`>>(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
1188 Include("ww"),
1189 },
1190 "«": {
1191 {`»(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
1192 Include("ww"),
1193 },
1194 "ww": {
1195 Include("single-quote"),
1196 Include("qq"),
1197 },
1198 "qq": {
1199 Include("qq-variable"),
1200 Include("closure"),
1201 Include(`escape-char`),
1202 Include("escape-hexadecimal"),
1203 Include("escape-c-name"),
1204 Include("escape-qq"),
1205 {`.+?`, StringDouble, nil},
1206 },
1207 "qq-variable": {
1208 {
1209 `(?<!(?<!\\)\\)(?:` + variablePattern + `|` + globalVariablePattern + `)` + colonPairLookahead + `)`,
1210 NameVariable,
1211 Push("qq-variable-extras", "name-adverb"),
1212 },
1213 },
1214 "qq-variable-extras": {
1215 // Method
1216 {
1217 `(?<operator>\.)(?<method_name>` + namePattern + `)` + colonPairLookahead + `\()`,
1218 ByGroupNames(map[string]Emitter{
1219 `operator`: Operator,
1220 `method_name`: NameFunction,
1221 }),
1222 Push(`name-adverb`),
1223 },
1224 // Function/Signature
1225 {
1226 `\(`, Punctuation, replaceRule(
1227 ruleReplacingConfig{
1228 delimiter: []rune(`)`),
1229 tokenType: Punctuation,
1230 stateName: `root`,
1231 pushState: true,
1232 }),
1233 },
1234 Default(Pop(1)),
1235 },
1236 "Q": {
1237 Include("escape-qq"),
1238 {`.+?`, String, nil},
1239 },
1240 "Q-closure": {
1241 Include("escape-qq"),
1242 Include("closure"),
1243 {`.+?`, String, nil},
1244 },
1245 "Q-variable": {
1246 Include("escape-qq"),
1247 Include("qq-variable"),
1248 {`.+?`, String, nil},
1249 },
1250 "closure": {
1251 {`(?<!(?<!\\)\\){`, Punctuation, replaceRule(
1252 ruleReplacingConfig{
1253 delimiter: []rune(`}`),
1254 tokenType: Punctuation,
1255 stateName: `root`,
1256 pushState: true,
1257 }),
1258 },
1259 },
1260 "token": {
1261 // Token signature
1262 {`\(`, Punctuation, replaceRule(
1263 ruleReplacingConfig{
1264 delimiter: []rune(`)`),
1265 tokenType: Punctuation,
1266 stateName: `root`,
1267 pushState: true,
1268 }),
1269 },
1270 {`{`, Punctuation, replaceRule(
1271 ruleReplacingConfig{
1272 delimiter: []rune(`}`),
1273 tokenType: Punctuation,
1274 stateName: `regex`,
1275 popState: true,
1276 pushState: true,
1277 }),
1278 },
1279 {`\s*`, Text, nil},
1280 Default(Pop(1)),
1281 },
1282 }
1283}
1284
// joinRuneMap concatenates the keys of m into a single string.
//
// The keys are visited in map iteration order, so the order of the runes in
// the result is unspecified. A nil or empty map yields the empty string.
func joinRuneMap(m map[rune]rune) string {
	var joined strings.Builder
	for key := range m {
		joined.WriteRune(key)
	}

	return joined.String()
}
1294
// indexAt returns the rune index in str of the first occurrence of substr at
// or after rune position pos that is not escaped with a backslash. It returns
// -1 when there is no such occurrence or when pos lies outside [0, len(str)].
//
// An occurrence counts as escaped when the rune directly before it is a
// backslash that is not itself preceded by another backslash. Only runes at or
// after the current search start are inspected for this check. After an
// escaped occurrence the search resumes one rune past the start of that
// occurrence.
func indexAt(str []rune, substr []rune, pos int) int {
	if pos < 0 || pos > len(str) {
		return -1
	}

	needle := string(substr)

	for {
		tail := str[pos:]
		text := string(tail)

		byteIdx := strings.Index(text, needle)
		if byteIdx < 0 {
			return -1
		}

		// strings.Index reports a byte offset; convert it to a rune offset so it
		// can be used to index tail and be added to pos.
		idx := utf8.RuneCountInString(text[:byteIdx])

		escaped := (idx > 1 && tail[idx-1] == '\\' && tail[idx-2] != '\\') ||
			(idx == 1 && tail[idx-1] == '\\')
		if !escaped {
			return pos + idx
		}

		// Skip the escaped occurrence and keep looking.
		pos += idx + 1
	}
}
1320
// contains reports whether e is one of the elements of s.
func contains(s []string, e string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == e {
			return true
		}
	}
	return false
}
1330
// rulePosition selects which slot of a state's rule list replaceRule
// overwrites.
type rulePosition int

const (
	// topRule addresses the first rule of a state.
	topRule rulePosition = 0
	// bottomRule addresses the last rule of a state. It is declared with an
	// explicit type so that both constants are rulePosition values rather
	// than bottomRule being an untyped integer constant.
	bottomRule rulePosition = -1
)
1337
1338type ruleMakingConfig struct {
1339 delimiter []rune
1340 pattern string
1341 tokenType Emitter
1342 mutator Mutator
1343 numberOfDelimiterChars int
1344}
1345
1346type ruleReplacingConfig struct {
1347 delimiter []rune
1348 pattern string
1349 tokenType Emitter
1350 numberOfDelimiterChars int
1351 mutator Mutator
1352 appendMutator Mutator
1353 rulePosition rulePosition
1354 stateName string
1355 pop bool
1356 popState bool
1357 pushState bool
1358}
1359
1360// Pops rule from state-stack and replaces the rule with the previous rule
1361func popRule(rule ruleReplacingConfig) MutatorFunc {
1362 return func(state *LexerState) error {
1363 stackName := genStackName(rule.stateName, rule.rulePosition)
1364
1365 stack, ok := state.Get(stackName).([]ruleReplacingConfig)
1366
1367 if ok && len(stack) > 0 {
1368 // Pop from stack
1369 stack = stack[:len(stack)-1]
1370 lastRule := stack[len(stack)-1]
1371 lastRule.pushState = false
1372 lastRule.popState = false
1373 lastRule.pop = true
1374 state.Set(stackName, stack)
1375
1376 // Call replaceRule to use the last rule
1377 err := replaceRule(lastRule)(state)
1378 if err != nil {
1379 panic(err)
1380 }
1381 }
1382
1383 return nil
1384 }
1385}
1386
1387// Replaces a state's rule based on the rule config and position
1388func replaceRule(rule ruleReplacingConfig) MutatorFunc {
1389 return func(state *LexerState) error {
1390 stateName := rule.stateName
1391 stackName := genStackName(rule.stateName, rule.rulePosition)
1392
1393 stack, ok := state.Get(stackName).([]ruleReplacingConfig)
1394 if !ok {
1395 stack = []ruleReplacingConfig{}
1396 }
1397
1398 // If state-stack is empty fill it with the placeholder rule
1399 if len(stack) == 0 {
1400 stack = []ruleReplacingConfig{
1401 {
1402 // Placeholder, will be overwritten by mutators, DO NOT REMOVE!
1403 pattern: `\A\z`,
1404 tokenType: nil,
1405 mutator: nil,
1406 stateName: stateName,
1407 rulePosition: rule.rulePosition,
1408 },
1409 }
1410 state.Set(stackName, stack)
1411 }
1412
1413 var mutator Mutator
1414 mutators := []Mutator{}
1415
1416 switch {
1417 case rule.rulePosition == topRule && rule.mutator == nil:
1418 // Default mutator for top rule
1419 mutators = []Mutator{Pop(1), popRule(rule)}
1420 case rule.rulePosition == topRule && rule.mutator != nil:
1421 // Default mutator for top rule, when rule.mutator is set
1422 mutators = []Mutator{rule.mutator, popRule(rule)}
1423 case rule.mutator != nil:
1424 mutators = []Mutator{rule.mutator}
1425 }
1426
1427 if rule.appendMutator != nil {
1428 mutators = append(mutators, rule.appendMutator)
1429 }
1430
1431 if len(mutators) > 0 {
1432 mutator = Mutators(mutators...)
1433 } else {
1434 mutator = nil
1435 }
1436
1437 ruleConfig := ruleMakingConfig{
1438 pattern: rule.pattern,
1439 delimiter: rule.delimiter,
1440 numberOfDelimiterChars: rule.numberOfDelimiterChars,
1441 tokenType: rule.tokenType,
1442 mutator: mutator,
1443 }
1444
1445 cRule := makeRule(ruleConfig)
1446
1447 switch rule.rulePosition {
1448 case topRule:
1449 state.Rules[stateName][0] = cRule
1450 case bottomRule:
1451 state.Rules[stateName][len(state.Rules[stateName])-1] = cRule
1452 }
1453
1454 // Pop state name from stack if asked. State should be popped first before Pushing
1455 if rule.popState {
1456 err := Pop(1).Mutate(state)
1457 if err != nil {
1458 panic(err)
1459 }
1460 }
1461
1462 // Push state name to stack if asked
1463 if rule.pushState {
1464 err := Push(stateName).Mutate(state)
1465 if err != nil {
1466 panic(err)
1467 }
1468 }
1469
1470 if !rule.pop {
1471 state.Set(stackName, append(stack, rule))
1472 }
1473
1474 return nil
1475 }
1476}
1477
1478// Generates rule replacing stack using state name and rule position
1479func genStackName(stateName string, rulePosition rulePosition) (stackName string) {
1480 switch rulePosition {
1481 case topRule:
1482 stackName = stateName + `-top-stack`
1483 case bottomRule:
1484 stackName = stateName + `-bottom-stack`
1485 }
1486 return
1487}
1488
1489// Makes a compiled rule and returns it
1490func makeRule(config ruleMakingConfig) *CompiledRule {
1491 var rePattern string
1492
1493 if len(config.delimiter) > 0 {
1494 delimiter := string(config.delimiter)
1495
1496 if config.numberOfDelimiterChars > 1 {
1497 delimiter = strings.Repeat(delimiter, config.numberOfDelimiterChars)
1498 }
1499
1500 rePattern = `(?<!(?<!\\)\\)` + regexp2.Escape(delimiter)
1501 } else {
1502 rePattern = config.pattern
1503 }
1504
1505 regex := regexp2.MustCompile(rePattern, regexp2.None)
1506
1507 cRule := &CompiledRule{
1508 Rule: Rule{rePattern, config.tokenType, config.mutator},
1509 Regexp: regex,
1510 }
1511
1512 return cRule
1513}
1514
1515// Emitter for colon pairs, changes token state based on key and brackets
1516func colonPair(tokenClass TokenType) Emitter {
1517 return EmitterFunc(func(groups []string, state *LexerState) Iterator {
1518 iterators := []Iterator{}
1519 tokens := []Token{
1520 {Punctuation, state.NamedGroups[`colon`]},
1521 {Punctuation, state.NamedGroups[`opening_delimiters`]},
1522 {Punctuation, state.NamedGroups[`closing_delimiters`]},
1523 }
1524
1525 // Append colon
1526 iterators = append(iterators, Literator(tokens[0]))
1527
1528 if tokenClass == NameAttribute {
1529 iterators = append(iterators, Literator(Token{NameAttribute, state.NamedGroups[`key`]}))
1530 } else {
1531 var keyTokenState string
1532 keyre := regexp.MustCompile(`^\d+$`)
1533 if keyre.MatchString(state.NamedGroups[`key`]) {
1534 keyTokenState = "common"
1535 } else {
1536 keyTokenState = "Q"
1537 }
1538
1539 // Use token state to Tokenise key
1540 if keyTokenState != "" {
1541 iterator, err := state.Lexer.Tokenise(
1542 &TokeniseOptions{
1543 State: keyTokenState,
1544 Nested: true,
1545 }, state.NamedGroups[`key`])
1546
1547 if err != nil {
1548 panic(err)
1549 } else {
1550 // Append key
1551 iterators = append(iterators, iterator)
1552 }
1553 }
1554 }
1555
1556 // Append punctuation
1557 iterators = append(iterators, Literator(tokens[1]))
1558
1559 var valueTokenState string
1560
1561 switch state.NamedGroups[`opening_delimiters`] {
1562 case "(", "{", "[":
1563 valueTokenState = "root"
1564 case "<<", "«":
1565 valueTokenState = "ww"
1566 case "<":
1567 valueTokenState = "Q"
1568 }
1569
1570 // Use token state to Tokenise value
1571 if valueTokenState != "" {
1572 iterator, err := state.Lexer.Tokenise(
1573 &TokeniseOptions{
1574 State: valueTokenState,
1575 Nested: true,
1576 }, state.NamedGroups[`value`])
1577
1578 if err != nil {
1579 panic(err)
1580 } else {
1581 // Append value
1582 iterators = append(iterators, iterator)
1583 }
1584 }
1585 // Append last punctuation
1586 iterators = append(iterators, Literator(tokens[2]))
1587
1588 return Concaterator(iterators...)
1589 })
1590}
1591
1592// Emitter for quoting constructs, changes token state based on quote name and adverbs
1593func quote(groups []string, state *LexerState) Iterator {
1594 keyword := state.NamedGroups[`keyword`]
1595 adverbsStr := state.NamedGroups[`adverbs`]
1596 iterators := []Iterator{}
1597 tokens := []Token{
1598 {Keyword, keyword},
1599 {StringAffix, adverbsStr},
1600 {Text, state.NamedGroups[`ws`]},
1601 {Punctuation, state.NamedGroups[`opening_delimiters`]},
1602 {Punctuation, state.NamedGroups[`closing_delimiters`]},
1603 }
1604
1605 // Append all tokens before dealing with the main string
1606 iterators = append(iterators, Literator(tokens[:4]...))
1607
1608 var tokenStates []string
1609
1610 // Set tokenStates based on adverbs
1611 adverbs := strings.Split(adverbsStr, ":")
1612 for _, adverb := range adverbs {
1613 switch adverb {
1614 case "c", "closure":
1615 tokenStates = append(tokenStates, "Q-closure")
1616 case "qq":
1617 tokenStates = append(tokenStates, "qq")
1618 case "ww":
1619 tokenStates = append(tokenStates, "ww")
1620 case "s", "scalar", "a", "array", "h", "hash", "f", "function":
1621 tokenStates = append(tokenStates, "Q-variable")
1622 }
1623 }
1624
1625 var tokenState string
1626
1627 switch {
1628 case keyword == "qq" || contains(tokenStates, "qq"):
1629 tokenState = "qq"
1630 case adverbsStr == "ww" || contains(tokenStates, "ww"):
1631 tokenState = "ww"
1632 case contains(tokenStates, "Q-closure") && contains(tokenStates, "Q-variable"):
1633 tokenState = "qq"
1634 case contains(tokenStates, "Q-closure"):
1635 tokenState = "Q-closure"
1636 case contains(tokenStates, "Q-variable"):
1637 tokenState = "Q-variable"
1638 default:
1639 tokenState = "Q"
1640 }
1641
1642 iterator, err := state.Lexer.Tokenise(
1643 &TokeniseOptions{
1644 State: tokenState,
1645 Nested: true,
1646 }, state.NamedGroups[`value`])
1647
1648 if err != nil {
1649 panic(err)
1650 } else {
1651 iterators = append(iterators, iterator)
1652 }
1653
1654 // Append the last punctuation
1655 iterators = append(iterators, Literator(tokens[4]))
1656
1657 return Concaterator(iterators...)
1658}
1659
1660// Emitter for pod config, tokenises the properties with "colon-pair-attribute" state
1661func podConfig(groups []string, state *LexerState) Iterator {
1662 // Tokenise pod config
1663 iterator, err := state.Lexer.Tokenise(
1664 &TokeniseOptions{
1665 State: "colon-pair-attribute",
1666 Nested: true,
1667 }, groups[0])
1668
1669 if err != nil {
1670 panic(err)
1671 } else {
1672 return iterator
1673 }
1674}
1675
1676// Emitter for pod code, tokenises the code based on the lang specified
1677func podCode(groups []string, state *LexerState) Iterator {
1678 iterators := []Iterator{}
1679 tokens := []Token{
1680 {Comment, state.NamedGroups[`ws`]},
1681 {Keyword, state.NamedGroups[`keyword`]},
1682 {Keyword, state.NamedGroups[`ws2`]},
1683 {Keyword, state.NamedGroups[`name`]},
1684 {StringDoc, state.NamedGroups[`value`]},
1685 {Comment, state.NamedGroups[`ws3`]},
1686 {Keyword, state.NamedGroups[`end_keyword`]},
1687 {Keyword, state.NamedGroups[`ws4`]},
1688 {Keyword, state.NamedGroups[`name`]},
1689 }
1690
1691 // Append all tokens before dealing with the pod config
1692 iterators = append(iterators, Literator(tokens[:4]...))
1693
1694 // Tokenise pod config
1695 iterators = append(iterators, podConfig([]string{state.NamedGroups[`config`]}, state))
1696
1697 langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(state.NamedGroups[`config`])
1698 var lang string
1699 if len(langMatch) > 1 {
1700 lang = langMatch[1]
1701 }
1702
1703 // Tokenise code based on lang property
1704 sublexer := Get(lang)
1705 if sublexer != nil {
1706 iterator, err := sublexer.Tokenise(nil, state.NamedGroups[`value`])
1707
1708 if err != nil {
1709 panic(err)
1710 } else {
1711 iterators = append(iterators, iterator)
1712 }
1713 } else {
1714 iterators = append(iterators, Literator(tokens[4]))
1715 }
1716
1717 // Append the rest of the tokens
1718 iterators = append(iterators, Literator(tokens[5:]...))
1719
1720 return Concaterator(iterators...)
1721}
Note: See TracBrowser for help on using the repository browser.