package chroma

import (
	"fmt"
)

// An Emitter takes group matches and returns tokens.
type Emitter interface {
	// Emit tokens for the given regex groups.
	Emit(groups []string, state *LexerState) Iterator
}

// SerialisableEmitter is an Emitter that can be serialised and deserialised to/from XML.
type SerialisableEmitter interface {
	Emitter
	EmitterKind() string
}

// EmitterFunc is a function that is an Emitter.
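//
// For example, a one-off Emitter can be written inline as a closure. A minimal
// sketch, assuming this package's Literator helper and Token type:
//
//	asKeyword := EmitterFunc(func(groups []string, _ *LexerState) Iterator {
//		return Literator(Token{Type: Keyword, Value: groups[0]})
//	})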
type EmitterFunc func(groups []string, state *LexerState) Iterator

// Emit tokens for groups.
func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
	return e(groups, state)
}

type Emitters []Emitter

type byGroupsEmitter struct {
	Emitters
}

// ByGroups emits a token for each matching group in the rule's regex.
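//
// A minimal sketch of its use inside a rule; the pattern and token types here
// are illustrative rather than taken from a particular lexer:
//
//	{`(\w+)(\s+)(=)`, ByGroups(NameVariable, Whitespace, Operator), nil},
//
// Each emitter handles the corresponding capture group; if the number of
// emitters does not match the number of groups, the whole match is emitted as
// an Error token.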
func ByGroups(emitters ...Emitter) Emitter {
	return &byGroupsEmitter{Emitters: emitters}
}

func (b *byGroupsEmitter) EmitterKind() string { return "bygroups" }

func (b *byGroupsEmitter) Emit(groups []string, state *LexerState) Iterator {
	iterators := make([]Iterator, 0, len(groups)-1)
	if len(b.Emitters) != len(groups)-1 {
		iterators = append(iterators, Error.Emit(groups, state))
		// panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
	} else {
		for i, group := range groups[1:] {
			if b.Emitters[i] != nil {
				iterators = append(iterators, b.Emitters[i].Emit([]string{group}, state))
			}
		}
	}
	return Concaterator(iterators...)
}

// ByGroupNames emits a token for each named matching group in the rule's regex.
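//
// A minimal sketch with named capture groups; the pattern, group names, and
// token types are illustrative:
//
//	{`(?P<key>\w+)(?P<sep>=)(?P<value>\w+)`, ByGroupNames(map[string]Emitter{
//		"key":   NameAttribute,
//		"sep":   Operator,
//		"value": String,
//	}), nil},
//
// Named groups that have no corresponding entry in the map are emitted as
// Error tokens.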
func ByGroupNames(emitters map[string]Emitter) Emitter {
	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
		iterators := make([]Iterator, 0, len(state.NamedGroups)-1)
		if len(state.NamedGroups)-1 == 0 {
			if emitter, ok := emitters[`0`]; ok {
				iterators = append(iterators, emitter.Emit(groups, state))
			} else {
				iterators = append(iterators, Error.Emit(groups, state))
			}
		} else {
			ruleRegex := state.Rules[state.State][state.Rule].Regexp
			for i := 1; i < len(state.NamedGroups); i++ {
				groupName := ruleRegex.GroupNameFromNumber(i)
				group := state.NamedGroups[groupName]
				if emitter, ok := emitters[groupName]; ok {
					if emitter != nil {
						iterators = append(iterators, emitter.Emit([]string{group}, state))
					}
				} else {
					iterators = append(iterators, Error.Emit([]string{group}, state))
				}
			}
		}
		return Concaterator(iterators...)
	})
}

// UsingByGroup emits tokens for the matched groups in the regex using a
// "sublexer". Used when lexing code blocks where the name of a sublexer is
// contained within the block, for example on a Markdown text block or SQL
// language block.
//
// The sublexer is looked up in the LexerState's LexerRegistry, using the
// value captured by the matched sublexerNameGroup.
//
// If the registry returns a non-nil lexer for the captured sublexer name,
// then tokens for the matched codeGroup will be emitted using the retrieved
// lexer. Otherwise, if the sublexer is nil, tokens will be emitted from the
// passed emitter.
//
// Example:
//
//	var Markdown = internal.Register(MustNewLexer(
//		&Config{
//			Name:      "markdown",
//			Aliases:   []string{"md", "mkd"},
//			Filenames: []string{"*.md", "*.mkd", "*.markdown"},
//			MimeTypes: []string{"text/x-markdown"},
//		},
//		Rules{
//			"root": {
//				{"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)",
//					UsingByGroup(
//						2, 4,
//						String, String, String, Text, String,
//					),
//					nil,
//				},
//			},
//		},
//	))
//
// See lexers/m/markdown.go for the complete example.
//
// Note: panics if the number of emitters does not equal the number of matched
// groups in the regex.
func UsingByGroup(sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
	return &usingByGroup{
		SublexerNameGroup: sublexerNameGroup,
		CodeGroup:         codeGroup,
		Emitters:          emitters,
	}
}

type usingByGroup struct {
	SublexerNameGroup int      `xml:"sublexer_name_group"`
	CodeGroup         int      `xml:"code_group"`
	Emitters          Emitters `xml:"emitters"`
}

func (u *usingByGroup) EmitterKind() string { return "usingbygroup" }
func (u *usingByGroup) Emit(groups []string, state *LexerState) Iterator {
	// bounds check
	if len(u.Emitters) != len(groups)-1 {
		panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
	}

	// grab sublexer
	sublexer := state.Registry.Get(groups[u.SublexerNameGroup])

	// build iterators
	iterators := make([]Iterator, len(groups)-1)
	for i, group := range groups[1:] {
		if i == u.CodeGroup-1 && sublexer != nil {
			var err error
			iterators[i], err = sublexer.Tokenise(nil, groups[u.CodeGroup])
			if err != nil {
				panic(err)
			}
		} else if u.Emitters[i] != nil {
			iterators[i] = u.Emitters[i].Emit([]string{group}, state)
		}
	}
	return Concaterator(iterators...)
}

// UsingLexer returns an Emitter that uses a given Lexer for parsing and emitting.
//
// This Emitter is not serialisable.
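//
// A minimal sketch inside a rule, where goLexer is assumed to be a Lexer value
// obtained elsewhere; the pattern and token types are illustrative:
//
//	{"(?s)(<code>)(.*?)(</code>)", ByGroups(NameTag, UsingLexer(goLexer), NameTag), nil},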
func UsingLexer(lexer Lexer) Emitter {
	return EmitterFunc(func(groups []string, _ *LexerState) Iterator {
		it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
		if err != nil {
			panic(err)
		}
		return it
	})
}

type usingEmitter struct {
	Lexer string `xml:"lexer,attr"`
}

func (u *usingEmitter) EmitterKind() string { return "using" }

func (u *usingEmitter) Emit(groups []string, state *LexerState) Iterator {
	if state.Registry == nil {
		panic(fmt.Sprintf("no LexerRegistry available for Using(%q)", u.Lexer))
	}
	lexer := state.Registry.Get(u.Lexer)
	if lexer == nil {
		panic(fmt.Sprintf("no such lexer %q", u.Lexer))
	}
	it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
	if err != nil {
		panic(err)
	}
	return it
}

// Using returns an Emitter that uses a given Lexer reference for parsing and emitting.
//
// The referenced lexer must be stored in the same LexerRegistry.
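//
// A minimal sketch inside a rule; the pattern, token types, and the "CSS"
// lexer name are illustrative, and the name must resolve in the registry when
// tokenising:
//
//	{"(?s)(<style>)(.*?)(</style>)", ByGroups(NameTag, Using("CSS"), NameTag), nil},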
func Using(lexer string) Emitter {
	return &usingEmitter{Lexer: lexer}
}

type usingSelfEmitter struct {
	State string `xml:"state,attr"`
}

func (u *usingSelfEmitter) EmitterKind() string { return "usingself" }

func (u *usingSelfEmitter) Emit(groups []string, state *LexerState) Iterator {
	it, err := state.Lexer.Tokenise(&TokeniseOptions{State: u.State, Nested: true}, groups[0])
	if err != nil {
		panic(err)
	}
	return it
}

// UsingSelf is like Using, but uses the current Lexer.
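//
// A minimal sketch for recursively lexing a nested region with the lexer's own
// "root" state; the pattern and token types are illustrative:
//
//	{"(?s)(\\()(.*?)(\\))", ByGroups(Punctuation, UsingSelf("root"), Punctuation), nil},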
func UsingSelf(stateName string) Emitter {
	return &usingSelfEmitter{stateName}
}