219 lines
6.3 KiB
Go
219 lines
6.3 KiB
Go
package chroma
|
|
|
|
import (
|
|
"fmt"
|
|
)
|
|
|
|
// An Emitter takes group matches and returns tokens.
|
|
type Emitter interface {
|
|
// Emit tokens for the given regex groups.
|
|
Emit(groups []string, state *LexerState) Iterator
|
|
}
|
|
|
|
// SerialisableEmitter is an Emitter that can be serialised and deserialised to/from JSON.
|
|
type SerialisableEmitter interface {
|
|
Emitter
|
|
EmitterKind() string
|
|
}
|
|
|
|
// EmitterFunc is a function that is an Emitter.
|
|
type EmitterFunc func(groups []string, state *LexerState) Iterator
|
|
|
|
// Emit tokens for groups.
|
|
func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
|
|
return e(groups, state)
|
|
}
|
|
|
|
// Emitters is a slice of Emitter values. The emitters in this file that hold
// one index it by regex group position and skip entries that are nil.
type Emitters []Emitter
|
|
|
|
type byGroupsEmitter struct {
|
|
Emitters
|
|
}
|
|
|
|
// ByGroups emits a token for each matching group in the rule's regex.
|
|
func ByGroups(emitters ...Emitter) Emitter {
|
|
return &byGroupsEmitter{Emitters: emitters}
|
|
}
|
|
|
|
// EmitterKind returns "bygroups", the kind name of this emitter.
func (b *byGroupsEmitter) EmitterKind() string {
	const kind = "bygroups"
	return kind
}
|
|
|
|
func (b *byGroupsEmitter) Emit(groups []string, state *LexerState) Iterator {
|
|
iterators := make([]Iterator, 0, len(groups)-1)
|
|
if len(b.Emitters) != len(groups)-1 {
|
|
iterators = append(iterators, Error.Emit(groups, state))
|
|
// panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
|
|
} else {
|
|
for i, group := range groups[1:] {
|
|
if b.Emitters[i] != nil {
|
|
iterators = append(iterators, b.Emitters[i].Emit([]string{group}, state))
|
|
}
|
|
}
|
|
}
|
|
return Concaterator(iterators...)
|
|
}
|
|
|
|
// ByGroupNames emits a token for each named matching group in the rule's regex.
|
|
func ByGroupNames(emitters map[string]Emitter) Emitter {
|
|
return EmitterFunc(func(groups []string, state *LexerState) Iterator {
|
|
iterators := make([]Iterator, 0, len(state.NamedGroups)-1)
|
|
if len(state.NamedGroups)-1 == 0 {
|
|
if emitter, ok := emitters[`0`]; ok {
|
|
iterators = append(iterators, emitter.Emit(groups, state))
|
|
} else {
|
|
iterators = append(iterators, Error.Emit(groups, state))
|
|
}
|
|
} else {
|
|
ruleRegex := state.Rules[state.State][state.Rule].Regexp
|
|
for i := 1; i < len(state.NamedGroups); i++ {
|
|
groupName := ruleRegex.GroupNameFromNumber(i)
|
|
group := state.NamedGroups[groupName]
|
|
if emitter, ok := emitters[groupName]; ok {
|
|
if emitter != nil {
|
|
iterators = append(iterators, emitter.Emit([]string{group}, state))
|
|
}
|
|
} else {
|
|
iterators = append(iterators, Error.Emit([]string{group}, state))
|
|
}
|
|
}
|
|
}
|
|
return Concaterator(iterators...)
|
|
})
|
|
}
|
|
|
|
// UsingByGroup emits tokens for the matched groups in the regex using a
|
|
// sublexer. Used when lexing code blocks where the name of a sublexer is
|
|
// contained within the block, for example on a Markdown text block or SQL
|
|
// language block.
|
|
//
|
|
// An attempt to load the sublexer will be made using the captured value from
|
|
// the text of the matched sublexerNameGroup. If a sublexer matching the
|
|
// sublexerNameGroup is available, then tokens for the matched codeGroup will
|
|
// be emitted using the sublexer. Otherwise, if no sublexer is available, then
|
|
// tokens will be emitted from the passed emitter.
|
|
//
|
|
// Example:
|
|
//
|
|
// var Markdown = internal.Register(MustNewLexer(
|
|
// &Config{
|
|
// Name: "markdown",
|
|
// Aliases: []string{"md", "mkd"},
|
|
// Filenames: []string{"*.md", "*.mkd", "*.markdown"},
|
|
// MimeTypes: []string{"text/x-markdown"},
|
|
// },
|
|
// Rules{
|
|
// "root": {
|
|
// {"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)",
|
|
// UsingByGroup(
|
|
// 2, 4,
|
|
// String, String, String, Text, String,
|
|
// ),
|
|
// nil,
|
|
// },
|
|
// },
|
|
// },
|
|
// ))
|
|
//
|
|
// See the lexers/markdown.go for the complete example.
|
|
//
|
|
// Note: panic's if the number of emitters does not equal the number of matched
|
|
// groups in the regex.
|
|
func UsingByGroup(sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
|
|
return &usingByGroup{
|
|
SublexerNameGroup: sublexerNameGroup,
|
|
CodeGroup: codeGroup,
|
|
Emitters: emitters,
|
|
}
|
|
}
|
|
|
|
type usingByGroup struct {
|
|
SublexerNameGroup int `xml:"sublexer_name_group"`
|
|
CodeGroup int `xml:"code_group"`
|
|
Emitters Emitters `xml:"emitters"`
|
|
}
|
|
|
|
// EmitterKind returns "usingbygroup", the kind name of this emitter.
func (u *usingByGroup) EmitterKind() string {
	const kind = "usingbygroup"
	return kind
}
|
|
func (u *usingByGroup) Emit(groups []string, state *LexerState) Iterator {
|
|
// bounds check
|
|
if len(u.Emitters) != len(groups)-1 {
|
|
panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
|
|
}
|
|
|
|
// grab sublexer
|
|
sublexer := state.Registry.Get(groups[u.SublexerNameGroup])
|
|
|
|
// build iterators
|
|
iterators := make([]Iterator, len(groups)-1)
|
|
for i, group := range groups[1:] {
|
|
if i == u.CodeGroup-1 && sublexer != nil {
|
|
var err error
|
|
iterators[i], err = sublexer.Tokenise(nil, groups[u.CodeGroup])
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
} else if u.Emitters[i] != nil {
|
|
iterators[i] = u.Emitters[i].Emit([]string{group}, state)
|
|
}
|
|
}
|
|
return Concaterator(iterators...)
|
|
}
|
|
|
|
// UsingLexer returns an Emitter that uses a given Lexer for parsing and emitting.
|
|
//
|
|
// This Emitter is not serialisable.
|
|
func UsingLexer(lexer Lexer) Emitter {
|
|
return EmitterFunc(func(groups []string, _ *LexerState) Iterator {
|
|
it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return it
|
|
})
|
|
}
|
|
|
|
// usingEmitter is the Emitter returned by Using.
type usingEmitter struct {
	// Lexer is the name under which the target lexer is looked up in the
	// lexer state's registry.
	Lexer string `xml:"lexer,attr"`
}
|
|
|
|
// EmitterKind returns "using", the kind name of this emitter.
func (u *usingEmitter) EmitterKind() string {
	const kind = "using"
	return kind
}
|
|
|
|
func (u *usingEmitter) Emit(groups []string, state *LexerState) Iterator {
|
|
if state.Registry == nil {
|
|
panic(fmt.Sprintf("no LexerRegistry available for Using(%q)", u.Lexer))
|
|
}
|
|
lexer := state.Registry.Get(u.Lexer)
|
|
if lexer == nil {
|
|
panic(fmt.Sprintf("no such lexer %q", u.Lexer))
|
|
}
|
|
it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return it
|
|
}
|
|
|
|
// Using returns an Emitter that uses a given Lexer reference for parsing and emitting.
|
|
//
|
|
// The referenced lexer must be stored in the same LexerRegistry.
|
|
func Using(lexer string) Emitter {
|
|
return &usingEmitter{Lexer: lexer}
|
|
}
|
|
|
|
// usingSelfEmitter is the Emitter returned by UsingSelf.
type usingSelfEmitter struct {
	// State is the name of the lexer state in which tokenising starts.
	State string `xml:"state,attr"`
}
|
|
|
|
// EmitterKind returns "usingself", the kind name of this emitter.
func (u *usingSelfEmitter) EmitterKind() string {
	const kind = "usingself"
	return kind
}
|
|
|
|
func (u *usingSelfEmitter) Emit(groups []string, state *LexerState) Iterator {
|
|
it, err := state.Lexer.Tokenise(&TokeniseOptions{State: u.State, Nested: true}, groups[0])
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return it
|
|
}
|
|
|
|
// UsingSelf is like Using, but uses the current Lexer.
|
|
func UsingSelf(stateName string) Emitter {
|
|
return &usingSelfEmitter{stateName}
|
|
}
|