Replaced image dimension string variable with Dimensions struct, replaced handrolled HTML generation with gohtml library, added height and width to img tags
This commit is contained in:
parent
ac67d710c8
commit
43a515c896
19
cmd/files.go
19
cmd/files.go
|
@ -34,6 +34,11 @@ var (
|
|||
extensions = [6]string{".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}
|
||||
)
|
||||
|
||||
type Dimensions struct {
|
||||
Width int
|
||||
Height int
|
||||
}
|
||||
|
||||
type Files struct {
|
||||
Mutex sync.Mutex
|
||||
List map[string][]string
|
||||
|
@ -105,21 +110,21 @@ func humanReadableSize(bytes int) string {
|
|||
float64(bytes)/float64(div), "KMGTPE"[exp])
|
||||
}
|
||||
|
||||
func getImageDimensions(path string) (string, error) {
|
||||
func getImageDimensions(path string) (*Dimensions, error) {
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
return &Dimensions{}, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
myImage, _, err := image.DecodeConfig(file)
|
||||
if errors.Is(err, image.ErrFormat) {
|
||||
return "", nil
|
||||
return &Dimensions{Width: 0, Height: 0}, nil
|
||||
} else if err != nil {
|
||||
return "", err
|
||||
return &Dimensions{}, err
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%vx%v", myImage.Width, myImage.Height), nil
|
||||
return &Dimensions{Width: myImage.Width, Height: myImage.Height}, nil
|
||||
}
|
||||
|
||||
func preparePath(path string) string {
|
||||
|
@ -336,7 +341,7 @@ func pathIsValid(filePath string, paths []string) bool {
|
|||
switch {
|
||||
case Verbose && !matchesPrefix:
|
||||
fmt.Printf("%v | Error: Failed to serve file outside specified path(s): %v\n",
|
||||
time.Now().Format(LOGDATE),
|
||||
time.Now().Format(LogDate),
|
||||
filePath,
|
||||
)
|
||||
|
||||
|
@ -489,7 +494,7 @@ func pickFile(args []string, filters *Filters, sort string, fileCache *[]string)
|
|||
|
||||
if Verbose {
|
||||
fmt.Printf("%v | Scanned %v/%v files across %v directories in %v\n",
|
||||
time.Now().Format(LOGDATE),
|
||||
time.Now().Format(LogDate),
|
||||
stats.GetFilesMatched(),
|
||||
stats.GetFilesTotal(),
|
||||
stats.GetDirectoriesMatched(),
|
||||
|
|
|
@ -10,7 +10,7 @@ import (
|
|||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
var Version = "0.23.1"
|
||||
var Version = "0.24.0"
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(versionCmd)
|
||||
|
|
70
cmd/web.go
70
cmd/web.go
|
@ -17,12 +17,18 @@ import (
|
|||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/yosssi/gohtml"
|
||||
)
|
||||
|
||||
const (
|
||||
LOGDATE string = "2006-01-02T15:04:05.000-07:00"
|
||||
PREFIX string = "/src"
|
||||
LogDate string = `2006-01-02T15:04:05.000-07:00`
|
||||
Prefix string = `/src`
|
||||
RedirectStatusCode int = http.StatusSeeOther
|
||||
|
||||
// You may not like it, but this is what peak string generation looks like.
|
||||
RefreshScriptTemplate string = `<script>setTimeout(function(){window.location.href = '/%v';},%v);</script>`
|
||||
HtmlTemplate string = `<html lang="en"><head><style>a{display:block;height:100%%;width:100%%;text-decoration:none;}img{max-width:100%%;max-height:97vh;object-fit:contain;}</style><title>%v (%vx%v)</title></head><body><a href="/%v"><img src="%v" width="%v" height="%v" alt="Roulette selected: %v"></img></a>%v</body></html>`
|
||||
)
|
||||
|
||||
type Filters struct {
|
||||
|
@ -167,7 +173,7 @@ func stripQueryParams(u string) (string, error) {
|
|||
}
|
||||
|
||||
func generateFilePath(filePath string) string {
|
||||
htmlBody := PREFIX
|
||||
htmlBody := Prefix
|
||||
if runtime.GOOS == "windows" {
|
||||
htmlBody += "/"
|
||||
}
|
||||
|
@ -186,7 +192,7 @@ func refererToUri(referer string) string {
|
|||
return "/" + parts[3]
|
||||
}
|
||||
|
||||
func serveHtml(w http.ResponseWriter, r *http.Request, filePath, dimensions string, filters *Filters) error {
|
||||
func serveHtml(w http.ResponseWriter, r *http.Request, filePath string, dimensions *Dimensions, filters *Filters) error {
|
||||
fileName := filepath.Base(filePath)
|
||||
|
||||
w.Header().Add("Content-Type", "text/html")
|
||||
|
@ -198,43 +204,33 @@ func serveHtml(w http.ResponseWriter, r *http.Request, filePath, dimensions stri
|
|||
|
||||
queryParams, err := generateQueryParams(filters, r.URL.Query().Get("sort"), refreshInterval)
|
||||
|
||||
htmlBody := `<html lang="en">
|
||||
<head>
|
||||
<style>
|
||||
a{display:block;height:100%;width:100%;text-decoration:none}
|
||||
img{max-width:100%;max-height:97vh;height:auto;}
|
||||
</style>
|
||||
<title>`
|
||||
htmlBody += fmt.Sprintf("%v (%v)", fileName, dimensions)
|
||||
htmlBody += `</title>
|
||||
</head>
|
||||
<body>
|
||||
<a href="/`
|
||||
htmlBody += queryParams
|
||||
htmlBody += `"><img src="`
|
||||
htmlBody += generateFilePath(filePath)
|
||||
htmlBody += `"></img></a>`
|
||||
refreshScript := ""
|
||||
if refreshInterval != "0" {
|
||||
r, err := strconv.Atoi(refreshInterval)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
refreshTimer := strconv.Itoa(r * 1000)
|
||||
htmlBody += `
|
||||
<script>
|
||||
setTimeout(function(){
|
||||
window.location.href = '`
|
||||
htmlBody += fmt.Sprintf("/%v", queryParams)
|
||||
htmlBody += `';
|
||||
},`
|
||||
htmlBody += fmt.Sprintf("%v);\n", refreshTimer)
|
||||
htmlBody += ` </script>`
|
||||
refreshScript = fmt.Sprintf(RefreshScriptTemplate,
|
||||
queryParams,
|
||||
refreshTimer)
|
||||
}
|
||||
htmlBody += `
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
_, err = io.WriteString(w, htmlBody)
|
||||
htmlBody := fmt.Sprintf(HtmlTemplate,
|
||||
fileName,
|
||||
dimensions.Width,
|
||||
dimensions.Height,
|
||||
queryParams,
|
||||
generateFilePath(filePath),
|
||||
dimensions.Width,
|
||||
dimensions.Height,
|
||||
fileName,
|
||||
refreshScript,
|
||||
)
|
||||
|
||||
formattedBody := gohtml.Format(htmlBody)
|
||||
|
||||
_, err = io.WriteString(w, formattedBody)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -243,12 +239,12 @@ func serveHtml(w http.ResponseWriter, r *http.Request, filePath, dimensions stri
|
|||
}
|
||||
|
||||
func serveStaticFile(w http.ResponseWriter, r *http.Request, paths []string) error {
|
||||
prefixedFilePath, err := stripQueryParams(r.URL.Path)
|
||||
PrefixedFilePath, err := stripQueryParams(r.URL.Path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
filePath, err := filepath.EvalSymlinks(strings.TrimPrefix(prefixedFilePath, PREFIX))
|
||||
filePath, err := filepath.EvalSymlinks(strings.TrimPrefix(PrefixedFilePath, Prefix))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -279,7 +275,7 @@ func serveStaticFile(w http.ResponseWriter, r *http.Request, paths []string) err
|
|||
|
||||
if Verbose {
|
||||
fmt.Printf("%v | Served %v (%v) to %v in %v\n",
|
||||
startTime.Format(LOGDATE),
|
||||
startTime.Format(LogDate),
|
||||
filePath,
|
||||
humanReadableSize(len(buf)),
|
||||
r.RemoteAddr,
|
||||
|
@ -406,7 +402,7 @@ func ServePage(args []string) error {
|
|||
fileCache := []string{}
|
||||
|
||||
http.Handle("/", serveHtmlHandler(paths, *re, &fileCache))
|
||||
http.Handle(PREFIX+"/", http.StripPrefix(PREFIX, serveStaticFileHandler(paths)))
|
||||
http.Handle(Prefix+"/", http.StripPrefix(Prefix, serveStaticFileHandler(paths)))
|
||||
http.HandleFunc("/favicon.ico", doNothing)
|
||||
|
||||
err = http.ListenAndServe(":"+strconv.FormatInt(int64(Port), 10), nil)
|
||||
|
|
2
go.mod
2
go.mod
|
@ -5,10 +5,12 @@ go 1.19
|
|||
require (
|
||||
github.com/h2non/filetype v1.1.3
|
||||
github.com/spf13/cobra v1.6.1
|
||||
github.com/yosssi/gohtml v0.0.0-20201013000340-ee4748c638f4
|
||||
golang.org/x/image v0.1.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/inconshreveable/mousetrap v1.0.1 // indirect
|
||||
github.com/spf13/pflag v1.0.5 // indirect
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
|
||||
)
|
||||
|
|
3
go.sum
3
go.sum
|
@ -8,6 +8,8 @@ github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA=
|
|||
github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY=
|
||||
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
|
||||
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||
github.com/yosssi/gohtml v0.0.0-20201013000340-ee4748c638f4 h1:0sw0nJM544SpsihWx1bkXdYLQDlzRflMgFJQ4Yih9ts=
|
||||
github.com/yosssi/gohtml v0.0.0-20201013000340-ee4748c638f4/go.mod h1:+ccdNT0xMY1dtc5XBxumbYfOUhmduiGudqaDgD2rVRE=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
|
@ -16,6 +18,7 @@ golang.org/x/image v0.1.0/go.mod h1:iyPr49SD/G/TBxYVB/9RRtGUT5eNbo2u4NamWeQcD5c=
|
|||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
|
@ -0,0 +1,21 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2014 Keiji Yoshida
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,194 @@
|
|||
# GoHTML - HTML formatter for Go
|
||||
|
||||
[![wercker status](https://app.wercker.com/status/926cf3edc004271539be40d705d037bd/s "wercker status")](https://app.wercker.com/project/bykey/926cf3edc004271539be40d705d037bd)
|
||||
[![GoDoc](http://godoc.org/github.com/yosssi/gohtml?status.png)](http://godoc.org/github.com/yosssi/gohtml)
|
||||
|
||||
GoHTML is an HTML formatter for [Go](http://golang.org/). You can format HTML source codes by using this package.
|
||||
|
||||
## Install
|
||||
|
||||
```
|
||||
go get -u github.com/yosssi/gohtml
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
Example Go source code:
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/yosssi/gohtml"
|
||||
)
|
||||
|
||||
func main() {
|
||||
h := `<!DOCTYPE html><html><head><title>This is a title.</title><script type="text/javascript">
|
||||
alert('aaa');
|
||||
if (0 < 1) {
|
||||
alert('bbb');
|
||||
}
|
||||
</script><style type="text/css">
|
||||
body {font-size: 14px;}
|
||||
h1 {
|
||||
font-size: 16px;
|
||||
font-weight: bold;
|
||||
}
|
||||
</style></head><body><form><input type="name"><p>AAA<br>BBB></p></form><!-- This is a comment. --></body></html>`
|
||||
fmt.Println(gohtml.Format(h))
|
||||
}
|
||||
```
|
||||
|
||||
Output:
|
||||
|
||||
```html
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>
|
||||
This is a title.
|
||||
</title>
|
||||
<script type="text/javascript">
|
||||
alert('aaa');
|
||||
if (0 < 1) {
|
||||
alert('bbb');
|
||||
}
|
||||
</script>
|
||||
<style type="text/css">
|
||||
body {font-size: 14px;}
|
||||
h1 {
|
||||
font-size: 16px;
|
||||
font-weight: bold;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<form>
|
||||
<input type="name">
|
||||
<p>
|
||||
AAA
|
||||
<br>
|
||||
BBB>
|
||||
</p>
|
||||
</form>
|
||||
<!-- This is a comment. -->
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
||||
## Output Formatted HTML with Line No
|
||||
|
||||
You can output formatted HTML source codes with line no by calling `FormatWithLineNo`:
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/yosssi/gohtml"
|
||||
)
|
||||
|
||||
func main() {
|
||||
h := `<!DOCTYPE html><html><head><title>This is a title.</title><script type="text/javascript">
|
||||
alert('aaa');
|
||||
if (0 < 1) {
|
||||
alert('bbb');
|
||||
}
|
||||
</script><style type="text/css">
|
||||
body {font-size: 14px;}
|
||||
h1 {
|
||||
font-size: 16px;
|
||||
font-weight: bold;
|
||||
}
|
||||
</style></head><body><form><input type="name"><p>AAA<br>BBB></p></form><!-- This is a comment. --></body></html>`
|
||||
fmt.Println(gohtml.FormatWithLineNo(h))
|
||||
}
|
||||
```
|
||||
|
||||
Output:
|
||||
|
||||
```sh
|
||||
1 <!DOCTYPE html>
|
||||
2 <html>
|
||||
3 <head>
|
||||
4 <title>
|
||||
5 This is a title.
|
||||
6 </title>
|
||||
7 <script type="text/javascript">
|
||||
8 alert('aaa');
|
||||
9 if (0 < 1) {
|
||||
10 alert('bbb');
|
||||
11 }
|
||||
12 </script>
|
||||
13 <style type="text/css">
|
||||
14 body {font-size: 14px;}
|
||||
15 h1 {
|
||||
16 font-size: 16px;
|
||||
17 font-weight: bold;
|
||||
18 }
|
||||
19 </style>
|
||||
20 </head>
|
||||
21 <body>
|
||||
22 <form>
|
||||
23 <input type="name">
|
||||
24 <p>
|
||||
25 AAA
|
||||
26 <br>
|
||||
27 BBB>
|
||||
28 </p>
|
||||
29 </form>
|
||||
30 <!-- This is a comment. -->
|
||||
31 </body>
|
||||
32 </html>
|
||||
```
|
||||
|
||||
## Format Go html/template Package's Template's Execute Result
|
||||
|
||||
You can format [Go html/template package](http://golang.org/pkg/html/template/)'s template's execute result by passing `Writer` to the `tpl.Execute`:
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"text/template"
|
||||
|
||||
"github.com/yosssi/gohtml"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
tpl, err := template.New("test").Parse("<html><head></head><body>{{.Msg}}</body></html>")
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
data := map[string]interface{}{"Msg": "Hello!"}
|
||||
|
||||
err = tpl.Execute(gohtml.NewWriter(os.Stdout), data)
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Output:
|
||||
|
||||
```html
|
||||
<html>
|
||||
<head>
|
||||
</head>
|
||||
<body>
|
||||
Hello!
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
||||
## Docs
|
||||
|
||||
* [GoDoc](https://godoc.org/github.com/yosssi/gohtml)
|
|
@ -0,0 +1,7 @@
|
|||
package gohtml
|
||||
|
||||
const (
|
||||
defaultIndentString = " "
|
||||
startIndent = 0
|
||||
defaultLastElement = "</html>"
|
||||
)
|
|
@ -0,0 +1,2 @@
|
|||
// Package gohtml provides an HTML formatting function.
|
||||
package gohtml
|
|
@ -0,0 +1,7 @@
|
|||
package gohtml
|
||||
|
||||
// An element represents an HTML element.
|
||||
type element interface {
|
||||
isInline() bool
|
||||
write(*formattedBuffer, bool) bool
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
package gohtml
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Format parses the input HTML string, formats it and returns the result.
|
||||
func Format(s string) string {
|
||||
return parse(strings.NewReader(s)).html()
|
||||
}
|
||||
|
||||
// FormatBytes parses input HTML as bytes, formats it and returns the result.
|
||||
func FormatBytes(b []byte) []byte {
|
||||
return parse(bytes.NewReader(b)).bytes()
|
||||
}
|
||||
|
||||
// Format parses the input HTML string, formats it and returns the result with line no.
|
||||
func FormatWithLineNo(s string) string {
|
||||
return AddLineNo(Format(s))
|
||||
}
|
||||
|
||||
func AddLineNo(s string) string {
|
||||
lines := strings.Split(s, "\n")
|
||||
maxLineNoStrLen := len(strconv.Itoa(len(lines)))
|
||||
bf := &bytes.Buffer{}
|
||||
for i, line := range lines {
|
||||
lineNoStr := strconv.Itoa(i + 1)
|
||||
if i > 0 {
|
||||
bf.WriteString("\n")
|
||||
}
|
||||
bf.WriteString(strings.Repeat(" ", maxLineNoStrLen-len(lineNoStr)) + lineNoStr + " " + line)
|
||||
}
|
||||
return bf.String()
|
||||
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
package gohtml
|
||||
|
||||
import "bytes"
|
||||
|
||||
// Column to wrap lines to (disabled by default)
|
||||
var LineWrapColumn = 0
|
||||
|
||||
// Maxmimum characters a long word can extend past LineWrapColumn without wrapping
|
||||
var LineWrapMaxSpillover = 5
|
||||
|
||||
// An htmlDocument represents an HTML document.
|
||||
type htmlDocument struct {
|
||||
elements []element
|
||||
}
|
||||
|
||||
// html generates an HTML source code and returns it.
|
||||
func (htmlDoc *htmlDocument) html() string {
|
||||
return string(htmlDoc.bytes())
|
||||
}
|
||||
|
||||
// bytes reads from htmlDocument's internal array of elements and returns HTML source code
|
||||
func (htmlDoc *htmlDocument) bytes() []byte {
|
||||
bf := &formattedBuffer{
|
||||
buffer: &bytes.Buffer{},
|
||||
|
||||
lineWrapColumn: LineWrapColumn,
|
||||
lineWrapMaxSpillover: LineWrapMaxSpillover,
|
||||
|
||||
indentString: defaultIndentString,
|
||||
indentLevel: startIndent,
|
||||
}
|
||||
|
||||
isPreviousNodeInline := true
|
||||
for _, child := range htmlDoc.elements {
|
||||
isPreviousNodeInline = child.write(bf, isPreviousNodeInline)
|
||||
}
|
||||
return bf.buffer.Bytes()
|
||||
}
|
||||
|
||||
// append appends an element to the htmlDocument.
|
||||
func (htmlDoc *htmlDocument) append(e element) {
|
||||
htmlDoc.elements = append(htmlDoc.elements, e)
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
package gohtml
|
||||
|
||||
import (
|
||||
"golang.org/x/net/html"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// parse parses a stirng and converts it into an html.
|
||||
func parse(r io.Reader) *htmlDocument {
|
||||
htmlDoc := &htmlDocument{}
|
||||
tokenizer := html.NewTokenizer(r)
|
||||
for {
|
||||
if errorToken, _, _ := parseToken(tokenizer, htmlDoc, nil); errorToken {
|
||||
break
|
||||
}
|
||||
}
|
||||
return htmlDoc
|
||||
}
|
||||
|
||||
// Function that identifies which tags will be treated as containing preformatted
|
||||
// content. Such tags will have the formatting of all its contents preserved
|
||||
// unchanged.
|
||||
// The opening tag html.Token is passed to this function.
|
||||
// By default, only <pre> and <textarea> tags are considered preformatted.
|
||||
var IsPreformatted = func(token html.Token) bool {
|
||||
return token.Data == "pre" || token.Data == "textarea"
|
||||
}
|
||||
|
||||
func parseToken(tokenizer *html.Tokenizer, htmlDoc *htmlDocument, parent *tagElement) (bool, bool, string) {
|
||||
tokenType := tokenizer.Next()
|
||||
switch tokenType {
|
||||
case html.ErrorToken:
|
||||
return true, false, ""
|
||||
case html.TextToken:
|
||||
text := string(tokenizer.Raw())
|
||||
if strings.TrimSpace(text) == "" && (parent == nil || !parent.isRaw) {
|
||||
break
|
||||
}
|
||||
textElement := &textElement{text: text, parent: parent}
|
||||
appendElement(htmlDoc, parent, textElement)
|
||||
case html.StartTagToken:
|
||||
raw := string(tokenizer.Raw())
|
||||
token := tokenizer.Token()
|
||||
tagElement := &tagElement{
|
||||
tagName: string(token.Data),
|
||||
startTagRaw: raw,
|
||||
isRaw: IsPreformatted(token) || (parent != nil && parent.isRaw),
|
||||
parent: parent,
|
||||
}
|
||||
appendElement(htmlDoc, parent, tagElement)
|
||||
for {
|
||||
errorToken, parentEnded, unsetEndTag := parseToken(tokenizer, htmlDoc, tagElement)
|
||||
if errorToken {
|
||||
return true, false, ""
|
||||
}
|
||||
if parentEnded {
|
||||
if unsetEndTag != "" {
|
||||
return false, false, unsetEndTag
|
||||
}
|
||||
break
|
||||
}
|
||||
if unsetEndTag != "" {
|
||||
return false, false, setEndTagRaw(tokenizer, tagElement, unsetEndTag)
|
||||
}
|
||||
}
|
||||
case html.EndTagToken:
|
||||
return false, true, setEndTagRaw(tokenizer, parent, getTagName(tokenizer))
|
||||
case html.DoctypeToken, html.SelfClosingTagToken, html.CommentToken:
|
||||
tagElement := &tagElement{
|
||||
tagName: getTagName(tokenizer),
|
||||
startTagRaw: string(tokenizer.Raw()),
|
||||
isRaw: parent != nil && parent.isRaw,
|
||||
parent: parent,
|
||||
}
|
||||
appendElement(htmlDoc, parent, tagElement)
|
||||
}
|
||||
return false, false, ""
|
||||
}
|
||||
|
||||
// appendElement appends the element to the htmlDocument or parent tagElement.
|
||||
func appendElement(htmlDoc *htmlDocument, parent *tagElement, e element) {
|
||||
if parent != nil {
|
||||
parent.appendChild(e)
|
||||
} else {
|
||||
htmlDoc.append(e)
|
||||
}
|
||||
}
|
||||
|
||||
// getTagName gets a tagName from tokenizer.
|
||||
func getTagName(tokenizer *html.Tokenizer) string {
|
||||
tagName, _ := tokenizer.TagName()
|
||||
return string(tagName)
|
||||
}
|
||||
|
||||
// setEndTagRaw sets an endTagRaw to the parent.
|
||||
func setEndTagRaw(tokenizer *html.Tokenizer, parent *tagElement, tagName string) string {
|
||||
if parent != nil && parent.tagName == tagName {
|
||||
parent.endTagRaw = string(tokenizer.Raw())
|
||||
return ""
|
||||
}
|
||||
return tagName
|
||||
}
|
|
@ -0,0 +1,143 @@
|
|||
package gohtml
|
||||
|
||||
import "bytes"
|
||||
|
||||
// A tagElement represents a tag element of an HTML document.
|
||||
type tagElement struct {
|
||||
tagName string
|
||||
startTagRaw string
|
||||
endTagRaw string
|
||||
|
||||
parent *tagElement
|
||||
children []element
|
||||
|
||||
isRaw bool
|
||||
isChildrenInlineCache *bool
|
||||
}
|
||||
|
||||
// Enable condensing a tag with only inline children onto a single line, or
|
||||
// completely inlining it with sibling nodes.
|
||||
// Tags to be treated as inline can be set in `InlineTags`.
|
||||
// Only inline tags will be completely inlined, while other condensable tags
|
||||
// will be given their own dedicated (single) line.
|
||||
var Condense bool
|
||||
|
||||
// Tags that are considered inline tags.
|
||||
// Note: Text nodes are always considered to be inline
|
||||
var InlineTags = map[string]bool{
|
||||
"a": true,
|
||||
"code": true,
|
||||
"em": true,
|
||||
"span": true,
|
||||
"strong": true,
|
||||
}
|
||||
|
||||
// Maximum length of an opening inline tag before it's un-inlined
|
||||
var InlineTagMaxLength = 40
|
||||
|
||||
func (e *tagElement) isInline() bool {
|
||||
if e.isRaw || !InlineTags[e.tagName] || len(e.startTagRaw) > InlineTagMaxLength {
|
||||
return false
|
||||
}
|
||||
return e.isChildrenInline()
|
||||
}
|
||||
|
||||
func (e *tagElement) isChildrenInline() bool {
|
||||
if !Condense {
|
||||
return false
|
||||
}
|
||||
if e.isChildrenInlineCache != nil {
|
||||
return *e.isChildrenInlineCache
|
||||
}
|
||||
|
||||
isInline := true
|
||||
for _, child := range e.children {
|
||||
isInline = isInline && child.isInline()
|
||||
}
|
||||
|
||||
e.isChildrenInlineCache = &isInline
|
||||
return isInline
|
||||
}
|
||||
|
||||
// write writes a tag to the buffer.
|
||||
func (e *tagElement) write(bf *formattedBuffer, isPreviousNodeInline bool) bool {
|
||||
if e.isRaw {
|
||||
if e.parent != nil && !e.parent.isRaw {
|
||||
bf.writeLineFeed()
|
||||
bf.writeIndent()
|
||||
bf.rawMode = true
|
||||
defer func() {
|
||||
bf.rawMode = false
|
||||
}()
|
||||
}
|
||||
bf.writeToken(e.startTagRaw, formatterTokenType_Tag)
|
||||
for _, child := range e.children {
|
||||
child.write(bf, true)
|
||||
}
|
||||
bf.writeToken(e.endTagRaw, formatterTokenType_Tag)
|
||||
return false
|
||||
}
|
||||
|
||||
if e.isChildrenInline() && (e.endTagRaw != "" || e.isInline()) {
|
||||
// Write the condensed output to a separate buffer, in case it doesn't work out
|
||||
condensedBuffer := *bf
|
||||
condensedBuffer.buffer = &bytes.Buffer{}
|
||||
|
||||
if bf.buffer.Len() > 0 && (!isPreviousNodeInline || !e.isInline()) {
|
||||
condensedBuffer.writeLineFeed()
|
||||
}
|
||||
condensedBuffer.writeToken(e.startTagRaw, formatterTokenType_Tag)
|
||||
if !isPreviousNodeInline && e.endTagRaw != "" {
|
||||
condensedBuffer.indentLevel++
|
||||
}
|
||||
|
||||
for _, child := range e.children {
|
||||
child.write(&condensedBuffer, true)
|
||||
}
|
||||
if e.endTagRaw != "" {
|
||||
condensedBuffer.writeToken(e.endTagRaw, formatterTokenType_Tag)
|
||||
if !isPreviousNodeInline {
|
||||
condensedBuffer.indentLevel--
|
||||
}
|
||||
}
|
||||
|
||||
if e.isInline() || bytes.IndexAny(condensedBuffer.buffer.Bytes()[1:], "\n") == -1 {
|
||||
// If we're an inline tag, or there were no newlines were in the buffer,
|
||||
// replace the original with the condensed version
|
||||
condensedBuffer.buffer = bytes.NewBuffer(bytes.Join([][]byte{
|
||||
bf.buffer.Bytes(), condensedBuffer.buffer.Bytes(),
|
||||
}, []byte{}))
|
||||
*bf = condensedBuffer
|
||||
|
||||
return e.isInline()
|
||||
}
|
||||
}
|
||||
|
||||
if bf.buffer.Len() > 0 {
|
||||
bf.writeLineFeed()
|
||||
}
|
||||
bf.writeToken(e.startTagRaw, formatterTokenType_Tag)
|
||||
if e.endTagRaw != "" {
|
||||
bf.indentLevel++
|
||||
}
|
||||
|
||||
isPreviousNodeInline = false
|
||||
for _, child := range e.children {
|
||||
isPreviousNodeInline = child.write(bf, isPreviousNodeInline)
|
||||
}
|
||||
|
||||
if e.endTagRaw != "" {
|
||||
if len(e.children) > 0 {
|
||||
bf.writeLineFeed()
|
||||
}
|
||||
bf.indentLevel--
|
||||
bf.writeToken(e.endTagRaw, formatterTokenType_Tag)
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// appendChild append an element to the element's children.
|
||||
func (e *tagElement) appendChild(child element) {
|
||||
e.children = append(e.children, child)
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
package gohtml
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A textElement represents a text element of an HTML document.
|
||||
type textElement struct {
|
||||
text string
|
||||
parent *tagElement
|
||||
}
|
||||
|
||||
func (e *textElement) isInline() bool {
|
||||
// Text nodes are always considered to be inline
|
||||
return true
|
||||
}
|
||||
|
||||
// write writes a text to the buffer.
|
||||
func (e *textElement) write(bf *formattedBuffer, isPreviousNodeInline bool) bool {
|
||||
text := unifyLineFeed(e.text)
|
||||
if e.parent != nil && e.parent.isRaw {
|
||||
bf.writeToken(text, formatterTokenType_Text)
|
||||
return true
|
||||
}
|
||||
|
||||
if !isPreviousNodeInline {
|
||||
bf.writeLineFeed()
|
||||
}
|
||||
|
||||
// Collapse leading and trailing spaces
|
||||
text = regexp.MustCompile(`^\s+|\s+$`).ReplaceAllString(text, " ")
|
||||
lines := strings.Split(text, "\n")
|
||||
for l, line := range lines {
|
||||
if l > 0 {
|
||||
bf.writeLineFeed()
|
||||
}
|
||||
for _, word := range strings.Split(line, " ") {
|
||||
bf.writeToken(word, formatterTokenType_Text)
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
package gohtml
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
type formatterTokenType int
|
||||
|
||||
const (
|
||||
formatterTokenType_Nothing formatterTokenType = iota
|
||||
formatterTokenType_Tag
|
||||
formatterTokenType_Text
|
||||
)
|
||||
|
||||
type formattedBuffer struct {
|
||||
buffer *bytes.Buffer
|
||||
rawMode bool
|
||||
|
||||
indentString string
|
||||
indentLevel int
|
||||
|
||||
lineWrapColumn int
|
||||
lineWrapMaxSpillover int
|
||||
|
||||
curLineLength int
|
||||
prevTokenType formatterTokenType
|
||||
}
|
||||
|
||||
func (bf *formattedBuffer) writeLineFeed() {
|
||||
if !bf.rawMode {
|
||||
// Strip trailing newlines
|
||||
bf.buffer = bytes.NewBuffer(bytes.TrimRightFunc(
|
||||
bf.buffer.Bytes(),
|
||||
func(r rune) bool {
|
||||
return r != '\n' && unicode.IsSpace(r)
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
bf.buffer.WriteString("\n")
|
||||
bf.curLineLength = 0
|
||||
bf.prevTokenType = formatterTokenType_Nothing
|
||||
}
|
||||
|
||||
func (bf *formattedBuffer) writeIndent() {
|
||||
bf.buffer.WriteString(strings.Repeat(bf.indentString, bf.indentLevel))
|
||||
bf.curLineLength += len(bf.indentString) * bf.indentLevel
|
||||
}
|
||||
|
||||
func (bf *formattedBuffer) writeToken(token string, kind formatterTokenType) {
|
||||
if bf.rawMode {
|
||||
bf.buffer.WriteString(token)
|
||||
bf.curLineLength += len(token)
|
||||
return
|
||||
}
|
||||
|
||||
if bf.prevTokenType == formatterTokenType_Nothing && strings.TrimSpace(token) == "" {
|
||||
// It's a whitespace token, but we already have indentation which functions
|
||||
// the same, so we ignore it
|
||||
return
|
||||
}
|
||||
|
||||
toWrite := token
|
||||
if kind == formatterTokenType_Text && bf.prevTokenType == formatterTokenType_Text {
|
||||
toWrite = " " + token
|
||||
}
|
||||
|
||||
if bf.prevTokenType != formatterTokenType_Nothing && bf.lineWrapColumn > 0 {
|
||||
switch {
|
||||
case bf.curLineLength > bf.lineWrapColumn:
|
||||
// Current line is too long
|
||||
fallthrough
|
||||
|
||||
case bf.curLineLength+len(toWrite) > bf.lineWrapColumn+bf.lineWrapMaxSpillover:
|
||||
// Current line + new token is too long even with allowed spillover
|
||||
fallthrough
|
||||
|
||||
case bf.curLineLength+len(toWrite) > bf.lineWrapColumn &&
|
||||
bf.curLineLength > bf.lineWrapColumn-bf.lineWrapMaxSpillover:
|
||||
// Current line + new token is too long and doesn't quality for spillover
|
||||
|
||||
bf.writeLineFeed()
|
||||
bf.writeToken(token, kind)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if bf.curLineLength == 0 {
|
||||
bf.writeIndent()
|
||||
}
|
||||
bf.buffer.WriteString(toWrite)
|
||||
bf.curLineLength += len(toWrite)
|
||||
bf.prevTokenType = kind
|
||||
}
|
||||
|
||||
// unifyLineFeed unifies line feeds.
|
||||
func unifyLineFeed(s string) string {
|
||||
return strings.Replace(strings.Replace(s, "\r\n", "\n", -1), "\r", "\n", -1)
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
box: wercker/golang@1.1.2
|
||||
# Build definition
|
||||
build:
|
||||
# The steps that will be executed on build
|
||||
steps:
|
||||
# Sets the go workspace and places you package
|
||||
# at the right place in the workspace tree
|
||||
- setup-go-workspace
|
||||
|
||||
# Gets the dependencies
|
||||
- script:
|
||||
name: go get
|
||||
code: |
|
||||
cd $WERCKER_SOURCE_DIR
|
||||
go version
|
||||
go get -t ./...
|
||||
|
||||
# Build the project
|
||||
- script:
|
||||
name: go build
|
||||
code: |
|
||||
go build ./...
|
||||
|
||||
# Test the project
|
||||
- script:
|
||||
name: go test
|
||||
code: |
|
||||
go test ./...
|
|
@ -0,0 +1,33 @@
|
|||
package gohtml
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
)
|
||||
|
||||
// A Writer represents a formatted HTML source codes writer.
|
||||
type Writer struct {
|
||||
writer io.Writer
|
||||
lastElement string
|
||||
bf *bytes.Buffer
|
||||
}
|
||||
|
||||
// SetLastElement set the lastElement to the Writer.
|
||||
func (wr *Writer) SetLastElement(lastElement string) *Writer {
|
||||
wr.lastElement = lastElement
|
||||
return wr
|
||||
}
|
||||
|
||||
// Write writes the parameter.
|
||||
func (wr *Writer) Write(p []byte) (n int, err error) {
|
||||
n, _ = wr.bf.Write(p) // (*bytes.Buffer).Write never produces an error
|
||||
if bytes.HasSuffix(p, []byte(wr.lastElement)) {
|
||||
_, err = wr.writer.Write([]byte(Format(wr.bf.String()) + "\n"))
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// NewWriter generates a Writer and returns it.
|
||||
func NewWriter(wr io.Writer) *Writer {
|
||||
return &Writer{writer: wr, lastElement: defaultLastElement, bf: &bytes.Buffer{}}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,22 @@
|
|||
Additional IP Rights Grant (Patents)
|
||||
|
||||
"This implementation" means the copyrightable works distributed by
|
||||
Google as part of the Go project.
|
||||
|
||||
Google hereby grants to You a perpetual, worldwide, non-exclusive,
|
||||
no-charge, royalty-free, irrevocable (except as stated in this section)
|
||||
patent license to make, have made, use, offer to sell, sell, import,
|
||||
transfer and otherwise run, modify and propagate the contents of this
|
||||
implementation of Go, where such license applies only to those patent
|
||||
claims, both currently owned or controlled by Google and acquired in
|
||||
the future, licensable by Google that are necessarily infringed by this
|
||||
implementation of Go. This grant does not include claims that would be
|
||||
infringed only as a consequence of further modification of this
|
||||
implementation. If you or your agent or exclusive licensee institute or
|
||||
order or agree to the institution of patent litigation against any
|
||||
entity (including a cross-claim or counterclaim in a lawsuit) alleging
|
||||
that this implementation of Go or any code incorporated within this
|
||||
implementation of Go constitutes direct or contributory patent
|
||||
infringement, or inducement of patent infringement, then any patent
|
||||
rights granted to you under this License for this implementation of Go
|
||||
shall terminate as of the date such litigation is filed.
|
|
@ -0,0 +1,78 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package atom provides integer codes (also known as atoms) for a fixed set of
|
||||
// frequently occurring HTML strings: tag names and attribute keys such as "p"
|
||||
// and "id".
|
||||
//
|
||||
// Sharing an atom's name between all elements with the same tag can result in
|
||||
// fewer string allocations when tokenizing and parsing HTML. Integer
|
||||
// comparisons are also generally faster than string comparisons.
|
||||
//
|
||||
// The value of an atom's particular code is not guaranteed to stay the same
|
||||
// between versions of this package. Neither is any ordering guaranteed:
|
||||
// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
|
||||
// be dense. The only guarantees are that e.g. looking up "div" will yield
|
||||
// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
|
||||
package atom // import "golang.org/x/net/html/atom"
|
||||
|
||||
// Atom is an integer code for a string. The zero value maps to "".
|
||||
type Atom uint32
|
||||
|
||||
// String returns the atom's name.
|
||||
func (a Atom) String() string {
|
||||
start := uint32(a >> 8)
|
||||
n := uint32(a & 0xff)
|
||||
if start+n > uint32(len(atomText)) {
|
||||
return ""
|
||||
}
|
||||
return atomText[start : start+n]
|
||||
}
|
||||
|
||||
func (a Atom) string() string {
|
||||
return atomText[a>>8 : a>>8+a&0xff]
|
||||
}
|
||||
|
||||
// fnv computes the FNV hash with an arbitrary starting value h.
|
||||
func fnv(h uint32, s []byte) uint32 {
|
||||
for i := range s {
|
||||
h ^= uint32(s[i])
|
||||
h *= 16777619
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
||||
func match(s string, t []byte) bool {
|
||||
for i, c := range t {
|
||||
if s[i] != c {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Lookup returns the atom whose name is s. It returns zero if there is no
|
||||
// such atom. The lookup is case sensitive.
|
||||
func Lookup(s []byte) Atom {
|
||||
if len(s) == 0 || len(s) > maxAtomLen {
|
||||
return 0
|
||||
}
|
||||
h := fnv(hash0, s)
|
||||
if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
|
||||
return a
|
||||
}
|
||||
if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
|
||||
return a
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// String returns a string whose contents are equal to s. In that sense, it is
|
||||
// equivalent to string(s) but may be more efficient.
|
||||
func String(s []byte) string {
|
||||
if a := Lookup(s); a != 0 {
|
||||
return a.String()
|
||||
}
|
||||
return string(s)
|
||||
}
|
|
@ -0,0 +1,783 @@
|
|||
// Code generated by go generate gen.go; DO NOT EDIT.
|
||||
|
||||
//go:generate go run gen.go
|
||||
|
||||
package atom
|
||||
|
||||
const (
|
||||
A Atom = 0x1
|
||||
Abbr Atom = 0x4
|
||||
Accept Atom = 0x1a06
|
||||
AcceptCharset Atom = 0x1a0e
|
||||
Accesskey Atom = 0x2c09
|
||||
Acronym Atom = 0xaa07
|
||||
Action Atom = 0x27206
|
||||
Address Atom = 0x6f307
|
||||
Align Atom = 0xb105
|
||||
Allowfullscreen Atom = 0x2080f
|
||||
Allowpaymentrequest Atom = 0xc113
|
||||
Allowusermedia Atom = 0xdd0e
|
||||
Alt Atom = 0xf303
|
||||
Annotation Atom = 0x1c90a
|
||||
AnnotationXml Atom = 0x1c90e
|
||||
Applet Atom = 0x31906
|
||||
Area Atom = 0x35604
|
||||
Article Atom = 0x3fc07
|
||||
As Atom = 0x3c02
|
||||
Aside Atom = 0x10705
|
||||
Async Atom = 0xff05
|
||||
Audio Atom = 0x11505
|
||||
Autocomplete Atom = 0x2780c
|
||||
Autofocus Atom = 0x12109
|
||||
Autoplay Atom = 0x13c08
|
||||
B Atom = 0x101
|
||||
Base Atom = 0x3b04
|
||||
Basefont Atom = 0x3b08
|
||||
Bdi Atom = 0xba03
|
||||
Bdo Atom = 0x14b03
|
||||
Bgsound Atom = 0x15e07
|
||||
Big Atom = 0x17003
|
||||
Blink Atom = 0x17305
|
||||
Blockquote Atom = 0x1870a
|
||||
Body Atom = 0x2804
|
||||
Br Atom = 0x202
|
||||
Button Atom = 0x19106
|
||||
Canvas Atom = 0x10306
|
||||
Caption Atom = 0x23107
|
||||
Center Atom = 0x22006
|
||||
Challenge Atom = 0x29b09
|
||||
Charset Atom = 0x2107
|
||||
Checked Atom = 0x47907
|
||||
Cite Atom = 0x19c04
|
||||
Class Atom = 0x56405
|
||||
Code Atom = 0x5c504
|
||||
Col Atom = 0x1ab03
|
||||
Colgroup Atom = 0x1ab08
|
||||
Color Atom = 0x1bf05
|
||||
Cols Atom = 0x1c404
|
||||
Colspan Atom = 0x1c407
|
||||
Command Atom = 0x1d707
|
||||
Content Atom = 0x58b07
|
||||
Contenteditable Atom = 0x58b0f
|
||||
Contextmenu Atom = 0x3800b
|
||||
Controls Atom = 0x1de08
|
||||
Coords Atom = 0x1ea06
|
||||
Crossorigin Atom = 0x1fb0b
|
||||
Data Atom = 0x4a504
|
||||
Datalist Atom = 0x4a508
|
||||
Datetime Atom = 0x2b808
|
||||
Dd Atom = 0x2d702
|
||||
Default Atom = 0x10a07
|
||||
Defer Atom = 0x5c705
|
||||
Del Atom = 0x45203
|
||||
Desc Atom = 0x56104
|
||||
Details Atom = 0x7207
|
||||
Dfn Atom = 0x8703
|
||||
Dialog Atom = 0xbb06
|
||||
Dir Atom = 0x9303
|
||||
Dirname Atom = 0x9307
|
||||
Disabled Atom = 0x16408
|
||||
Div Atom = 0x16b03
|
||||
Dl Atom = 0x5e602
|
||||
Download Atom = 0x46308
|
||||
Draggable Atom = 0x17a09
|
||||
Dropzone Atom = 0x40508
|
||||
Dt Atom = 0x64b02
|
||||
Em Atom = 0x6e02
|
||||
Embed Atom = 0x6e05
|
||||
Enctype Atom = 0x28d07
|
||||
Face Atom = 0x21e04
|
||||
Fieldset Atom = 0x22608
|
||||
Figcaption Atom = 0x22e0a
|
||||
Figure Atom = 0x24806
|
||||
Font Atom = 0x3f04
|
||||
Footer Atom = 0xf606
|
||||
For Atom = 0x25403
|
||||
ForeignObject Atom = 0x2540d
|
||||
Foreignobject Atom = 0x2610d
|
||||
Form Atom = 0x26e04
|
||||
Formaction Atom = 0x26e0a
|
||||
Formenctype Atom = 0x2890b
|
||||
Formmethod Atom = 0x2a40a
|
||||
Formnovalidate Atom = 0x2ae0e
|
||||
Formtarget Atom = 0x2c00a
|
||||
Frame Atom = 0x8b05
|
||||
Frameset Atom = 0x8b08
|
||||
H1 Atom = 0x15c02
|
||||
H2 Atom = 0x2de02
|
||||
H3 Atom = 0x30d02
|
||||
H4 Atom = 0x34502
|
||||
H5 Atom = 0x34f02
|
||||
H6 Atom = 0x64d02
|
||||
Head Atom = 0x33104
|
||||
Header Atom = 0x33106
|
||||
Headers Atom = 0x33107
|
||||
Height Atom = 0x5206
|
||||
Hgroup Atom = 0x2ca06
|
||||
Hidden Atom = 0x2d506
|
||||
High Atom = 0x2db04
|
||||
Hr Atom = 0x15702
|
||||
Href Atom = 0x2e004
|
||||
Hreflang Atom = 0x2e008
|
||||
Html Atom = 0x5604
|
||||
HttpEquiv Atom = 0x2e80a
|
||||
I Atom = 0x601
|
||||
Icon Atom = 0x58a04
|
||||
Id Atom = 0x10902
|
||||
Iframe Atom = 0x2fc06
|
||||
Image Atom = 0x30205
|
||||
Img Atom = 0x30703
|
||||
Input Atom = 0x44b05
|
||||
Inputmode Atom = 0x44b09
|
||||
Ins Atom = 0x20403
|
||||
Integrity Atom = 0x23f09
|
||||
Is Atom = 0x16502
|
||||
Isindex Atom = 0x30f07
|
||||
Ismap Atom = 0x31605
|
||||
Itemid Atom = 0x38b06
|
||||
Itemprop Atom = 0x19d08
|
||||
Itemref Atom = 0x3cd07
|
||||
Itemscope Atom = 0x67109
|
||||
Itemtype Atom = 0x31f08
|
||||
Kbd Atom = 0xb903
|
||||
Keygen Atom = 0x3206
|
||||
Keytype Atom = 0xd607
|
||||
Kind Atom = 0x17704
|
||||
Label Atom = 0x5905
|
||||
Lang Atom = 0x2e404
|
||||
Legend Atom = 0x18106
|
||||
Li Atom = 0xb202
|
||||
Link Atom = 0x17404
|
||||
List Atom = 0x4a904
|
||||
Listing Atom = 0x4a907
|
||||
Loop Atom = 0x5d04
|
||||
Low Atom = 0xc303
|
||||
Main Atom = 0x1004
|
||||
Malignmark Atom = 0xb00a
|
||||
Manifest Atom = 0x6d708
|
||||
Map Atom = 0x31803
|
||||
Mark Atom = 0xb604
|
||||
Marquee Atom = 0x32707
|
||||
Math Atom = 0x32e04
|
||||
Max Atom = 0x33d03
|
||||
Maxlength Atom = 0x33d09
|
||||
Media Atom = 0xe605
|
||||
Mediagroup Atom = 0xe60a
|
||||
Menu Atom = 0x38704
|
||||
Menuitem Atom = 0x38708
|
||||
Meta Atom = 0x4b804
|
||||
Meter Atom = 0x9805
|
||||
Method Atom = 0x2a806
|
||||
Mglyph Atom = 0x30806
|
||||
Mi Atom = 0x34702
|
||||
Min Atom = 0x34703
|
||||
Minlength Atom = 0x34709
|
||||
Mn Atom = 0x2b102
|
||||
Mo Atom = 0xa402
|
||||
Ms Atom = 0x67402
|
||||
Mtext Atom = 0x35105
|
||||
Multiple Atom = 0x35f08
|
||||
Muted Atom = 0x36705
|
||||
Name Atom = 0x9604
|
||||
Nav Atom = 0x1303
|
||||
Nobr Atom = 0x3704
|
||||
Noembed Atom = 0x6c07
|
||||
Noframes Atom = 0x8908
|
||||
Nomodule Atom = 0xa208
|
||||
Nonce Atom = 0x1a605
|
||||
Noscript Atom = 0x21608
|
||||
Novalidate Atom = 0x2b20a
|
||||
Object Atom = 0x26806
|
||||
Ol Atom = 0x13702
|
||||
Onabort Atom = 0x19507
|
||||
Onafterprint Atom = 0x2360c
|
||||
Onautocomplete Atom = 0x2760e
|
||||
Onautocompleteerror Atom = 0x27613
|
||||
Onauxclick Atom = 0x61f0a
|
||||
Onbeforeprint Atom = 0x69e0d
|
||||
Onbeforeunload Atom = 0x6e70e
|
||||
Onblur Atom = 0x56d06
|
||||
Oncancel Atom = 0x11908
|
||||
Oncanplay Atom = 0x14d09
|
||||
Oncanplaythrough Atom = 0x14d10
|
||||
Onchange Atom = 0x41b08
|
||||
Onclick Atom = 0x2f507
|
||||
Onclose Atom = 0x36c07
|
||||
Oncontextmenu Atom = 0x37e0d
|
||||
Oncopy Atom = 0x39106
|
||||
Oncuechange Atom = 0x3970b
|
||||
Oncut Atom = 0x3a205
|
||||
Ondblclick Atom = 0x3a70a
|
||||
Ondrag Atom = 0x3b106
|
||||
Ondragend Atom = 0x3b109
|
||||
Ondragenter Atom = 0x3ba0b
|
||||
Ondragexit Atom = 0x3c50a
|
||||
Ondragleave Atom = 0x3df0b
|
||||
Ondragover Atom = 0x3ea0a
|
||||
Ondragstart Atom = 0x3f40b
|
||||
Ondrop Atom = 0x40306
|
||||
Ondurationchange Atom = 0x41310
|
||||
Onemptied Atom = 0x40a09
|
||||
Onended Atom = 0x42307
|
||||
Onerror Atom = 0x42a07
|
||||
Onfocus Atom = 0x43107
|
||||
Onhashchange Atom = 0x43d0c
|
||||
Oninput Atom = 0x44907
|
||||
Oninvalid Atom = 0x45509
|
||||
Onkeydown Atom = 0x45e09
|
||||
Onkeypress Atom = 0x46b0a
|
||||
Onkeyup Atom = 0x48007
|
||||
Onlanguagechange Atom = 0x48d10
|
||||
Onload Atom = 0x49d06
|
||||
Onloadeddata Atom = 0x49d0c
|
||||
Onloadedmetadata Atom = 0x4b010
|
||||
Onloadend Atom = 0x4c609
|
||||
Onloadstart Atom = 0x4cf0b
|
||||
Onmessage Atom = 0x4da09
|
||||
Onmessageerror Atom = 0x4da0e
|
||||
Onmousedown Atom = 0x4e80b
|
||||
Onmouseenter Atom = 0x4f30c
|
||||
Onmouseleave Atom = 0x4ff0c
|
||||
Onmousemove Atom = 0x50b0b
|
||||
Onmouseout Atom = 0x5160a
|
||||
Onmouseover Atom = 0x5230b
|
||||
Onmouseup Atom = 0x52e09
|
||||
Onmousewheel Atom = 0x53c0c
|
||||
Onoffline Atom = 0x54809
|
||||
Ononline Atom = 0x55108
|
||||
Onpagehide Atom = 0x5590a
|
||||
Onpageshow Atom = 0x5730a
|
||||
Onpaste Atom = 0x57f07
|
||||
Onpause Atom = 0x59a07
|
||||
Onplay Atom = 0x5a406
|
||||
Onplaying Atom = 0x5a409
|
||||
Onpopstate Atom = 0x5ad0a
|
||||
Onprogress Atom = 0x5b70a
|
||||
Onratechange Atom = 0x5cc0c
|
||||
Onrejectionhandled Atom = 0x5d812
|
||||
Onreset Atom = 0x5ea07
|
||||
Onresize Atom = 0x5f108
|
||||
Onscroll Atom = 0x60008
|
||||
Onsecuritypolicyviolation Atom = 0x60819
|
||||
Onseeked Atom = 0x62908
|
||||
Onseeking Atom = 0x63109
|
||||
Onselect Atom = 0x63a08
|
||||
Onshow Atom = 0x64406
|
||||
Onsort Atom = 0x64f06
|
||||
Onstalled Atom = 0x65909
|
||||
Onstorage Atom = 0x66209
|
||||
Onsubmit Atom = 0x66b08
|
||||
Onsuspend Atom = 0x67b09
|
||||
Ontimeupdate Atom = 0x400c
|
||||
Ontoggle Atom = 0x68408
|
||||
Onunhandledrejection Atom = 0x68c14
|
||||
Onunload Atom = 0x6ab08
|
||||
Onvolumechange Atom = 0x6b30e
|
||||
Onwaiting Atom = 0x6c109
|
||||
Onwheel Atom = 0x6ca07
|
||||
Open Atom = 0x1a304
|
||||
Optgroup Atom = 0x5f08
|
||||
Optimum Atom = 0x6d107
|
||||
Option Atom = 0x6e306
|
||||
Output Atom = 0x51d06
|
||||
P Atom = 0xc01
|
||||
Param Atom = 0xc05
|
||||
Pattern Atom = 0x6607
|
||||
Picture Atom = 0x7b07
|
||||
Ping Atom = 0xef04
|
||||
Placeholder Atom = 0x1310b
|
||||
Plaintext Atom = 0x1b209
|
||||
Playsinline Atom = 0x1400b
|
||||
Poster Atom = 0x2cf06
|
||||
Pre Atom = 0x47003
|
||||
Preload Atom = 0x48607
|
||||
Progress Atom = 0x5b908
|
||||
Prompt Atom = 0x53606
|
||||
Public Atom = 0x58606
|
||||
Q Atom = 0xcf01
|
||||
Radiogroup Atom = 0x30a
|
||||
Rb Atom = 0x3a02
|
||||
Readonly Atom = 0x35708
|
||||
Referrerpolicy Atom = 0x3d10e
|
||||
Rel Atom = 0x48703
|
||||
Required Atom = 0x24c08
|
||||
Reversed Atom = 0x8008
|
||||
Rows Atom = 0x9c04
|
||||
Rowspan Atom = 0x9c07
|
||||
Rp Atom = 0x23c02
|
||||
Rt Atom = 0x19a02
|
||||
Rtc Atom = 0x19a03
|
||||
Ruby Atom = 0xfb04
|
||||
S Atom = 0x2501
|
||||
Samp Atom = 0x7804
|
||||
Sandbox Atom = 0x12907
|
||||
Scope Atom = 0x67505
|
||||
Scoped Atom = 0x67506
|
||||
Script Atom = 0x21806
|
||||
Seamless Atom = 0x37108
|
||||
Section Atom = 0x56807
|
||||
Select Atom = 0x63c06
|
||||
Selected Atom = 0x63c08
|
||||
Shape Atom = 0x1e505
|
||||
Size Atom = 0x5f504
|
||||
Sizes Atom = 0x5f505
|
||||
Slot Atom = 0x1ef04
|
||||
Small Atom = 0x20605
|
||||
Sortable Atom = 0x65108
|
||||
Sorted Atom = 0x33706
|
||||
Source Atom = 0x37806
|
||||
Spacer Atom = 0x43706
|
||||
Span Atom = 0x9f04
|
||||
Spellcheck Atom = 0x4740a
|
||||
Src Atom = 0x5c003
|
||||
Srcdoc Atom = 0x5c006
|
||||
Srclang Atom = 0x5f907
|
||||
Srcset Atom = 0x6f906
|
||||
Start Atom = 0x3fa05
|
||||
Step Atom = 0x58304
|
||||
Strike Atom = 0xd206
|
||||
Strong Atom = 0x6dd06
|
||||
Style Atom = 0x6ff05
|
||||
Sub Atom = 0x66d03
|
||||
Summary Atom = 0x70407
|
||||
Sup Atom = 0x70b03
|
||||
Svg Atom = 0x70e03
|
||||
System Atom = 0x71106
|
||||
Tabindex Atom = 0x4be08
|
||||
Table Atom = 0x59505
|
||||
Target Atom = 0x2c406
|
||||
Tbody Atom = 0x2705
|
||||
Td Atom = 0x9202
|
||||
Template Atom = 0x71408
|
||||
Textarea Atom = 0x35208
|
||||
Tfoot Atom = 0xf505
|
||||
Th Atom = 0x15602
|
||||
Thead Atom = 0x33005
|
||||
Time Atom = 0x4204
|
||||
Title Atom = 0x11005
|
||||
Tr Atom = 0xcc02
|
||||
Track Atom = 0x1ba05
|
||||
Translate Atom = 0x1f209
|
||||
Tt Atom = 0x6802
|
||||
Type Atom = 0xd904
|
||||
Typemustmatch Atom = 0x2900d
|
||||
U Atom = 0xb01
|
||||
Ul Atom = 0xa702
|
||||
Updateviacache Atom = 0x460e
|
||||
Usemap Atom = 0x59e06
|
||||
Value Atom = 0x1505
|
||||
Var Atom = 0x16d03
|
||||
Video Atom = 0x2f105
|
||||
Wbr Atom = 0x57c03
|
||||
Width Atom = 0x64905
|
||||
Workertype Atom = 0x71c0a
|
||||
Wrap Atom = 0x72604
|
||||
Xmp Atom = 0x12f03
|
||||
)
|
||||
|
||||
const hash0 = 0x81cdf10e
|
||||
|
||||
const maxAtomLen = 25
|
||||
|
||||
var table = [1 << 9]Atom{
|
||||
0x1: 0xe60a, // mediagroup
|
||||
0x2: 0x2e404, // lang
|
||||
0x4: 0x2c09, // accesskey
|
||||
0x5: 0x8b08, // frameset
|
||||
0x7: 0x63a08, // onselect
|
||||
0x8: 0x71106, // system
|
||||
0xa: 0x64905, // width
|
||||
0xc: 0x2890b, // formenctype
|
||||
0xd: 0x13702, // ol
|
||||
0xe: 0x3970b, // oncuechange
|
||||
0x10: 0x14b03, // bdo
|
||||
0x11: 0x11505, // audio
|
||||
0x12: 0x17a09, // draggable
|
||||
0x14: 0x2f105, // video
|
||||
0x15: 0x2b102, // mn
|
||||
0x16: 0x38704, // menu
|
||||
0x17: 0x2cf06, // poster
|
||||
0x19: 0xf606, // footer
|
||||
0x1a: 0x2a806, // method
|
||||
0x1b: 0x2b808, // datetime
|
||||
0x1c: 0x19507, // onabort
|
||||
0x1d: 0x460e, // updateviacache
|
||||
0x1e: 0xff05, // async
|
||||
0x1f: 0x49d06, // onload
|
||||
0x21: 0x11908, // oncancel
|
||||
0x22: 0x62908, // onseeked
|
||||
0x23: 0x30205, // image
|
||||
0x24: 0x5d812, // onrejectionhandled
|
||||
0x26: 0x17404, // link
|
||||
0x27: 0x51d06, // output
|
||||
0x28: 0x33104, // head
|
||||
0x29: 0x4ff0c, // onmouseleave
|
||||
0x2a: 0x57f07, // onpaste
|
||||
0x2b: 0x5a409, // onplaying
|
||||
0x2c: 0x1c407, // colspan
|
||||
0x2f: 0x1bf05, // color
|
||||
0x30: 0x5f504, // size
|
||||
0x31: 0x2e80a, // http-equiv
|
||||
0x33: 0x601, // i
|
||||
0x34: 0x5590a, // onpagehide
|
||||
0x35: 0x68c14, // onunhandledrejection
|
||||
0x37: 0x42a07, // onerror
|
||||
0x3a: 0x3b08, // basefont
|
||||
0x3f: 0x1303, // nav
|
||||
0x40: 0x17704, // kind
|
||||
0x41: 0x35708, // readonly
|
||||
0x42: 0x30806, // mglyph
|
||||
0x44: 0xb202, // li
|
||||
0x46: 0x2d506, // hidden
|
||||
0x47: 0x70e03, // svg
|
||||
0x48: 0x58304, // step
|
||||
0x49: 0x23f09, // integrity
|
||||
0x4a: 0x58606, // public
|
||||
0x4c: 0x1ab03, // col
|
||||
0x4d: 0x1870a, // blockquote
|
||||
0x4e: 0x34f02, // h5
|
||||
0x50: 0x5b908, // progress
|
||||
0x51: 0x5f505, // sizes
|
||||
0x52: 0x34502, // h4
|
||||
0x56: 0x33005, // thead
|
||||
0x57: 0xd607, // keytype
|
||||
0x58: 0x5b70a, // onprogress
|
||||
0x59: 0x44b09, // inputmode
|
||||
0x5a: 0x3b109, // ondragend
|
||||
0x5d: 0x3a205, // oncut
|
||||
0x5e: 0x43706, // spacer
|
||||
0x5f: 0x1ab08, // colgroup
|
||||
0x62: 0x16502, // is
|
||||
0x65: 0x3c02, // as
|
||||
0x66: 0x54809, // onoffline
|
||||
0x67: 0x33706, // sorted
|
||||
0x69: 0x48d10, // onlanguagechange
|
||||
0x6c: 0x43d0c, // onhashchange
|
||||
0x6d: 0x9604, // name
|
||||
0x6e: 0xf505, // tfoot
|
||||
0x6f: 0x56104, // desc
|
||||
0x70: 0x33d03, // max
|
||||
0x72: 0x1ea06, // coords
|
||||
0x73: 0x30d02, // h3
|
||||
0x74: 0x6e70e, // onbeforeunload
|
||||
0x75: 0x9c04, // rows
|
||||
0x76: 0x63c06, // select
|
||||
0x77: 0x9805, // meter
|
||||
0x78: 0x38b06, // itemid
|
||||
0x79: 0x53c0c, // onmousewheel
|
||||
0x7a: 0x5c006, // srcdoc
|
||||
0x7d: 0x1ba05, // track
|
||||
0x7f: 0x31f08, // itemtype
|
||||
0x82: 0xa402, // mo
|
||||
0x83: 0x41b08, // onchange
|
||||
0x84: 0x33107, // headers
|
||||
0x85: 0x5cc0c, // onratechange
|
||||
0x86: 0x60819, // onsecuritypolicyviolation
|
||||
0x88: 0x4a508, // datalist
|
||||
0x89: 0x4e80b, // onmousedown
|
||||
0x8a: 0x1ef04, // slot
|
||||
0x8b: 0x4b010, // onloadedmetadata
|
||||
0x8c: 0x1a06, // accept
|
||||
0x8d: 0x26806, // object
|
||||
0x91: 0x6b30e, // onvolumechange
|
||||
0x92: 0x2107, // charset
|
||||
0x93: 0x27613, // onautocompleteerror
|
||||
0x94: 0xc113, // allowpaymentrequest
|
||||
0x95: 0x2804, // body
|
||||
0x96: 0x10a07, // default
|
||||
0x97: 0x63c08, // selected
|
||||
0x98: 0x21e04, // face
|
||||
0x99: 0x1e505, // shape
|
||||
0x9b: 0x68408, // ontoggle
|
||||
0x9e: 0x64b02, // dt
|
||||
0x9f: 0xb604, // mark
|
||||
0xa1: 0xb01, // u
|
||||
0xa4: 0x6ab08, // onunload
|
||||
0xa5: 0x5d04, // loop
|
||||
0xa6: 0x16408, // disabled
|
||||
0xaa: 0x42307, // onended
|
||||
0xab: 0xb00a, // malignmark
|
||||
0xad: 0x67b09, // onsuspend
|
||||
0xae: 0x35105, // mtext
|
||||
0xaf: 0x64f06, // onsort
|
||||
0xb0: 0x19d08, // itemprop
|
||||
0xb3: 0x67109, // itemscope
|
||||
0xb4: 0x17305, // blink
|
||||
0xb6: 0x3b106, // ondrag
|
||||
0xb7: 0xa702, // ul
|
||||
0xb8: 0x26e04, // form
|
||||
0xb9: 0x12907, // sandbox
|
||||
0xba: 0x8b05, // frame
|
||||
0xbb: 0x1505, // value
|
||||
0xbc: 0x66209, // onstorage
|
||||
0xbf: 0xaa07, // acronym
|
||||
0xc0: 0x19a02, // rt
|
||||
0xc2: 0x202, // br
|
||||
0xc3: 0x22608, // fieldset
|
||||
0xc4: 0x2900d, // typemustmatch
|
||||
0xc5: 0xa208, // nomodule
|
||||
0xc6: 0x6c07, // noembed
|
||||
0xc7: 0x69e0d, // onbeforeprint
|
||||
0xc8: 0x19106, // button
|
||||
0xc9: 0x2f507, // onclick
|
||||
0xca: 0x70407, // summary
|
||||
0xcd: 0xfb04, // ruby
|
||||
0xce: 0x56405, // class
|
||||
0xcf: 0x3f40b, // ondragstart
|
||||
0xd0: 0x23107, // caption
|
||||
0xd4: 0xdd0e, // allowusermedia
|
||||
0xd5: 0x4cf0b, // onloadstart
|
||||
0xd9: 0x16b03, // div
|
||||
0xda: 0x4a904, // list
|
||||
0xdb: 0x32e04, // math
|
||||
0xdc: 0x44b05, // input
|
||||
0xdf: 0x3ea0a, // ondragover
|
||||
0xe0: 0x2de02, // h2
|
||||
0xe2: 0x1b209, // plaintext
|
||||
0xe4: 0x4f30c, // onmouseenter
|
||||
0xe7: 0x47907, // checked
|
||||
0xe8: 0x47003, // pre
|
||||
0xea: 0x35f08, // multiple
|
||||
0xeb: 0xba03, // bdi
|
||||
0xec: 0x33d09, // maxlength
|
||||
0xed: 0xcf01, // q
|
||||
0xee: 0x61f0a, // onauxclick
|
||||
0xf0: 0x57c03, // wbr
|
||||
0xf2: 0x3b04, // base
|
||||
0xf3: 0x6e306, // option
|
||||
0xf5: 0x41310, // ondurationchange
|
||||
0xf7: 0x8908, // noframes
|
||||
0xf9: 0x40508, // dropzone
|
||||
0xfb: 0x67505, // scope
|
||||
0xfc: 0x8008, // reversed
|
||||
0xfd: 0x3ba0b, // ondragenter
|
||||
0xfe: 0x3fa05, // start
|
||||
0xff: 0x12f03, // xmp
|
||||
0x100: 0x5f907, // srclang
|
||||
0x101: 0x30703, // img
|
||||
0x104: 0x101, // b
|
||||
0x105: 0x25403, // for
|
||||
0x106: 0x10705, // aside
|
||||
0x107: 0x44907, // oninput
|
||||
0x108: 0x35604, // area
|
||||
0x109: 0x2a40a, // formmethod
|
||||
0x10a: 0x72604, // wrap
|
||||
0x10c: 0x23c02, // rp
|
||||
0x10d: 0x46b0a, // onkeypress
|
||||
0x10e: 0x6802, // tt
|
||||
0x110: 0x34702, // mi
|
||||
0x111: 0x36705, // muted
|
||||
0x112: 0xf303, // alt
|
||||
0x113: 0x5c504, // code
|
||||
0x114: 0x6e02, // em
|
||||
0x115: 0x3c50a, // ondragexit
|
||||
0x117: 0x9f04, // span
|
||||
0x119: 0x6d708, // manifest
|
||||
0x11a: 0x38708, // menuitem
|
||||
0x11b: 0x58b07, // content
|
||||
0x11d: 0x6c109, // onwaiting
|
||||
0x11f: 0x4c609, // onloadend
|
||||
0x121: 0x37e0d, // oncontextmenu
|
||||
0x123: 0x56d06, // onblur
|
||||
0x124: 0x3fc07, // article
|
||||
0x125: 0x9303, // dir
|
||||
0x126: 0xef04, // ping
|
||||
0x127: 0x24c08, // required
|
||||
0x128: 0x45509, // oninvalid
|
||||
0x129: 0xb105, // align
|
||||
0x12b: 0x58a04, // icon
|
||||
0x12c: 0x64d02, // h6
|
||||
0x12d: 0x1c404, // cols
|
||||
0x12e: 0x22e0a, // figcaption
|
||||
0x12f: 0x45e09, // onkeydown
|
||||
0x130: 0x66b08, // onsubmit
|
||||
0x131: 0x14d09, // oncanplay
|
||||
0x132: 0x70b03, // sup
|
||||
0x133: 0xc01, // p
|
||||
0x135: 0x40a09, // onemptied
|
||||
0x136: 0x39106, // oncopy
|
||||
0x137: 0x19c04, // cite
|
||||
0x138: 0x3a70a, // ondblclick
|
||||
0x13a: 0x50b0b, // onmousemove
|
||||
0x13c: 0x66d03, // sub
|
||||
0x13d: 0x48703, // rel
|
||||
0x13e: 0x5f08, // optgroup
|
||||
0x142: 0x9c07, // rowspan
|
||||
0x143: 0x37806, // source
|
||||
0x144: 0x21608, // noscript
|
||||
0x145: 0x1a304, // open
|
||||
0x146: 0x20403, // ins
|
||||
0x147: 0x2540d, // foreignObject
|
||||
0x148: 0x5ad0a, // onpopstate
|
||||
0x14a: 0x28d07, // enctype
|
||||
0x14b: 0x2760e, // onautocomplete
|
||||
0x14c: 0x35208, // textarea
|
||||
0x14e: 0x2780c, // autocomplete
|
||||
0x14f: 0x15702, // hr
|
||||
0x150: 0x1de08, // controls
|
||||
0x151: 0x10902, // id
|
||||
0x153: 0x2360c, // onafterprint
|
||||
0x155: 0x2610d, // foreignobject
|
||||
0x156: 0x32707, // marquee
|
||||
0x157: 0x59a07, // onpause
|
||||
0x158: 0x5e602, // dl
|
||||
0x159: 0x5206, // height
|
||||
0x15a: 0x34703, // min
|
||||
0x15b: 0x9307, // dirname
|
||||
0x15c: 0x1f209, // translate
|
||||
0x15d: 0x5604, // html
|
||||
0x15e: 0x34709, // minlength
|
||||
0x15f: 0x48607, // preload
|
||||
0x160: 0x71408, // template
|
||||
0x161: 0x3df0b, // ondragleave
|
||||
0x162: 0x3a02, // rb
|
||||
0x164: 0x5c003, // src
|
||||
0x165: 0x6dd06, // strong
|
||||
0x167: 0x7804, // samp
|
||||
0x168: 0x6f307, // address
|
||||
0x169: 0x55108, // ononline
|
||||
0x16b: 0x1310b, // placeholder
|
||||
0x16c: 0x2c406, // target
|
||||
0x16d: 0x20605, // small
|
||||
0x16e: 0x6ca07, // onwheel
|
||||
0x16f: 0x1c90a, // annotation
|
||||
0x170: 0x4740a, // spellcheck
|
||||
0x171: 0x7207, // details
|
||||
0x172: 0x10306, // canvas
|
||||
0x173: 0x12109, // autofocus
|
||||
0x174: 0xc05, // param
|
||||
0x176: 0x46308, // download
|
||||
0x177: 0x45203, // del
|
||||
0x178: 0x36c07, // onclose
|
||||
0x179: 0xb903, // kbd
|
||||
0x17a: 0x31906, // applet
|
||||
0x17b: 0x2e004, // href
|
||||
0x17c: 0x5f108, // onresize
|
||||
0x17e: 0x49d0c, // onloadeddata
|
||||
0x180: 0xcc02, // tr
|
||||
0x181: 0x2c00a, // formtarget
|
||||
0x182: 0x11005, // title
|
||||
0x183: 0x6ff05, // style
|
||||
0x184: 0xd206, // strike
|
||||
0x185: 0x59e06, // usemap
|
||||
0x186: 0x2fc06, // iframe
|
||||
0x187: 0x1004, // main
|
||||
0x189: 0x7b07, // picture
|
||||
0x18c: 0x31605, // ismap
|
||||
0x18e: 0x4a504, // data
|
||||
0x18f: 0x5905, // label
|
||||
0x191: 0x3d10e, // referrerpolicy
|
||||
0x192: 0x15602, // th
|
||||
0x194: 0x53606, // prompt
|
||||
0x195: 0x56807, // section
|
||||
0x197: 0x6d107, // optimum
|
||||
0x198: 0x2db04, // high
|
||||
0x199: 0x15c02, // h1
|
||||
0x19a: 0x65909, // onstalled
|
||||
0x19b: 0x16d03, // var
|
||||
0x19c: 0x4204, // time
|
||||
0x19e: 0x67402, // ms
|
||||
0x19f: 0x33106, // header
|
||||
0x1a0: 0x4da09, // onmessage
|
||||
0x1a1: 0x1a605, // nonce
|
||||
0x1a2: 0x26e0a, // formaction
|
||||
0x1a3: 0x22006, // center
|
||||
0x1a4: 0x3704, // nobr
|
||||
0x1a5: 0x59505, // table
|
||||
0x1a6: 0x4a907, // listing
|
||||
0x1a7: 0x18106, // legend
|
||||
0x1a9: 0x29b09, // challenge
|
||||
0x1aa: 0x24806, // figure
|
||||
0x1ab: 0xe605, // media
|
||||
0x1ae: 0xd904, // type
|
||||
0x1af: 0x3f04, // font
|
||||
0x1b0: 0x4da0e, // onmessageerror
|
||||
0x1b1: 0x37108, // seamless
|
||||
0x1b2: 0x8703, // dfn
|
||||
0x1b3: 0x5c705, // defer
|
||||
0x1b4: 0xc303, // low
|
||||
0x1b5: 0x19a03, // rtc
|
||||
0x1b6: 0x5230b, // onmouseover
|
||||
0x1b7: 0x2b20a, // novalidate
|
||||
0x1b8: 0x71c0a, // workertype
|
||||
0x1ba: 0x3cd07, // itemref
|
||||
0x1bd: 0x1, // a
|
||||
0x1be: 0x31803, // map
|
||||
0x1bf: 0x400c, // ontimeupdate
|
||||
0x1c0: 0x15e07, // bgsound
|
||||
0x1c1: 0x3206, // keygen
|
||||
0x1c2: 0x2705, // tbody
|
||||
0x1c5: 0x64406, // onshow
|
||||
0x1c7: 0x2501, // s
|
||||
0x1c8: 0x6607, // pattern
|
||||
0x1cc: 0x14d10, // oncanplaythrough
|
||||
0x1ce: 0x2d702, // dd
|
||||
0x1cf: 0x6f906, // srcset
|
||||
0x1d0: 0x17003, // big
|
||||
0x1d2: 0x65108, // sortable
|
||||
0x1d3: 0x48007, // onkeyup
|
||||
0x1d5: 0x5a406, // onplay
|
||||
0x1d7: 0x4b804, // meta
|
||||
0x1d8: 0x40306, // ondrop
|
||||
0x1da: 0x60008, // onscroll
|
||||
0x1db: 0x1fb0b, // crossorigin
|
||||
0x1dc: 0x5730a, // onpageshow
|
||||
0x1dd: 0x4, // abbr
|
||||
0x1de: 0x9202, // td
|
||||
0x1df: 0x58b0f, // contenteditable
|
||||
0x1e0: 0x27206, // action
|
||||
0x1e1: 0x1400b, // playsinline
|
||||
0x1e2: 0x43107, // onfocus
|
||||
0x1e3: 0x2e008, // hreflang
|
||||
0x1e5: 0x5160a, // onmouseout
|
||||
0x1e6: 0x5ea07, // onreset
|
||||
0x1e7: 0x13c08, // autoplay
|
||||
0x1e8: 0x63109, // onseeking
|
||||
0x1ea: 0x67506, // scoped
|
||||
0x1ec: 0x30a, // radiogroup
|
||||
0x1ee: 0x3800b, // contextmenu
|
||||
0x1ef: 0x52e09, // onmouseup
|
||||
0x1f1: 0x2ca06, // hgroup
|
||||
0x1f2: 0x2080f, // allowfullscreen
|
||||
0x1f3: 0x4be08, // tabindex
|
||||
0x1f6: 0x30f07, // isindex
|
||||
0x1f7: 0x1a0e, // accept-charset
|
||||
0x1f8: 0x2ae0e, // formnovalidate
|
||||
0x1fb: 0x1c90e, // annotation-xml
|
||||
0x1fc: 0x6e05, // embed
|
||||
0x1fd: 0x21806, // script
|
||||
0x1fe: 0xbb06, // dialog
|
||||
0x1ff: 0x1d707, // command
|
||||
}
|
||||
|
||||
const atomText = "abbradiogrouparamainavalueaccept-charsetbodyaccesskeygenobrb" +
|
||||
"asefontimeupdateviacacheightmlabelooptgroupatternoembedetail" +
|
||||
"sampictureversedfnoframesetdirnameterowspanomoduleacronymali" +
|
||||
"gnmarkbdialogallowpaymentrequestrikeytypeallowusermediagroup" +
|
||||
"ingaltfooterubyasyncanvasidefaultitleaudioncancelautofocusan" +
|
||||
"dboxmplaceholderautoplaysinlinebdoncanplaythrough1bgsoundisa" +
|
||||
"bledivarbigblinkindraggablegendblockquotebuttonabortcitempro" +
|
||||
"penoncecolgrouplaintextrackcolorcolspannotation-xmlcommandco" +
|
||||
"ntrolshapecoordslotranslatecrossoriginsmallowfullscreenoscri" +
|
||||
"ptfacenterfieldsetfigcaptionafterprintegrityfigurequiredfore" +
|
||||
"ignObjectforeignobjectformactionautocompleteerrorformenctype" +
|
||||
"mustmatchallengeformmethodformnovalidatetimeformtargethgroup" +
|
||||
"osterhiddenhigh2hreflanghttp-equivideonclickiframeimageimgly" +
|
||||
"ph3isindexismappletitemtypemarqueematheadersortedmaxlength4m" +
|
||||
"inlength5mtextareadonlymultiplemutedoncloseamlessourceoncont" +
|
||||
"extmenuitemidoncopyoncuechangeoncutondblclickondragendondrag" +
|
||||
"enterondragexitemreferrerpolicyondragleaveondragoverondragst" +
|
||||
"articleondropzonemptiedondurationchangeonendedonerroronfocus" +
|
||||
"paceronhashchangeoninputmodeloninvalidonkeydownloadonkeypres" +
|
||||
"spellcheckedonkeyupreloadonlanguagechangeonloadeddatalisting" +
|
||||
"onloadedmetadatabindexonloadendonloadstartonmessageerroronmo" +
|
||||
"usedownonmouseenteronmouseleaveonmousemoveonmouseoutputonmou" +
|
||||
"seoveronmouseupromptonmousewheelonofflineononlineonpagehides" +
|
||||
"classectionbluronpageshowbronpastepublicontenteditableonpaus" +
|
||||
"emaponplayingonpopstateonprogressrcdocodeferonratechangeonre" +
|
||||
"jectionhandledonresetonresizesrclangonscrollonsecuritypolicy" +
|
||||
"violationauxclickonseekedonseekingonselectedonshowidth6onsor" +
|
||||
"tableonstalledonstorageonsubmitemscopedonsuspendontoggleonun" +
|
||||
"handledrejectionbeforeprintonunloadonvolumechangeonwaitingon" +
|
||||
"wheeloptimumanifestrongoptionbeforeunloaddressrcsetstylesumm" +
|
||||
"arysupsvgsystemplateworkertypewrap"
|
|
@ -0,0 +1,111 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
// Section 12.2.4.2 of the HTML5 specification says "The following elements
|
||||
// have varying levels of special parsing rules".
|
||||
// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
|
||||
var isSpecialElementMap = map[string]bool{
|
||||
"address": true,
|
||||
"applet": true,
|
||||
"area": true,
|
||||
"article": true,
|
||||
"aside": true,
|
||||
"base": true,
|
||||
"basefont": true,
|
||||
"bgsound": true,
|
||||
"blockquote": true,
|
||||
"body": true,
|
||||
"br": true,
|
||||
"button": true,
|
||||
"caption": true,
|
||||
"center": true,
|
||||
"col": true,
|
||||
"colgroup": true,
|
||||
"dd": true,
|
||||
"details": true,
|
||||
"dir": true,
|
||||
"div": true,
|
||||
"dl": true,
|
||||
"dt": true,
|
||||
"embed": true,
|
||||
"fieldset": true,
|
||||
"figcaption": true,
|
||||
"figure": true,
|
||||
"footer": true,
|
||||
"form": true,
|
||||
"frame": true,
|
||||
"frameset": true,
|
||||
"h1": true,
|
||||
"h2": true,
|
||||
"h3": true,
|
||||
"h4": true,
|
||||
"h5": true,
|
||||
"h6": true,
|
||||
"head": true,
|
||||
"header": true,
|
||||
"hgroup": true,
|
||||
"hr": true,
|
||||
"html": true,
|
||||
"iframe": true,
|
||||
"img": true,
|
||||
"input": true,
|
||||
"keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility.
|
||||
"li": true,
|
||||
"link": true,
|
||||
"listing": true,
|
||||
"main": true,
|
||||
"marquee": true,
|
||||
"menu": true,
|
||||
"meta": true,
|
||||
"nav": true,
|
||||
"noembed": true,
|
||||
"noframes": true,
|
||||
"noscript": true,
|
||||
"object": true,
|
||||
"ol": true,
|
||||
"p": true,
|
||||
"param": true,
|
||||
"plaintext": true,
|
||||
"pre": true,
|
||||
"script": true,
|
||||
"section": true,
|
||||
"select": true,
|
||||
"source": true,
|
||||
"style": true,
|
||||
"summary": true,
|
||||
"table": true,
|
||||
"tbody": true,
|
||||
"td": true,
|
||||
"template": true,
|
||||
"textarea": true,
|
||||
"tfoot": true,
|
||||
"th": true,
|
||||
"thead": true,
|
||||
"title": true,
|
||||
"tr": true,
|
||||
"track": true,
|
||||
"ul": true,
|
||||
"wbr": true,
|
||||
"xmp": true,
|
||||
}
|
||||
|
||||
func isSpecialElement(element *Node) bool {
|
||||
switch element.Namespace {
|
||||
case "", "html":
|
||||
return isSpecialElementMap[element.Data]
|
||||
case "math":
|
||||
switch element.Data {
|
||||
case "mi", "mo", "mn", "ms", "mtext", "annotation-xml":
|
||||
return true
|
||||
}
|
||||
case "svg":
|
||||
switch element.Data {
|
||||
case "foreignObject", "desc", "title":
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
Package html implements an HTML5-compliant tokenizer and parser.
|
||||
|
||||
Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
|
||||
caller's responsibility to ensure that r provides UTF-8 encoded HTML.
|
||||
|
||||
z := html.NewTokenizer(r)
|
||||
|
||||
Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
|
||||
which parses the next token and returns its type, or an error:
|
||||
|
||||
for {
|
||||
tt := z.Next()
|
||||
if tt == html.ErrorToken {
|
||||
// ...
|
||||
return ...
|
||||
}
|
||||
// Process the current token.
|
||||
}
|
||||
|
||||
There are two APIs for retrieving the current token. The high-level API is to
|
||||
call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
|
||||
allow optionally calling Raw after Next but before Token, Text, TagName, or
|
||||
TagAttr. In EBNF notation, the valid call sequence per token is:
|
||||
|
||||
Next {Raw} [ Token | Text | TagName {TagAttr} ]
|
||||
|
||||
Token returns an independent data structure that completely describes a token.
|
||||
Entities (such as "<") are unescaped, tag names and attribute keys are
|
||||
lower-cased, and attributes are collected into a []Attribute. For example:
|
||||
|
||||
for {
|
||||
if z.Next() == html.ErrorToken {
|
||||
// Returning io.EOF indicates success.
|
||||
return z.Err()
|
||||
}
|
||||
emitToken(z.Token())
|
||||
}
|
||||
|
||||
The low-level API performs fewer allocations and copies, but the contents of
|
||||
the []byte values returned by Text, TagName and TagAttr may change on the next
|
||||
call to Next. For example, to extract an HTML page's anchor text:
|
||||
|
||||
depth := 0
|
||||
for {
|
||||
tt := z.Next()
|
||||
switch tt {
|
||||
case html.ErrorToken:
|
||||
return z.Err()
|
||||
case html.TextToken:
|
||||
if depth > 0 {
|
||||
// emitBytes should copy the []byte it receives,
|
||||
// if it doesn't process it immediately.
|
||||
emitBytes(z.Text())
|
||||
}
|
||||
case html.StartTagToken, html.EndTagToken:
|
||||
tn, _ := z.TagName()
|
||||
if len(tn) == 1 && tn[0] == 'a' {
|
||||
if tt == html.StartTagToken {
|
||||
depth++
|
||||
} else {
|
||||
depth--
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Parsing is done by calling Parse with an io.Reader, which returns the root of
|
||||
the parse tree (the document element) as a *Node. It is the caller's
|
||||
responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
|
||||
example, to process each anchor node in depth-first order:
|
||||
|
||||
doc, err := html.Parse(r)
|
||||
if err != nil {
|
||||
// ...
|
||||
}
|
||||
var f func(*html.Node)
|
||||
f = func(n *html.Node) {
|
||||
if n.Type == html.ElementNode && n.Data == "a" {
|
||||
// Do something with n...
|
||||
}
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
f(c)
|
||||
}
|
||||
}
|
||||
f(doc)
|
||||
|
||||
The relevant specifications include:
|
||||
https://html.spec.whatwg.org/multipage/syntax.html and
|
||||
https://html.spec.whatwg.org/multipage/syntax.html#tokenization
|
||||
*/
|
||||
package html // import "golang.org/x/net/html"
|
||||
|
||||
// The tokenization algorithm implemented by this package is not a line-by-line
|
||||
// transliteration of the relatively verbose state-machine in the WHATWG
|
||||
// specification. A more direct approach is used instead, where the program
|
||||
// counter implies the state, such as whether it is tokenizing a tag or a text
|
||||
// node. Specification compliance is verified by checking expected and actual
|
||||
// outputs over a test suite rather than aiming for algorithmic fidelity.
|
||||
|
||||
// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
|
||||
// TODO(nigeltao): How does parsing interact with a JavaScript engine?
|
|
@ -0,0 +1,156 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// parseDoctype parses the data from a DoctypeToken into a name,
|
||||
// public identifier, and system identifier. It returns a Node whose Type
|
||||
// is DoctypeNode, whose Data is the name, and which has attributes
|
||||
// named "system" and "public" for the two identifiers if they were present.
|
||||
// quirks is whether the document should be parsed in "quirks mode".
|
||||
func parseDoctype(s string) (n *Node, quirks bool) {
|
||||
n = &Node{Type: DoctypeNode}
|
||||
|
||||
// Find the name.
|
||||
space := strings.IndexAny(s, whitespace)
|
||||
if space == -1 {
|
||||
space = len(s)
|
||||
}
|
||||
n.Data = s[:space]
|
||||
// The comparison to "html" is case-sensitive.
|
||||
if n.Data != "html" {
|
||||
quirks = true
|
||||
}
|
||||
n.Data = strings.ToLower(n.Data)
|
||||
s = strings.TrimLeft(s[space:], whitespace)
|
||||
|
||||
if len(s) < 6 {
|
||||
// It can't start with "PUBLIC" or "SYSTEM".
|
||||
// Ignore the rest of the string.
|
||||
return n, quirks || s != ""
|
||||
}
|
||||
|
||||
key := strings.ToLower(s[:6])
|
||||
s = s[6:]
|
||||
for key == "public" || key == "system" {
|
||||
s = strings.TrimLeft(s, whitespace)
|
||||
if s == "" {
|
||||
break
|
||||
}
|
||||
quote := s[0]
|
||||
if quote != '"' && quote != '\'' {
|
||||
break
|
||||
}
|
||||
s = s[1:]
|
||||
q := strings.IndexRune(s, rune(quote))
|
||||
var id string
|
||||
if q == -1 {
|
||||
id = s
|
||||
s = ""
|
||||
} else {
|
||||
id = s[:q]
|
||||
s = s[q+1:]
|
||||
}
|
||||
n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
|
||||
if key == "public" {
|
||||
key = "system"
|
||||
} else {
|
||||
key = ""
|
||||
}
|
||||
}
|
||||
|
||||
if key != "" || s != "" {
|
||||
quirks = true
|
||||
} else if len(n.Attr) > 0 {
|
||||
if n.Attr[0].Key == "public" {
|
||||
public := strings.ToLower(n.Attr[0].Val)
|
||||
switch public {
|
||||
case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
|
||||
quirks = true
|
||||
default:
|
||||
for _, q := range quirkyIDs {
|
||||
if strings.HasPrefix(public, q) {
|
||||
quirks = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// The following two public IDs only cause quirks mode if there is no system ID.
|
||||
if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
|
||||
strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
|
||||
quirks = true
|
||||
}
|
||||
}
|
||||
if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
|
||||
strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
|
||||
quirks = true
|
||||
}
|
||||
}
|
||||
|
||||
return n, quirks
|
||||
}
|
||||
|
||||
// quirkyIDs is a list of public doctype identifiers that cause a document
|
||||
// to be interpreted in quirks mode. The identifiers should be in lower case.
|
||||
var quirkyIDs = []string{
|
||||
"+//silmaril//dtd html pro v0r11 19970101//",
|
||||
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
|
||||
"-//as//dtd html 3.0 aswedit + extensions//",
|
||||
"-//ietf//dtd html 2.0 level 1//",
|
||||
"-//ietf//dtd html 2.0 level 2//",
|
||||
"-//ietf//dtd html 2.0 strict level 1//",
|
||||
"-//ietf//dtd html 2.0 strict level 2//",
|
||||
"-//ietf//dtd html 2.0 strict//",
|
||||
"-//ietf//dtd html 2.0//",
|
||||
"-//ietf//dtd html 2.1e//",
|
||||
"-//ietf//dtd html 3.0//",
|
||||
"-//ietf//dtd html 3.2 final//",
|
||||
"-//ietf//dtd html 3.2//",
|
||||
"-//ietf//dtd html 3//",
|
||||
"-//ietf//dtd html level 0//",
|
||||
"-//ietf//dtd html level 1//",
|
||||
"-//ietf//dtd html level 2//",
|
||||
"-//ietf//dtd html level 3//",
|
||||
"-//ietf//dtd html strict level 0//",
|
||||
"-//ietf//dtd html strict level 1//",
|
||||
"-//ietf//dtd html strict level 2//",
|
||||
"-//ietf//dtd html strict level 3//",
|
||||
"-//ietf//dtd html strict//",
|
||||
"-//ietf//dtd html//",
|
||||
"-//metrius//dtd metrius presentational//",
|
||||
"-//microsoft//dtd internet explorer 2.0 html strict//",
|
||||
"-//microsoft//dtd internet explorer 2.0 html//",
|
||||
"-//microsoft//dtd internet explorer 2.0 tables//",
|
||||
"-//microsoft//dtd internet explorer 3.0 html strict//",
|
||||
"-//microsoft//dtd internet explorer 3.0 html//",
|
||||
"-//microsoft//dtd internet explorer 3.0 tables//",
|
||||
"-//netscape comm. corp.//dtd html//",
|
||||
"-//netscape comm. corp.//dtd strict html//",
|
||||
"-//o'reilly and associates//dtd html 2.0//",
|
||||
"-//o'reilly and associates//dtd html extended 1.0//",
|
||||
"-//o'reilly and associates//dtd html extended relaxed 1.0//",
|
||||
"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
|
||||
"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
|
||||
"-//spyglass//dtd html 2.0 extended//",
|
||||
"-//sq//dtd html 2.0 hotmetal + extensions//",
|
||||
"-//sun microsystems corp.//dtd hotjava html//",
|
||||
"-//sun microsystems corp.//dtd hotjava strict html//",
|
||||
"-//w3c//dtd html 3 1995-03-24//",
|
||||
"-//w3c//dtd html 3.2 draft//",
|
||||
"-//w3c//dtd html 3.2 final//",
|
||||
"-//w3c//dtd html 3.2//",
|
||||
"-//w3c//dtd html 3.2s draft//",
|
||||
"-//w3c//dtd html 4.0 frameset//",
|
||||
"-//w3c//dtd html 4.0 transitional//",
|
||||
"-//w3c//dtd html experimental 19960712//",
|
||||
"-//w3c//dtd html experimental 970421//",
|
||||
"-//w3c//dtd w3 html//",
|
||||
"-//w3o//dtd w3 html 3.0//",
|
||||
"-//webtechs//dtd mozilla html 2.0//",
|
||||
"-//webtechs//dtd mozilla html//",
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,258 @@
|
|||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// These replacements permit compatibility with old numeric entities that
|
||||
// assumed Windows-1252 encoding.
|
||||
// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
|
||||
var replacementTable = [...]rune{
|
||||
'\u20AC', // First entry is what 0x80 should be replaced with.
|
||||
'\u0081',
|
||||
'\u201A',
|
||||
'\u0192',
|
||||
'\u201E',
|
||||
'\u2026',
|
||||
'\u2020',
|
||||
'\u2021',
|
||||
'\u02C6',
|
||||
'\u2030',
|
||||
'\u0160',
|
||||
'\u2039',
|
||||
'\u0152',
|
||||
'\u008D',
|
||||
'\u017D',
|
||||
'\u008F',
|
||||
'\u0090',
|
||||
'\u2018',
|
||||
'\u2019',
|
||||
'\u201C',
|
||||
'\u201D',
|
||||
'\u2022',
|
||||
'\u2013',
|
||||
'\u2014',
|
||||
'\u02DC',
|
||||
'\u2122',
|
||||
'\u0161',
|
||||
'\u203A',
|
||||
'\u0153',
|
||||
'\u009D',
|
||||
'\u017E',
|
||||
'\u0178', // Last entry is 0x9F.
|
||||
// 0x00->'\uFFFD' is handled programmatically.
|
||||
// 0x0D->'\u000D' is a no-op.
|
||||
}
|
||||
|
||||
// unescapeEntity reads an entity like "<" from b[src:] and writes the
|
||||
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
|
||||
// Precondition: b[src] == '&' && dst <= src.
|
||||
// attribute should be true if parsing an attribute value.
|
||||
func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
|
||||
// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
|
||||
|
||||
// i starts at 1 because we already know that s[0] == '&'.
|
||||
i, s := 1, b[src:]
|
||||
|
||||
if len(s) <= 1 {
|
||||
b[dst] = b[src]
|
||||
return dst + 1, src + 1
|
||||
}
|
||||
|
||||
if s[i] == '#' {
|
||||
if len(s) <= 3 { // We need to have at least "&#.".
|
||||
b[dst] = b[src]
|
||||
return dst + 1, src + 1
|
||||
}
|
||||
i++
|
||||
c := s[i]
|
||||
hex := false
|
||||
if c == 'x' || c == 'X' {
|
||||
hex = true
|
||||
i++
|
||||
}
|
||||
|
||||
x := '\x00'
|
||||
for i < len(s) {
|
||||
c = s[i]
|
||||
i++
|
||||
if hex {
|
||||
if '0' <= c && c <= '9' {
|
||||
x = 16*x + rune(c) - '0'
|
||||
continue
|
||||
} else if 'a' <= c && c <= 'f' {
|
||||
x = 16*x + rune(c) - 'a' + 10
|
||||
continue
|
||||
} else if 'A' <= c && c <= 'F' {
|
||||
x = 16*x + rune(c) - 'A' + 10
|
||||
continue
|
||||
}
|
||||
} else if '0' <= c && c <= '9' {
|
||||
x = 10*x + rune(c) - '0'
|
||||
continue
|
||||
}
|
||||
if c != ';' {
|
||||
i--
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
if i <= 3 { // No characters matched.
|
||||
b[dst] = b[src]
|
||||
return dst + 1, src + 1
|
||||
}
|
||||
|
||||
if 0x80 <= x && x <= 0x9F {
|
||||
// Replace characters from Windows-1252 with UTF-8 equivalents.
|
||||
x = replacementTable[x-0x80]
|
||||
} else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
|
||||
// Replace invalid characters with the replacement character.
|
||||
x = '\uFFFD'
|
||||
}
|
||||
|
||||
return dst + utf8.EncodeRune(b[dst:], x), src + i
|
||||
}
|
||||
|
||||
// Consume the maximum number of characters possible, with the
|
||||
// consumed characters matching one of the named references.
|
||||
|
||||
for i < len(s) {
|
||||
c := s[i]
|
||||
i++
|
||||
// Lower-cased characters are more common in entities, so we check for them first.
|
||||
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
|
||||
continue
|
||||
}
|
||||
if c != ';' {
|
||||
i--
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
entityName := string(s[1:i])
|
||||
if entityName == "" {
|
||||
// No-op.
|
||||
} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
|
||||
// No-op.
|
||||
} else if x := entity[entityName]; x != 0 {
|
||||
return dst + utf8.EncodeRune(b[dst:], x), src + i
|
||||
} else if x := entity2[entityName]; x[0] != 0 {
|
||||
dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
|
||||
return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
|
||||
} else if !attribute {
|
||||
maxLen := len(entityName) - 1
|
||||
if maxLen > longestEntityWithoutSemicolon {
|
||||
maxLen = longestEntityWithoutSemicolon
|
||||
}
|
||||
for j := maxLen; j > 1; j-- {
|
||||
if x := entity[entityName[:j]]; x != 0 {
|
||||
return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dst1, src1 = dst+i, src+i
|
||||
copy(b[dst:dst1], b[src:src1])
|
||||
return dst1, src1
|
||||
}
|
||||
|
||||
// unescape unescapes b's entities in-place, so that "a<b" becomes "a<b".
|
||||
// attribute should be true if parsing an attribute value.
|
||||
func unescape(b []byte, attribute bool) []byte {
|
||||
for i, c := range b {
|
||||
if c == '&' {
|
||||
dst, src := unescapeEntity(b, i, i, attribute)
|
||||
for src < len(b) {
|
||||
c := b[src]
|
||||
if c == '&' {
|
||||
dst, src = unescapeEntity(b, dst, src, attribute)
|
||||
} else {
|
||||
b[dst] = c
|
||||
dst, src = dst+1, src+1
|
||||
}
|
||||
}
|
||||
return b[0:dst]
|
||||
}
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// lower lower-cases the A-Z bytes in b in-place, so that "aBc" becomes "abc".
|
||||
func lower(b []byte) []byte {
|
||||
for i, c := range b {
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
b[i] = c + 'a' - 'A'
|
||||
}
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
const escapedChars = "&'<>\"\r"
|
||||
|
||||
func escape(w writer, s string) error {
|
||||
i := strings.IndexAny(s, escapedChars)
|
||||
for i != -1 {
|
||||
if _, err := w.WriteString(s[:i]); err != nil {
|
||||
return err
|
||||
}
|
||||
var esc string
|
||||
switch s[i] {
|
||||
case '&':
|
||||
esc = "&"
|
||||
case '\'':
|
||||
// "'" is shorter than "'" and apos was not in HTML until HTML5.
|
||||
esc = "'"
|
||||
case '<':
|
||||
esc = "<"
|
||||
case '>':
|
||||
esc = ">"
|
||||
case '"':
|
||||
// """ is shorter than """.
|
||||
esc = """
|
||||
case '\r':
|
||||
esc = " "
|
||||
default:
|
||||
panic("unrecognized escape character")
|
||||
}
|
||||
s = s[i+1:]
|
||||
if _, err := w.WriteString(esc); err != nil {
|
||||
return err
|
||||
}
|
||||
i = strings.IndexAny(s, escapedChars)
|
||||
}
|
||||
_, err := w.WriteString(s)
|
||||
return err
|
||||
}
|
||||
|
||||
// EscapeString escapes special characters like "<" to become "<". It
|
||||
// escapes only five such characters: <, >, &, ' and ".
|
||||
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
|
||||
// always true.
|
||||
func EscapeString(s string) string {
|
||||
if strings.IndexAny(s, escapedChars) == -1 {
|
||||
return s
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
escape(&buf, s)
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// UnescapeString unescapes entities like "<" to become "<". It unescapes a
|
||||
// larger range of entities than EscapeString escapes. For example, "á"
|
||||
// unescapes to "á", as does "á" and "&xE1;".
|
||||
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
|
||||
// always true.
|
||||
func UnescapeString(s string) string {
|
||||
for _, c := range s {
|
||||
if c == '&' {
|
||||
return string(unescape([]byte(s), false))
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
|
@ -0,0 +1,222 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
func adjustAttributeNames(aa []Attribute, nameMap map[string]string) {
|
||||
for i := range aa {
|
||||
if newName, ok := nameMap[aa[i].Key]; ok {
|
||||
aa[i].Key = newName
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func adjustForeignAttributes(aa []Attribute) {
|
||||
for i, a := range aa {
|
||||
if a.Key == "" || a.Key[0] != 'x' {
|
||||
continue
|
||||
}
|
||||
switch a.Key {
|
||||
case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
|
||||
"xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink":
|
||||
j := strings.Index(a.Key, ":")
|
||||
aa[i].Namespace = a.Key[:j]
|
||||
aa[i].Key = a.Key[j+1:]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func htmlIntegrationPoint(n *Node) bool {
|
||||
if n.Type != ElementNode {
|
||||
return false
|
||||
}
|
||||
switch n.Namespace {
|
||||
case "math":
|
||||
if n.Data == "annotation-xml" {
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == "encoding" {
|
||||
val := strings.ToLower(a.Val)
|
||||
if val == "text/html" || val == "application/xhtml+xml" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
case "svg":
|
||||
switch n.Data {
|
||||
case "desc", "foreignObject", "title":
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func mathMLTextIntegrationPoint(n *Node) bool {
|
||||
if n.Namespace != "math" {
|
||||
return false
|
||||
}
|
||||
switch n.Data {
|
||||
case "mi", "mo", "mn", "ms", "mtext":
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Section 12.2.6.5.
|
||||
var breakout = map[string]bool{
|
||||
"b": true,
|
||||
"big": true,
|
||||
"blockquote": true,
|
||||
"body": true,
|
||||
"br": true,
|
||||
"center": true,
|
||||
"code": true,
|
||||
"dd": true,
|
||||
"div": true,
|
||||
"dl": true,
|
||||
"dt": true,
|
||||
"em": true,
|
||||
"embed": true,
|
||||
"h1": true,
|
||||
"h2": true,
|
||||
"h3": true,
|
||||
"h4": true,
|
||||
"h5": true,
|
||||
"h6": true,
|
||||
"head": true,
|
||||
"hr": true,
|
||||
"i": true,
|
||||
"img": true,
|
||||
"li": true,
|
||||
"listing": true,
|
||||
"menu": true,
|
||||
"meta": true,
|
||||
"nobr": true,
|
||||
"ol": true,
|
||||
"p": true,
|
||||
"pre": true,
|
||||
"ruby": true,
|
||||
"s": true,
|
||||
"small": true,
|
||||
"span": true,
|
||||
"strong": true,
|
||||
"strike": true,
|
||||
"sub": true,
|
||||
"sup": true,
|
||||
"table": true,
|
||||
"tt": true,
|
||||
"u": true,
|
||||
"ul": true,
|
||||
"var": true,
|
||||
}
|
||||
|
||||
// Section 12.2.6.5.
|
||||
var svgTagNameAdjustments = map[string]string{
|
||||
"altglyph": "altGlyph",
|
||||
"altglyphdef": "altGlyphDef",
|
||||
"altglyphitem": "altGlyphItem",
|
||||
"animatecolor": "animateColor",
|
||||
"animatemotion": "animateMotion",
|
||||
"animatetransform": "animateTransform",
|
||||
"clippath": "clipPath",
|
||||
"feblend": "feBlend",
|
||||
"fecolormatrix": "feColorMatrix",
|
||||
"fecomponenttransfer": "feComponentTransfer",
|
||||
"fecomposite": "feComposite",
|
||||
"feconvolvematrix": "feConvolveMatrix",
|
||||
"fediffuselighting": "feDiffuseLighting",
|
||||
"fedisplacementmap": "feDisplacementMap",
|
||||
"fedistantlight": "feDistantLight",
|
||||
"feflood": "feFlood",
|
||||
"fefunca": "feFuncA",
|
||||
"fefuncb": "feFuncB",
|
||||
"fefuncg": "feFuncG",
|
||||
"fefuncr": "feFuncR",
|
||||
"fegaussianblur": "feGaussianBlur",
|
||||
"feimage": "feImage",
|
||||
"femerge": "feMerge",
|
||||
"femergenode": "feMergeNode",
|
||||
"femorphology": "feMorphology",
|
||||
"feoffset": "feOffset",
|
||||
"fepointlight": "fePointLight",
|
||||
"fespecularlighting": "feSpecularLighting",
|
||||
"fespotlight": "feSpotLight",
|
||||
"fetile": "feTile",
|
||||
"feturbulence": "feTurbulence",
|
||||
"foreignobject": "foreignObject",
|
||||
"glyphref": "glyphRef",
|
||||
"lineargradient": "linearGradient",
|
||||
"radialgradient": "radialGradient",
|
||||
"textpath": "textPath",
|
||||
}
|
||||
|
||||
// Section 12.2.6.1
|
||||
var mathMLAttributeAdjustments = map[string]string{
|
||||
"definitionurl": "definitionURL",
|
||||
}
|
||||
|
||||
var svgAttributeAdjustments = map[string]string{
|
||||
"attributename": "attributeName",
|
||||
"attributetype": "attributeType",
|
||||
"basefrequency": "baseFrequency",
|
||||
"baseprofile": "baseProfile",
|
||||
"calcmode": "calcMode",
|
||||
"clippathunits": "clipPathUnits",
|
||||
"diffuseconstant": "diffuseConstant",
|
||||
"edgemode": "edgeMode",
|
||||
"filterunits": "filterUnits",
|
||||
"glyphref": "glyphRef",
|
||||
"gradienttransform": "gradientTransform",
|
||||
"gradientunits": "gradientUnits",
|
||||
"kernelmatrix": "kernelMatrix",
|
||||
"kernelunitlength": "kernelUnitLength",
|
||||
"keypoints": "keyPoints",
|
||||
"keysplines": "keySplines",
|
||||
"keytimes": "keyTimes",
|
||||
"lengthadjust": "lengthAdjust",
|
||||
"limitingconeangle": "limitingConeAngle",
|
||||
"markerheight": "markerHeight",
|
||||
"markerunits": "markerUnits",
|
||||
"markerwidth": "markerWidth",
|
||||
"maskcontentunits": "maskContentUnits",
|
||||
"maskunits": "maskUnits",
|
||||
"numoctaves": "numOctaves",
|
||||
"pathlength": "pathLength",
|
||||
"patterncontentunits": "patternContentUnits",
|
||||
"patterntransform": "patternTransform",
|
||||
"patternunits": "patternUnits",
|
||||
"pointsatx": "pointsAtX",
|
||||
"pointsaty": "pointsAtY",
|
||||
"pointsatz": "pointsAtZ",
|
||||
"preservealpha": "preserveAlpha",
|
||||
"preserveaspectratio": "preserveAspectRatio",
|
||||
"primitiveunits": "primitiveUnits",
|
||||
"refx": "refX",
|
||||
"refy": "refY",
|
||||
"repeatcount": "repeatCount",
|
||||
"repeatdur": "repeatDur",
|
||||
"requiredextensions": "requiredExtensions",
|
||||
"requiredfeatures": "requiredFeatures",
|
||||
"specularconstant": "specularConstant",
|
||||
"specularexponent": "specularExponent",
|
||||
"spreadmethod": "spreadMethod",
|
||||
"startoffset": "startOffset",
|
||||
"stddeviation": "stdDeviation",
|
||||
"stitchtiles": "stitchTiles",
|
||||
"surfacescale": "surfaceScale",
|
||||
"systemlanguage": "systemLanguage",
|
||||
"tablevalues": "tableValues",
|
||||
"targetx": "targetX",
|
||||
"targety": "targetY",
|
||||
"textlength": "textLength",
|
||||
"viewbox": "viewBox",
|
||||
"viewtarget": "viewTarget",
|
||||
"xchannelselector": "xChannelSelector",
|
||||
"ychannelselector": "yChannelSelector",
|
||||
"zoomandpan": "zoomAndPan",
|
||||
}
|
|
@ -0,0 +1,225 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
// A NodeType is the type of a Node.
|
||||
type NodeType uint32
|
||||
|
||||
const (
|
||||
ErrorNode NodeType = iota
|
||||
TextNode
|
||||
DocumentNode
|
||||
ElementNode
|
||||
CommentNode
|
||||
DoctypeNode
|
||||
// RawNode nodes are not returned by the parser, but can be part of the
|
||||
// Node tree passed to func Render to insert raw HTML (without escaping).
|
||||
// If so, this package makes no guarantee that the rendered HTML is secure
|
||||
// (from e.g. Cross Site Scripting attacks) or well-formed.
|
||||
RawNode
|
||||
scopeMarkerNode
|
||||
)
|
||||
|
||||
// Section 12.2.4.3 says "The markers are inserted when entering applet,
|
||||
// object, marquee, template, td, th, and caption elements, and are used
|
||||
// to prevent formatting from "leaking" into applet, object, marquee,
|
||||
// template, td, th, and caption elements".
|
||||
var scopeMarker = Node{Type: scopeMarkerNode}
|
||||
|
||||
// A Node consists of a NodeType and some Data (tag name for element nodes,
|
||||
// content for text) and are part of a tree of Nodes. Element nodes may also
|
||||
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
|
||||
// that it looks like "a<b" rather than "a<b". For element nodes, DataAtom
|
||||
// is the atom for Data, or zero if Data is not a known tag name.
|
||||
//
|
||||
// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
|
||||
// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
|
||||
// "svg" is short for "http://www.w3.org/2000/svg".
|
||||
type Node struct {
|
||||
Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node
|
||||
|
||||
Type NodeType
|
||||
DataAtom atom.Atom
|
||||
Data string
|
||||
Namespace string
|
||||
Attr []Attribute
|
||||
}
|
||||
|
||||
// InsertBefore inserts newChild as a child of n, immediately before oldChild
|
||||
// in the sequence of n's children. oldChild may be nil, in which case newChild
|
||||
// is appended to the end of n's children.
|
||||
//
|
||||
// It will panic if newChild already has a parent or siblings.
|
||||
func (n *Node) InsertBefore(newChild, oldChild *Node) {
|
||||
if newChild.Parent != nil || newChild.PrevSibling != nil || newChild.NextSibling != nil {
|
||||
panic("html: InsertBefore called for an attached child Node")
|
||||
}
|
||||
var prev, next *Node
|
||||
if oldChild != nil {
|
||||
prev, next = oldChild.PrevSibling, oldChild
|
||||
} else {
|
||||
prev = n.LastChild
|
||||
}
|
||||
if prev != nil {
|
||||
prev.NextSibling = newChild
|
||||
} else {
|
||||
n.FirstChild = newChild
|
||||
}
|
||||
if next != nil {
|
||||
next.PrevSibling = newChild
|
||||
} else {
|
||||
n.LastChild = newChild
|
||||
}
|
||||
newChild.Parent = n
|
||||
newChild.PrevSibling = prev
|
||||
newChild.NextSibling = next
|
||||
}
|
||||
|
||||
// AppendChild adds a node c as a child of n.
|
||||
//
|
||||
// It will panic if c already has a parent or siblings.
|
||||
func (n *Node) AppendChild(c *Node) {
|
||||
if c.Parent != nil || c.PrevSibling != nil || c.NextSibling != nil {
|
||||
panic("html: AppendChild called for an attached child Node")
|
||||
}
|
||||
last := n.LastChild
|
||||
if last != nil {
|
||||
last.NextSibling = c
|
||||
} else {
|
||||
n.FirstChild = c
|
||||
}
|
||||
n.LastChild = c
|
||||
c.Parent = n
|
||||
c.PrevSibling = last
|
||||
}
|
||||
|
||||
// RemoveChild removes a node c that is a child of n. Afterwards, c will have
|
||||
// no parent and no siblings.
|
||||
//
|
||||
// It will panic if c's parent is not n.
|
||||
func (n *Node) RemoveChild(c *Node) {
|
||||
if c.Parent != n {
|
||||
panic("html: RemoveChild called for a non-child Node")
|
||||
}
|
||||
if n.FirstChild == c {
|
||||
n.FirstChild = c.NextSibling
|
||||
}
|
||||
if c.NextSibling != nil {
|
||||
c.NextSibling.PrevSibling = c.PrevSibling
|
||||
}
|
||||
if n.LastChild == c {
|
||||
n.LastChild = c.PrevSibling
|
||||
}
|
||||
if c.PrevSibling != nil {
|
||||
c.PrevSibling.NextSibling = c.NextSibling
|
||||
}
|
||||
c.Parent = nil
|
||||
c.PrevSibling = nil
|
||||
c.NextSibling = nil
|
||||
}
|
||||
|
||||
// reparentChildren reparents all of src's child nodes to dst.
|
||||
func reparentChildren(dst, src *Node) {
|
||||
for {
|
||||
child := src.FirstChild
|
||||
if child == nil {
|
||||
break
|
||||
}
|
||||
src.RemoveChild(child)
|
||||
dst.AppendChild(child)
|
||||
}
|
||||
}
|
||||
|
||||
// clone returns a new node with the same type, data and attributes.
|
||||
// The clone has no parent, no siblings and no children.
|
||||
func (n *Node) clone() *Node {
|
||||
m := &Node{
|
||||
Type: n.Type,
|
||||
DataAtom: n.DataAtom,
|
||||
Data: n.Data,
|
||||
Attr: make([]Attribute, len(n.Attr)),
|
||||
}
|
||||
copy(m.Attr, n.Attr)
|
||||
return m
|
||||
}
|
||||
|
||||
// nodeStack is a stack of nodes.
|
||||
type nodeStack []*Node
|
||||
|
||||
// pop pops the stack. It will panic if s is empty.
|
||||
func (s *nodeStack) pop() *Node {
|
||||
i := len(*s)
|
||||
n := (*s)[i-1]
|
||||
*s = (*s)[:i-1]
|
||||
return n
|
||||
}
|
||||
|
||||
// top returns the most recently pushed node, or nil if s is empty.
|
||||
func (s *nodeStack) top() *Node {
|
||||
if i := len(*s); i > 0 {
|
||||
return (*s)[i-1]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// index returns the index of the top-most occurrence of n in the stack, or -1
|
||||
// if n is not present.
|
||||
func (s *nodeStack) index(n *Node) int {
|
||||
for i := len(*s) - 1; i >= 0; i-- {
|
||||
if (*s)[i] == n {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// contains returns whether a is within s.
|
||||
func (s *nodeStack) contains(a atom.Atom) bool {
|
||||
for _, n := range *s {
|
||||
if n.DataAtom == a && n.Namespace == "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// insert inserts a node at the given index.
|
||||
func (s *nodeStack) insert(i int, n *Node) {
|
||||
(*s) = append(*s, nil)
|
||||
copy((*s)[i+1:], (*s)[i:])
|
||||
(*s)[i] = n
|
||||
}
|
||||
|
||||
// remove removes a node from the stack. It is a no-op if n is not present.
|
||||
func (s *nodeStack) remove(n *Node) {
|
||||
i := s.index(n)
|
||||
if i == -1 {
|
||||
return
|
||||
}
|
||||
copy((*s)[i:], (*s)[i+1:])
|
||||
j := len(*s) - 1
|
||||
(*s)[j] = nil
|
||||
*s = (*s)[:j]
|
||||
}
|
||||
|
||||
type insertionModeStack []insertionMode
|
||||
|
||||
func (s *insertionModeStack) pop() (im insertionMode) {
|
||||
i := len(*s)
|
||||
im = (*s)[i-1]
|
||||
*s = (*s)[:i-1]
|
||||
return im
|
||||
}
|
||||
|
||||
func (s *insertionModeStack) top() insertionMode {
|
||||
if i := len(*s); i > 0 {
|
||||
return (*s)[i-1]
|
||||
}
|
||||
return nil
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,273 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type writer interface {
|
||||
io.Writer
|
||||
io.ByteWriter
|
||||
WriteString(string) (int, error)
|
||||
}
|
||||
|
||||
// Render renders the parse tree n to the given writer.
|
||||
//
|
||||
// Rendering is done on a 'best effort' basis: calling Parse on the output of
|
||||
// Render will always result in something similar to the original tree, but it
|
||||
// is not necessarily an exact clone unless the original tree was 'well-formed'.
|
||||
// 'Well-formed' is not easily specified; the HTML5 specification is
|
||||
// complicated.
|
||||
//
|
||||
// Calling Parse on arbitrary input typically results in a 'well-formed' parse
|
||||
// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
|
||||
// For example, in a 'well-formed' parse tree, no <a> element is a child of
|
||||
// another <a> element: parsing "<a><a>" results in two sibling elements.
|
||||
// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
|
||||
// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
|
||||
// children; the <a> is reparented to the <table>'s parent. However, calling
|
||||
// Parse on "<a><table><a>" does not return an error, but the result has an <a>
|
||||
// element with an <a> child, and is therefore not 'well-formed'.
|
||||
//
|
||||
// Programmatically constructed trees are typically also 'well-formed', but it
|
||||
// is possible to construct a tree that looks innocuous but, when rendered and
|
||||
// re-parsed, results in a different tree. A simple example is that a solitary
|
||||
// text node would become a tree containing <html>, <head> and <body> elements.
|
||||
// Another example is that the programmatic equivalent of "a<head>b</head>c"
|
||||
// becomes "<html><head><head/><body>abc</body></html>".
|
||||
func Render(w io.Writer, n *Node) error {
|
||||
if x, ok := w.(writer); ok {
|
||||
return render(x, n)
|
||||
}
|
||||
buf := bufio.NewWriter(w)
|
||||
if err := render(buf, n); err != nil {
|
||||
return err
|
||||
}
|
||||
return buf.Flush()
|
||||
}
|
||||
|
||||
// plaintextAbort is returned from render1 when a <plaintext> element
|
||||
// has been rendered. No more end tags should be rendered after that.
|
||||
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
|
||||
|
||||
func render(w writer, n *Node) error {
|
||||
err := render1(w, n)
|
||||
if err == plaintextAbort {
|
||||
err = nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func render1(w writer, n *Node) error {
|
||||
// Render non-element nodes; these are the easy cases.
|
||||
switch n.Type {
|
||||
case ErrorNode:
|
||||
return errors.New("html: cannot render an ErrorNode node")
|
||||
case TextNode:
|
||||
return escape(w, n.Data)
|
||||
case DocumentNode:
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if err := render1(w, c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
case ElementNode:
|
||||
// No-op.
|
||||
case CommentNode:
|
||||
if _, err := w.WriteString("<!--"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString("-->"); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
case DoctypeNode:
|
||||
if _, err := w.WriteString("<!DOCTYPE "); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
if n.Attr != nil {
|
||||
var p, s string
|
||||
for _, a := range n.Attr {
|
||||
switch a.Key {
|
||||
case "public":
|
||||
p = a.Val
|
||||
case "system":
|
||||
s = a.Val
|
||||
}
|
||||
}
|
||||
if p != "" {
|
||||
if _, err := w.WriteString(" PUBLIC "); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeQuoted(w, p); err != nil {
|
||||
return err
|
||||
}
|
||||
if s != "" {
|
||||
if err := w.WriteByte(' '); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeQuoted(w, s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else if s != "" {
|
||||
if _, err := w.WriteString(" SYSTEM "); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeQuoted(w, s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return w.WriteByte('>')
|
||||
case RawNode:
|
||||
_, err := w.WriteString(n.Data)
|
||||
return err
|
||||
default:
|
||||
return errors.New("html: unknown node type")
|
||||
}
|
||||
|
||||
// Render the <xxx> opening tag.
|
||||
if err := w.WriteByte('<'); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, a := range n.Attr {
|
||||
if err := w.WriteByte(' '); err != nil {
|
||||
return err
|
||||
}
|
||||
if a.Namespace != "" {
|
||||
if _, err := w.WriteString(a.Namespace); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := w.WriteByte(':'); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := w.WriteString(a.Key); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(`="`); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := escape(w, a.Val); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := w.WriteByte('"'); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if voidElements[n.Data] {
|
||||
if n.FirstChild != nil {
|
||||
return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
|
||||
}
|
||||
_, err := w.WriteString("/>")
|
||||
return err
|
||||
}
|
||||
if err := w.WriteByte('>'); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Add initial newline where there is danger of a newline beging ignored.
|
||||
if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
|
||||
switch n.Data {
|
||||
case "pre", "listing", "textarea":
|
||||
if err := w.WriteByte('\n'); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Render any child nodes.
|
||||
switch n.Data {
|
||||
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if c.Type == TextNode {
|
||||
if _, err := w.WriteString(c.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := render1(w, c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
if n.Data == "plaintext" {
|
||||
// Don't render anything else. <plaintext> must be the
|
||||
// last element in the file, with no closing tag.
|
||||
return plaintextAbort
|
||||
}
|
||||
default:
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if err := render1(w, c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Render the </xxx> closing tag.
|
||||
if _, err := w.WriteString("</"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
return w.WriteByte('>')
|
||||
}
|
||||
|
||||
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
|
||||
// quotes, but if s contains a double quote, it will use single quotes.
|
||||
// It is used for writing the identifiers in a doctype declaration.
|
||||
// In valid HTML, they can't contain both types of quotes.
|
||||
func writeQuoted(w writer, s string) error {
|
||||
var q byte = '"'
|
||||
if strings.Contains(s, `"`) {
|
||||
q = '\''
|
||||
}
|
||||
if err := w.WriteByte(q); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := w.WriteByte(q); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Section 12.1.2, "Elements", gives this list of void elements. Void elements
|
||||
// are those that can't have any contents.
|
||||
var voidElements = map[string]bool{
|
||||
"area": true,
|
||||
"base": true,
|
||||
"br": true,
|
||||
"col": true,
|
||||
"embed": true,
|
||||
"hr": true,
|
||||
"img": true,
|
||||
"input": true,
|
||||
"keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility.
|
||||
"link": true,
|
||||
"meta": true,
|
||||
"param": true,
|
||||
"source": true,
|
||||
"track": true,
|
||||
"wbr": true,
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -13,6 +13,9 @@ github.com/spf13/cobra
|
|||
# github.com/spf13/pflag v1.0.5
|
||||
## explicit; go 1.12
|
||||
github.com/spf13/pflag
|
||||
# github.com/yosssi/gohtml v0.0.0-20201013000340-ee4748c638f4
|
||||
## explicit
|
||||
github.com/yosssi/gohtml
|
||||
# golang.org/x/image v0.1.0
|
||||
## explicit; go 1.12
|
||||
golang.org/x/image/bmp
|
||||
|
@ -20,3 +23,7 @@ golang.org/x/image/riff
|
|||
golang.org/x/image/vp8
|
||||
golang.org/x/image/vp8l
|
||||
golang.org/x/image/webp
|
||||
# golang.org/x/net v0.0.0-20220722155237-a158d28d115b
|
||||
## explicit; go 1.17
|
||||
golang.org/x/net/html
|
||||
golang.org/x/net/html/atom
|
||||
|
|
Loading…
Reference in New Issue