source: code/trunk/yukari.go@ 146

Last change on this file since 146 was 146, checked in by Izuru Yakumo, 16 months ago

Insane

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 32.7 KB
RevLine 
[142]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "html/template"
13 "io"
14 "log"
15 "mime"
16 "net/url"
17 "os"
18 "path/filepath"
19 "regexp"
20 "strings"
21 "time"
22 "unicode/utf8"
23
24 "github.com/valyala/fasthttp"
25 "github.com/valyala/fasthttp/fasthttpproxy"
26 "golang.org/x/net/html"
27 "golang.org/x/net/html/charset"
28 "golang.org/x/text/encoding"
29
30 "marisa.chaotic.ninja/yukari/config"
31 "marisa.chaotic.ninja/yukari/contenttype"
32)
33
// Text-handling modes of sanitizeHTML: they select how the next text token
// is written (verbatim, through sanitizeCSS, or re-parsed as HTML).
const (
	STATE_DEFAULT     int = iota // regular markup
	STATE_IN_STYLE               // between <style> and </style>
	STATE_IN_NOSCRIPT            // between <noscript> and </noscript>
)

const (
	// VERSION is the version string of this program.
	VERSION = "v0.2.1"

	// MAX_REDIRECT_COUNT is the number of redirects ProcessUri follows
	// before giving up.
	MAX_REDIRECT_COUNT = 5
)
43
// CLIENT is the shared fasthttp client used by ProcessUri for every upstream
// request.
var CLIENT *fasthttp.Client = &fasthttp.Client{
	MaxResponseBodySize: 10 * 1024 * 1024, // 10M
	ReadBufferSize:      16 * 1024,        // 16K
}
48
// cfg is the active configuration; it starts out as the package defaults.
// The handlers in this file read Debug, UrlParameter and HashParameter from it.
var cfg *config.Config = config.DefaultConfig
50
// ALLOWED_CONTENTTYPE_FILTER accepts the content types ProcessUri serves
// inline. The NewFilterEquals arguments are presumably (top-level type,
// subtype, suffix) — confirm against the contenttype package.
//
// NOTE(review): only css and html bodies are rewritten by ProcessUri; every
// other accepted type, image/svg+xml included, is written back unmodified.
// Confirm that passing SVG through verbatim is intended.
var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
	// html
	contenttype.NewFilterEquals("text", "html", ""),
	contenttype.NewFilterEquals("application", "xhtml", "xml"),
	// css
	contenttype.NewFilterEquals("text", "css", ""),
	// images
	contenttype.NewFilterEquals("image", "gif", ""),
	contenttype.NewFilterEquals("image", "png", ""),
	contenttype.NewFilterEquals("image", "jpeg", ""),
	contenttype.NewFilterEquals("image", "pjpeg", ""),
	contenttype.NewFilterEquals("image", "webp", ""),
	contenttype.NewFilterEquals("image", "tiff", ""),
	contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
	contenttype.NewFilterEquals("image", "bmp", ""),
	contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
	contenttype.NewFilterEquals("image", "x-icon", ""),
	contenttype.NewFilterEquals("image", "svg", "xml"),
	// fonts
	contenttype.NewFilterEquals("application", "font-otf", ""),
	contenttype.NewFilterEquals("application", "font-ttf", ""),
	contenttype.NewFilterEquals("application", "font-woff", ""),
	contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
})
75
// ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER accepts content types that are not
// served inline but may still be proxied: ProcessUri forces a
// "Content-Disposition: attachment" header for them.
var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
	// texts
	contenttype.NewFilterEquals("text", "csv", ""),
	contenttype.NewFilterEquals("text", "tab-separated-values", ""),
	contenttype.NewFilterEquals("text", "plain", ""),
	// API
	contenttype.NewFilterEquals("application", "json", ""),
	// Documents
	contenttype.NewFilterEquals("application", "x-latex", ""),
	contenttype.NewFilterEquals("application", "pdf", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
	// Compressed archives
	contenttype.NewFilterEquals("application", "zip", ""),
	contenttype.NewFilterEquals("application", "gzip", ""),
	contenttype.NewFilterEquals("application", "x-compressed", ""),
	contenttype.NewFilterEquals("application", "x-gtar", ""),
	contenttype.NewFilterEquals("application", "x-compress", ""),
	// Generic binary
	contenttype.NewFilterEquals("application", "octet-stream", ""),
})
99
// ALLOWED_CONTENTTYPE_PARAMETERS is handed to contentType.FilterParameters in
// ProcessUri before the Content-Type header is sent back; presumably only the
// listed parameters are kept — confirm against the contenttype package.
var ALLOWED_CONTENTTYPE_PARAMETERS map[string]bool = map[string]bool{
	"charset": true,
}
103
// UNSAFE_ELEMENTS lists the element names sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS [][]byte = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
112
// SAFE_ATTRIBUTES lists the attribute names sanitizeAttr copies to the output
// with their HTML-escaped value. Besides these, only src, href, action and
// style are emitted (after rewriting); every other attribute is dropped.
var SAFE_ATTRIBUTES [][]byte = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
149
// LINK_REL_SAFE_VALUES lists the rel attribute values for which
// sanitizeLinkTag keeps a <link> element. The comparison is an exact byte
// match, so any other value (or combination of values) drops the element.
var LINK_REL_SAFE_VALUES [][]byte = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
171
// LINK_HTTP_EQUIV_SAFE_VALUES lists the lower-cased http-equiv values for
// which sanitizeMetaTag keeps a <meta> element.
var LINK_HTTP_EQUIV_SAFE_VALUES [][]byte = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	[]byte("content-language"),
}
179
// CSS_URL_REGEXP matches CSS url(...) references. Capture group 1 is the
// optional opening quote, group 2 the URL itself and group 3 the optional
// closing quote; sanitizeCSS rewrites only the span of group 2.
var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
181
// Proxy holds the per-instance settings of the request handler.
type Proxy struct {
	// Key is the HMAC key; when non-nil every requested URL must carry a
	// matching hash and the main page does not offer the URL form.
	Key []byte
	// RequestTimeout is passed to CLIENT.DoTimeout for each upstream request.
	RequestTimeout time.Duration
	// FollowRedirect makes ProcessUri follow redirects of GET requests itself.
	FollowRedirect bool
}
187
// RequestConfig carries the state needed to rewrite one fetched document.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs (nil disables signing).
	Key []byte
	// BaseURL is the URL relative references are resolved against;
	// sanitizeHTML replaces it when the document contains <base href>.
	BaseURL *url.URL
	// BodyInjected is set by sanitizeHTML once the header was written at </body>.
	BodyInjected bool
}
193
// HTMLBodyExtParam is the data passed to the HTML_BODY_EXTENSION template.
type HTMLBodyExtParam struct {
	BaseURL      string // URL of the proxied document
	HasYukariKey bool   // true when URLs are signed; renders the URL input read-only
	URLParamName string // name of the URL input field
}
199
// HTMLFormExtParam is the data passed to the HTML_FORM_EXTENSION template,
// which adds hidden inputs to every proxied <form>.
type HTMLFormExtParam struct {
	BaseURL       string // resolved target URL of the form
	YukariHash    string // HMAC of BaseURL, empty when no key is configured
	URLParamName  string // filled with cfg.UrlParameter by sanitizeHTML
	HashParamName string // filled with cfg.HashParameter by sanitizeHTML
}
// HTMLMainPageFormParam is the data passed to the HTML_MAIN_PAGE_FORM template.
type HTMLMainPageFormParam struct {
	URLParamName string // name of the URL input field
}
209
// Templates injected into proxied documents and into Yukari's own pages.
// All three are parsed once by init.
var HTML_FORM_EXTENSION *template.Template
var HTML_BODY_EXTENSION *template.Template
var HTML_MAIN_PAGE_FORM *template.Template

// HTML_HEAD_CONTENT_TYPE is written right after each opening <head> tag of a
// sanitized document.
var HTML_HEAD_CONTENT_TYPE string = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`

// YUKARI_HTML_PAGE_START opens every page generated by Yukari itself
// (main page, error pages, exit page).
var YUKARI_HTML_PAGE_START string = `<!doctype html>
<html>
	<head>
		<title>Yukari's Gap</title>
		<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
		<style>
		html { height: 100%; }
		body { min-height : 100%; display: flex; flex-direction:column; font-family: sans-serif; text-align: center; color: #BC4BFC; background: #240039; margin: 0;
		padding: 0; font-size: 1.1em; }
		input { border: 1px solid #888; padding: 0.3em; color: #BC4BFC; background: #202020; font-size: 1.1em; }
		input[placeholder] { width:80%; }
		a { text-decoration: none; color: #9529B9; }
		h1, h2 { font-weight: 200; margin-bottom: 2rem; }
		h1 { font-size: 3em; }
		.container { flex:1; min-height: 100%; margin-bottom: 1em; }
		.footer { margin: 1em; }
		.footer p { font-size: 0.8em; }
		</style>
	</head>
	<body>
		<div class="container">
		<h1>Yukari's Gap</h1>
`

// YUKARI_HTML_PAGE_END closes a page started with YUKARI_HTML_PAGE_START.
var YUKARI_HTML_PAGE_END string = `
		</div>
		<div class="footer">
		<p>Yukari rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
		<a href="https://git.chaotic.ninja/usr/yakumo_izuru/yukari">view on 「混沌とした 忍者」Git</a>
		</p>
		</div>
</body>
</html>`

// FAVICON_BYTES is the PNG image served for /favicon.ico; filled by init.
var FAVICON_BYTES []byte

// init decodes the embedded favicon and parses the HTML templates.
// Any failure is a programming error in the literals below, hence the panics.
func init() {
	// The literal has no trailing "=" padding, so it must be decoded with the
	// raw (unpadded) alphabet: StdEncoding rejects the final quantum and
	// silently truncates the image when the error is ignored.
	FaviconBase64 := "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	var err error
	FAVICON_BYTES, err = base64.RawStdEncoding.DecodeString(FaviconBase64)
	if err != nil {
		panic(err)
	}

	// The hidden inputs must use the configured parameter names, otherwise
	// submitted forms stop working as soon as the names are customized.
	HTML_FORM_EXTENSION, err = template.New("html_form_extension").Parse(
		`<input type="hidden" name="{{.URLParamName}}" value="{{.BaseURL}}" />{{if .YukariHash}}<input type="hidden" name="{{.HashParamName}}" value="{{.YukariHash}}" />{{end}}`)
	if err != nil {
		panic(err)
	}

	HTML_BODY_EXTENSION, err = template.New("html_body_extension").Parse(`
<input type="checkbox" id="yukaritoggle" autocomplete="off" />
<div id="yukariheader">
	<form method="get">
		<label for="yukaritoggle">hide</label>
		<span><a href="/">Yukari's Gap</a></span>
		<input type="url" value="{{.BaseURL}}" name="{{.URLParamName}}" {{if .HasYukariKey }}readonly="true"{{end}} />
		This is a <a href="https://git.chaotic.ninja/usr/yakumo_izuru/yukari">proxified and sanitized</a> view of the page, visit <a href="{{.BaseURL}}" rel="noreferrer">original site</a>.
	</form>
</div>
<style>
body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
#yukariheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #9826FF; background: #33004A; padding: 4px; color: #D881FF; height: 42px; }
#yukariheader * { padding: 0; margin: 0; }
#yukariheader p { padding: 0 0 0.7em 0; display: block; }
#yukariheader a { color: #8934DB; font-weight: bold; display: inline; }
#yukariheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
#yukariheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
input[type=checkbox]#yukaritoggle { display: none; }
input[type=checkbox]#yukaritoggle:checked ~ div { display: none; visibility: hidden; }
#yukariheader input[type=url] { width: 50%; padding: 4px; font-size: 16px; }
</style>
`)
	if err != nil {
		panic(err)
	}

	// The form is submitted with GET, like the header form above: the request
	// method is forwarded to the target site, so POST would be wrong here.
	HTML_MAIN_PAGE_FORM, err = template.New("html_main_page_form").Parse(`
	<form method="get">
	Visit url: <input placeholder="https://url.." name="{{.URLParamName}}" autofocus />
	<input type="submit" value="go" />
	</form>`)
	if err != nil {
		panic(err)
	}
}
299
300func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
301
302 if appRequestHandler(ctx) {
303 return
304 }
305
306 requestHash := popRequestParam(ctx, []byte(cfg.HashParameter))
307
308 requestURI := popRequestParam(ctx, []byte(cfg.UrlParameter))
309
310 if requestURI == nil {
311 p.serveMainPage(ctx, 200, nil)
312 return
313 }
314
315 if p.Key != nil {
316 if !verifyRequestURI(requestURI, requestHash, p.Key) {
317 // HTTP status code 403 : Forbidden
318 error_message := fmt.Sprintf(`invalid "%s" parameter. hint: Hash URL Parameter`, cfg.HashParameter)
319 p.serveMainPage(ctx, 403, errors.New(error_message))
320 return
321 }
322 }
323
324 requestURIQuery := ctx.QueryArgs().QueryString()
325 if len(requestURIQuery) > 0 {
326 if bytes.ContainsRune(requestURI, '?') {
327 requestURI = append(requestURI, '&')
328 } else {
329 requestURI = append(requestURI, '?')
330 }
331 requestURI = append(requestURI, requestURIQuery...)
332 }
333
334 p.ProcessUri(ctx, string(requestURI), 0)
335}
336
337func (p *Proxy) ProcessUri(ctx *fasthttp.RequestCtx, requestURIStr string, redirectCount int) {
338 parsedURI, err := url.Parse(requestURIStr)
339
340 if err != nil {
341 // HTTP status code 500 : Internal Server Error
342 p.serveMainPage(ctx, 500, err)
343 return
344 }
345
346 if parsedURI.Scheme == "" {
347 requestURIStr = "https://" + requestURIStr
348 parsedURI, err = url.Parse(requestURIStr)
349 if err != nil {
350 p.serveMainPage(ctx, 500, err)
351 return
352 }
353 }
354
355 // Serve an intermediate page for protocols other than HTTP(S)
356 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
357 p.serveExitYukariPage(ctx, parsedURI)
358 return
359 }
360
361 req := fasthttp.AcquireRequest()
362 defer fasthttp.ReleaseRequest(req)
363 req.SetConnectionClose()
364
365 if cfg.Debug {
366 log.Println(string(ctx.Method()), requestURIStr)
367 }
368
369 req.SetRequestURI(requestURIStr)
370 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:112.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"))
371
372 resp := fasthttp.AcquireResponse()
373 defer fasthttp.ReleaseResponse(resp)
374
375 req.Header.SetMethodBytes(ctx.Method())
376 if ctx.IsPost() || ctx.IsPut() {
377 req.SetBody(ctx.PostBody())
378 }
379
380 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
381
382 if err != nil {
383 if err == fasthttp.ErrTimeout {
384 // HTTP status code 504 : Gateway Time-Out
385 p.serveMainPage(ctx, 504, err)
386 } else {
387 // HTTP status code 500 : Internal Server Error
388 p.serveMainPage(ctx, 500, err)
389 }
390 return
391 }
392
393 if resp.StatusCode() != 200 {
394 switch resp.StatusCode() {
395 case 301, 302, 303, 307, 308:
396 loc := resp.Header.Peek("Location")
397 if loc != nil {
398 if p.FollowRedirect && ctx.IsGet() {
399 // GET method: Yukari follows the redirect
400 if redirectCount < MAX_REDIRECT_COUNT {
401 if cfg.Debug {
402 log.Println("follow redirect to", string(loc))
403 }
404 p.ProcessUri(ctx, string(loc), redirectCount+1)
405 } else {
406 p.serveMainPage(ctx, 310, errors.New("Too many redirects"))
407 }
408 return
409 } else {
410 // Other HTTP methods: Yukari does NOT follow the redirect
411 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
412 url, err := rc.ProxifyURI(loc)
413 if err == nil {
414 ctx.SetStatusCode(resp.StatusCode())
415 ctx.Response.Header.Add("Location", url)
416 if cfg.Debug {
417 log.Println("redirect to", string(loc))
418 }
419 return
420 }
421 }
422 }
423 }
424 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
425 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
426 return
427 }
428
429 contentTypeBytes := resp.Header.Peek("Content-Type")
430
431 if contentTypeBytes == nil {
432 // HTTP status code 503 : Service Unavailable
433 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
434 return
435 }
436
437 contentTypeString := string(contentTypeBytes)
438
439 // decode Content-Type header
440 contentType, error := contenttype.ParseContentType(contentTypeString)
441 if error != nil {
442 // HTTP status code 503 : Service Unavailable
443 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
444 return
445 }
446
447 // content-disposition
448 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
449
450 // check content type
451 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
452 // it is not a usual content type
453 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
454 // force attachment for allowed content type
455 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
456 } else {
457 // deny access to forbidden content type
458 // HTTP status code 403 : Forbidden
459 p.serveMainPage(ctx, 403, errors.New("forbidden content type "+parsedURI.String()))
460 return
461 }
462 }
463
464 // HACK : replace */xhtml by text/html
465 if contentType.SubType == "xhtml" {
466 contentType.TopLevelType = "text"
467 contentType.SubType = "html"
468 contentType.Suffix = ""
469 }
470
471 // conversion to UTF-8
472 var responseBody []byte
473
474 if contentType.TopLevelType == "text" {
475 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
476 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
477 responseBody, err = e.NewDecoder().Bytes(resp.Body())
478 if err != nil {
479 // HTTP status code 503 : Service Unavailable
480 p.serveMainPage(ctx, 503, err)
481 return
482 }
483 } else {
484 responseBody = resp.Body()
485 }
486 // update the charset or specify it
487 contentType.Parameters["charset"] = "UTF-8"
488 } else {
489 responseBody = resp.Body()
490 }
491
492 //
493 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
494
495 // set the content type
496 ctx.SetContentType(contentType.String())
497
498 // output according to MIME type
499 switch {
500 case contentType.SubType == "css" && contentType.Suffix == "":
501 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
502 case contentType.SubType == "html" && contentType.Suffix == "":
503 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
504 sanitizeHTML(rc, ctx, responseBody)
505 if !rc.BodyInjected {
506 p := HTMLBodyExtParam{rc.BaseURL.String(), false, cfg.UrlParameter}
507 if len(rc.Key) > 0 {
508 p.HasYukariKey = true
509 }
510 err := HTML_BODY_EXTENSION.Execute(ctx, p)
511 if err != nil {
512 if cfg.Debug {
513 fmt.Println("failed to inject body extension", err)
514 }
515 }
516 }
517 default:
518 if contentDispositionBytes != nil {
519 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
520 }
521 ctx.Write(responseBody)
522 }
523}
524
// contentDispositionForceAttachment returns a Content-Disposition header
// value whose disposition type is always "attachment".
//
// contentDispositionBytes is the upstream header value (may be nil); its
// parameters are kept when it can be parsed. When it carries no file name,
// the last element of url's path is used, unless that path is empty or "/"
// and therefore yields no usable name.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	params := make(map[string]string)

	if contentDispositionBytes != nil {
		// an unparsable header is treated like a missing one
		if _, parsed, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil && parsed != nil {
			params = parsed
		}
	}

	// mime.ParseMediaType lower-cases parameter names, so the key to test
	// and to set is "filename".
	if _, fileNameDefined := params["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		if name != "." && name != "/" && name != string(filepath.Separator) {
			params["filename"] = name
		}
	}

	return []byte(mime.FormatMediaType("attachment", params))
}
547
548func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
549 // serve robots.txt
550 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
551 ctx.SetContentType("text/plain")
552 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
553 return true
554 }
555
556 // server favicon.ico
557 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
558 ctx.SetContentType("image/png")
559 ctx.Write(FAVICON_BYTES)
560 return true
561 }
562
563 return false
564}
565
566func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
567 param := ctx.QueryArgs().PeekBytes(paramName)
568
569 if param == nil {
570 param = ctx.PostArgs().PeekBytes(paramName)
571 ctx.PostArgs().DelBytes(paramName)
572 }
573 ctx.QueryArgs().DelBytes(paramName)
574
575 return param
576}
577
578func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
579 // TODO
580
581 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
582
583 if urlSlices == nil {
584 out.Write(css)
585 return
586 }
587
588 startIndex := 0
589
590 for _, s := range urlSlices {
591 urlStart := s[4]
592 urlEnd := s[5]
593
594 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
595 out.Write(css[startIndex:urlStart])
596 out.Write([]byte(uri))
597 startIndex = urlEnd
598 } else if cfg.Debug {
599 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
600 }
601 }
602 if startIndex < len(css) {
603 out.Write(css[startIndex:len(css)])
604 }
605}
606
607func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
608 r := bytes.NewReader(htmlDoc)
609 decoder := html.NewTokenizer(r)
610 decoder.AllowCDATA(true)
611
612 unsafeElements := make([][]byte, 0, 8)
613 state := STATE_DEFAULT
614 for {
615 token := decoder.Next()
616 if token == html.ErrorToken {
617 err := decoder.Err()
618 if err != io.EOF {
619 log.Println("failed to parse HTML")
620 }
621 break
622 }
623
624 if len(unsafeElements) == 0 {
625
626 switch token {
627 case html.StartTagToken, html.SelfClosingTagToken:
628 tag, hasAttrs := decoder.TagName()
629 safe := !inArray(tag, UNSAFE_ELEMENTS)
630 if !safe {
631 if token != html.SelfClosingTagToken {
632 var unsafeTag []byte = make([]byte, len(tag))
633 copy(unsafeTag, tag)
634 unsafeElements = append(unsafeElements, unsafeTag)
635 }
636 break
637 }
638 if bytes.Equal(tag, []byte("base")) {
639 for {
640 attrName, attrValue, moreAttr := decoder.TagAttr()
641 if bytes.Equal(attrName, []byte("href")) {
642 parsedURI, err := url.Parse(string(attrValue))
643 if err == nil {
644 rc.BaseURL = parsedURI
645 }
646 }
647 if !moreAttr {
648 break
649 }
650 }
651 break
652 }
653 if bytes.Equal(tag, []byte("noscript")) {
654 state = STATE_IN_NOSCRIPT
655 break
656 }
657 var attrs [][][]byte
658 if hasAttrs {
659 for {
660 attrName, attrValue, moreAttr := decoder.TagAttr()
661 attrs = append(attrs, [][]byte{
662 attrName,
663 attrValue,
664 []byte(html.EscapeString(string(attrValue))),
665 })
666 if !moreAttr {
667 break
668 }
669 }
670 }
671 if bytes.Equal(tag, []byte("link")) {
672 sanitizeLinkTag(rc, out, attrs)
673 break
674 }
675
676 if bytes.Equal(tag, []byte("meta")) {
677 sanitizeMetaTag(rc, out, attrs)
678 break
679 }
680
681 fmt.Fprintf(out, "<%s", tag)
682
683 if hasAttrs {
684 sanitizeAttrs(rc, out, attrs)
685 }
686
687 if token == html.SelfClosingTagToken {
688 fmt.Fprintf(out, " />")
689 } else {
690 fmt.Fprintf(out, ">")
691 if bytes.Equal(tag, []byte("style")) {
692 state = STATE_IN_STYLE
693 }
694 }
695
696 if bytes.Equal(tag, []byte("head")) {
697 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
698 }
699
700 if bytes.Equal(tag, []byte("form")) {
701 var formURL *url.URL
702 for _, attr := range attrs {
703 if bytes.Equal(attr[0], []byte("action")) {
704 formURL, _ = url.Parse(string(attr[1]))
705 formURL = mergeURIs(rc.BaseURL, formURL)
706 break
707 }
708 }
709 if formURL == nil {
710 formURL = rc.BaseURL
711 }
712 urlStr := formURL.String()
713 var key string
714 if rc.Key != nil {
715 key = hash(urlStr, rc.Key)
716 }
717 err := HTML_FORM_EXTENSION.Execute(out, HTMLFormExtParam{urlStr, key, cfg.UrlParameter, cfg.HashParameter})
718 if err != nil {
719 if cfg.Debug {
720 fmt.Println("failed to inject body extension", err)
721 }
722 }
723 }
724
725 case html.EndTagToken:
726 tag, _ := decoder.TagName()
727 writeEndTag := true
728 switch string(tag) {
729 case "body":
730 p := HTMLBodyExtParam{rc.BaseURL.String(), false, cfg.UrlParameter}
731 if len(rc.Key) > 0 {
732 p.HasYukariKey = true
733 }
734 err := HTML_BODY_EXTENSION.Execute(out, p)
735 if err != nil {
736 if cfg.Debug {
737 fmt.Println("failed to inject body extension", err)
738 }
739 }
740 rc.BodyInjected = true
741 case "style":
742 state = STATE_DEFAULT
743 case "noscript":
744 state = STATE_DEFAULT
745 writeEndTag = false
746 }
747 // skip noscript tags - only the tag, not the content, because javascript is sanitized
748 if writeEndTag {
749 fmt.Fprintf(out, "</%s>", tag)
750 }
751
752 case html.TextToken:
753 switch state {
754 case STATE_DEFAULT:
755 fmt.Fprintf(out, "%s", decoder.Raw())
756 case STATE_IN_STYLE:
757 sanitizeCSS(rc, out, decoder.Raw())
758 case STATE_IN_NOSCRIPT:
759 sanitizeHTML(rc, out, decoder.Raw())
760 }
761
762 case html.CommentToken:
763 // ignore comment. TODO : parse IE conditional comment
764
765 case html.DoctypeToken:
766 out.Write(decoder.Raw())
767 }
768 } else {
769 switch token {
770 case html.StartTagToken, html.SelfClosingTagToken:
771 tag, _ := decoder.TagName()
772 if inArray(tag, UNSAFE_ELEMENTS) {
773 unsafeElements = append(unsafeElements, tag)
774 }
775
776 case html.EndTagToken:
777 tag, _ := decoder.TagName()
778 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
779 unsafeElements = unsafeElements[:len(unsafeElements)-1]
780 }
781 }
782 }
783 }
784}
785
786func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
787 exclude := false
788 for _, attr := range attrs {
789 attrName := attr[0]
790 attrValue := attr[1]
791 if bytes.Equal(attrName, []byte("rel")) {
792 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
793 exclude = true
794 break
795 }
796 }
797 if bytes.Equal(attrName, []byte("as")) {
798 if bytes.Equal(attrValue, []byte("script")) {
799 exclude = true
800 break
801 }
802 }
803 }
804
805 if !exclude {
806 out.Write([]byte("<link"))
807 for _, attr := range attrs {
808 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
809 }
810 out.Write([]byte(">"))
811 }
812}
813
814func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
815 var http_equiv []byte
816 var content []byte
817
818 for _, attr := range attrs {
819 attrName := attr[0]
820 attrValue := attr[1]
821 if bytes.Equal(attrName, []byte("http-equiv")) {
822 http_equiv = bytes.ToLower(attrValue)
823 // exclude some <meta http-equiv="..." ..>
824 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
825 return
826 }
827 }
828 if bytes.Equal(attrName, []byte("content")) {
829 content = attrValue
830 }
831 if bytes.Equal(attrName, []byte("charset")) {
832 // exclude <meta charset="...">
833 return
834 }
835 }
836
837 out.Write([]byte("<meta"))
838 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
839 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
840 contentUrl := content[urlIndex+4:]
841 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
842 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
843 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
844 contentUrl = contentUrl[1 : len(contentUrl)-1]
845 }
846 }
847 // output proxify result
848 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
849 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
850 }
851 } else {
852 if len(http_equiv) > 0 {
853 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
854 }
855 sanitizeAttrs(rc, out, attrs)
856 }
857 out.Write([]byte(">"))
858}
859
860func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
861 for _, attr := range attrs {
862 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
863 }
864}
865
866func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
867 if inArray(attrName, SAFE_ATTRIBUTES) {
868 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
869 return
870 }
871 switch string(attrName) {
872 case "src", "href", "action":
873 if uri, err := rc.ProxifyURI(attrValue); err == nil {
874 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
875 } else if cfg.Debug {
876 log.Println("cannot proxify uri:", string(attrValue))
877 }
878 case "style":
879 cssAttr := bytes.NewBuffer(nil)
880 sanitizeCSS(rc, cssAttr, attrValue)
881 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
882 }
883}
884
// mergeURIs resolves the reference u2 against the base u1.
// A nil u2 yields u1 itself.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
891
// sanitizeURI cleans up the beginning and the end of uri and extracts its
// scheme.
//
// Trailing bytes <= 32 (control characters and space) are removed. The URI is
// then scanned up to the first ':' (a scheme was found) or the first '/',
// '?', '\' or '#' (most probably a relative URI). With a scheme, the bytes
// <= 32 in front of and inside it are removed and it is lower-cased; the
// cleaned URI and the scheme including its ':' are returned. Without a
// scheme, the URI is returned from its first byte > 32 and the scheme is "".
//
// The result shares memory with uri, which is modified in place; only the
// scheme is newly allocated.
func sanitizeURI(uri []byte) ([]byte, string) {
	const controlsAndSpace = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20"

	uri = bytes.TrimRight(uri, controlsAndSpace)

	start := 0                      // index of the first byte > 32
	started := false                // whether start has been set
	colon := -1                     // index of the ':' ending the scheme
	cleaned := make([]byte, 0, 10)  // scanned bytes, lower-cased, without bytes <= 32

scan:
	for pos := 0; pos < len(uri); pos++ {
		ch := uri[pos]
		if ch <= 32 {
			continue
		}

		// ASCII upper case to lower case
		if ch < utf8.RuneSelf && ch >= 'A' && ch <= 'Z' {
			ch += 'a' - 'A'
		}
		cleaned = append(cleaned, ch)

		if !started {
			start, started = pos, true
		}

		switch ch {
		case ':':
			colon = pos
			break scan
		case '/', '?', '\\', '#':
			break scan
		}
	}

	if colon == -1 {
		return uri[start:], ""
	}

	// Write the cleaned scheme so that it ends exactly on the ':' and return
	// the URI from there.
	begin := colon - len(cleaned) + 1
	copy(uri[begin:], cleaned)
	return uri[begin:], string(cleaned)
}
943
944func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
945 // sanitize URI
946 uri, scheme := sanitizeURI(uri)
947
948 // remove javascript protocol
949 if scheme == "javascript:" {
950 return "", nil
951 }
952
953 // TODO check malicious data: - e.g. data:script
954 if scheme == "data:" {
955 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
956 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
957 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
958 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
959 bytes.HasPrefix(uri, []byte("data:image/webp")) {
960 // should be safe
961 return string(uri), nil
962 } else {
963 // unsafe data
964 return "", nil
965 }
966 }
967
968 // parse the uri
969 u, err := url.Parse(string(uri))
970 if err != nil {
971 return "", err
972 }
973
974 // get the fragment (with the prefix "#")
975 fragment := ""
976 if len(u.Fragment) > 0 {
977 fragment = "#" + u.Fragment
978 }
979
980 // reset the fragment: it is not included in the yukariurl
981 u.Fragment = ""
982
983 // merge the URI with the document URI
984 u = mergeURIs(rc.BaseURL, u)
985
986 // simple internal link ?
987 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
988 if u.Scheme == rc.BaseURL.Scheme &&
989 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
990 u.Host == rc.BaseURL.Host &&
991 u.Path == rc.BaseURL.Path &&
992 u.RawQuery == rc.BaseURL.RawQuery {
993 // the fragment is the only difference between the document URI and the uri parameter
994 return fragment, nil
995 }
996
997 // return full URI and fragment (if not empty)
998 yukari_uri := u.String()
999
1000 if rc.Key == nil {
1001 return fmt.Sprintf("./?%s=%s%s", cfg.UrlParameter, url.QueryEscape(yukari_uri), fragment), nil
1002 }
1003 return fmt.Sprintf("./?%s=%s&%s=%s%s", cfg.HashParameter, hash(yukari_uri, rc.Key), cfg.UrlParameter, url.QueryEscape(yukari_uri), fragment), nil
1004}
1005
// inArray reports whether a contains an element equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
1014
// hash returns the HMAC-SHA256 of msg under key as a lower-case hex string.
func hash(msg string, key []byte) string {
	digest := hmac.New(sha256.New, key)
	digest.Write([]byte(msg))

	sum := digest.Sum(nil)
	encoded := make([]byte, hex.EncodedLen(len(sum)))
	hex.Encode(encoded, sum)
	return string(encoded)
}
1020
1021func verifyRequestURI(uri, hashMsg, key []byte) bool {
1022 h := make([]byte, hex.DecodedLen(len(hashMsg)))
1023 _, err := hex.Decode(h, hashMsg)
1024 if err != nil {
1025 if cfg.Debug {
1026 log.Println("hmac error:", err)
1027 }
1028 return false
1029 }
1030 mac := hmac.New(sha256.New, key)
1031 mac.Write(uri)
1032 return hmac.Equal(h, mac.Sum(nil))
1033}
1034
1035func (p *Proxy) serveExitYukariPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
1036 ctx.SetContentType("text/html")
1037 ctx.SetStatusCode(403)
1038 ctx.Write([]byte(YUKARI_HTML_PAGE_START))
1039 ctx.Write([]byte("<h2>You are about to exit YukariSukima</h2>"))
1040 ctx.Write([]byte("<p>Following</p><p><a href=\""))
1041 ctx.Write([]byte(html.EscapeString(uri.String())))
1042 ctx.Write([]byte("\" rel=\"noreferrer\">"))
1043 ctx.Write([]byte(html.EscapeString(uri.String())))
1044 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
1045 ctx.Write([]byte(YUKARI_HTML_PAGE_END))
1046}
1047
1048func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
1049 ctx.SetContentType("text/html; charset=UTF-8")
1050 ctx.SetStatusCode(statusCode)
1051 ctx.Write([]byte(YUKARI_HTML_PAGE_START))
1052 if err != nil {
1053 if cfg.Debug {
1054 log.Println("error:", err)
1055 }
1056 ctx.Write([]byte("<h2>Error: "))
1057 ctx.Write([]byte(html.EscapeString(err.Error())))
1058 ctx.Write([]byte("</h2>"))
1059 }
1060 if p.Key == nil {
1061 p := HTMLMainPageFormParam{cfg.UrlParameter}
1062 err := HTML_MAIN_PAGE_FORM.Execute(ctx, p)
1063 if err != nil {
1064 if cfg.Debug {
1065 fmt.Println("failed to inject main page form", err)
1066 }
1067 }
1068 } else {
1069 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
1070 }
1071 ctx.Write([]byte(YUKARI_HTML_PAGE_END))
1072}
1073
1074func main() {
1075 listenAddress := flag.String("listen", cfg.ListenAddress, "Listen address")
1076 key := flag.String("key", cfg.Key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
1077 IPV6 := flag.Bool("ipv6", cfg.IPV6, "Allow IPv6 HTTP requests")
1078 debug := flag.Bool("debug", cfg.Debug, "Debug mode")
1079 requestTimeout := flag.Uint("timeout", cfg.RequestTimeout, "Request timeout")
1080 followRedirect := flag.Bool("followredirect", cfg.FollowRedirect, "Follow HTTP GET redirect")
1081 proxyenv := flag.Bool("proxyenv", false, "Use a HTTP proxy as set in the environment (HTTP_PROXY, HTTPS_PROXY and NO_PROXY). Overrides -proxy, -socks5, -ipv6.")
1082 proxy := flag.String("proxy", "", "Use the specified HTTP proxy (ie: '[user:pass@]hostname:port'). Overrides -socks5, -ipv6.")
1083 socks5 := flag.String("socks5", "", "Use a SOCKS5 proxy (ie: 'hostname:port'). Overrides -ipv6.")
1084 urlParameter := flag.String("urlparam", cfg.UrlParameter, "user-defined requesting string URL parameter name (ie: '/?url=...' or '/?u=...')")
1085 hashParameter := flag.String("hashparam", cfg.HashParameter, "user-defined requesting string HASH parameter name (ie: '/?hash=...' or '/?h=...')")
1086 version := flag.Bool("version", false, "Show version")
1087 flag.Parse()
1088
1089 cfg.ListenAddress = *listenAddress
1090 cfg.Key = *key
1091 cfg.IPV6 = *IPV6
1092 cfg.Debug = *debug
1093 cfg.RequestTimeout = *requestTimeout
1094 cfg.FollowRedirect = *followRedirect
1095 cfg.UrlParameter = *urlParameter
1096 cfg.HashParameter = *hashParameter
1097
1098 if *version {
1099 fmt.Println(VERSION)
1100 return
1101 }
1102
1103 if *proxyenv && os.Getenv("HTTP_PROXY") == "" && os.Getenv("HTTPS_PROXY") == "" {
1104 log.Fatal("Error -proxyenv is used but no environment variables named 'HTTP_PROXY' and/or 'HTTPS_PROXY' could be found.")
1105 os.Exit(1)
1106 }
1107
1108 if *proxyenv {
1109 CLIENT.Dial = fasthttpproxy.FasthttpProxyHTTPDialer()
1110 log.Println("Using environment defined proxy(ies).")
1111 } else if *proxy != "" {
1112 CLIENT.Dial = fasthttpproxy.FasthttpHTTPDialer(*proxy)
1113 log.Println("Using custom HTTP proxy.")
1114 } else if *socks5 != "" {
1115 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1116 log.Println("Using Socks5 proxy.")
1117 } else if cfg.IPV6 {
1118 CLIENT.Dial = fasthttp.DialDualStack
1119 log.Println("Using dual stack (IPv4/IPv6) direct connections.")
1120 } else {
1121 CLIENT.Dial = fasthttp.Dial
1122 log.Println("Using IPv4 only direct connections.")
1123 }
1124
1125 p := &Proxy{RequestTimeout: time.Duration(cfg.RequestTimeout) * time.Second,
1126 FollowRedirect: cfg.FollowRedirect}
1127
1128 if cfg.Key != "" {
1129 var err error
1130 p.Key, err = base64.StdEncoding.DecodeString(cfg.Key)
1131 if err != nil {
1132 log.Fatal("Error parsing -key", err.Error())
1133 os.Exit(1)
1134 }
1135 }
1136
1137 log.Println("listening on", cfg.ListenAddress)
1138
1139 if err := fasthttp.ListenAndServe(cfg.ListenAddress, p.RequestHandler); err != nil {
1140 log.Fatal("Error in ListenAndServe:", err)
1141 }
1142}
Note: See TracBrowser for help on using the repository browser.