source: code/trunk/yukari.go@ 143

Last change on this file since 143 was 143, checked in by Izuru Yakumo, 22 months ago

Use better coloring [1/2]

Signed-off-by: Izuru Yakumo <yakumo.izuru@…>

File size: 32.8 KB
RevLine 
[142]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "html/template"
13 "io"
14 "log"
15 "mime"
16 "net/url"
17 "os"
18 "path/filepath"
19 "regexp"
20 "strings"
21 "time"
22 "unicode/utf8"
23
24 "github.com/valyala/fasthttp"
25 "github.com/valyala/fasthttp/fasthttpproxy"
26 "golang.org/x/net/html"
27 "golang.org/x/net/html/charset"
28 "golang.org/x/text/encoding"
29
30 "marisa.chaotic.ninja/yukari/config"
31 "marisa.chaotic.ninja/yukari/contenttype"
32)
33
// Parser states used by sanitizeHTML to decide how a text token is emitted.
const (
	// STATE_DEFAULT: text is written through unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is the body of a <style> element and goes through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is the body of a <noscript> element and is sanitized as HTML.
	STATE_IN_NOSCRIPT
)

// VERSION is the version string of this program.
const VERSION = "v0.2.1"

// MAX_REDIRECT_COUNT is the number of redirects ProcessUri follows before giving up.
const MAX_REDIRECT_COUNT = 5
43
// CLIENT is the fasthttp client ProcessUri uses to fetch upstream documents.
var CLIENT *fasthttp.Client = &fasthttp.Client{
	MaxResponseBodySize: 10 * 1024 * 1024, // 10M
	ReadBufferSize:      16 * 1024,        // 16K
}

// cfg is the active configuration; it starts out as the package defaults.
var cfg *config.Config = config.DefaultConfig
50
// ALLOWED_CONTENTTYPE_FILTER matches the content types ProcessUri serves
// without forcing a download. Each entry is (top-level type, subtype, suffix).
var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
	// html
	contenttype.NewFilterEquals("text", "html", ""),
	contenttype.NewFilterEquals("application", "xhtml", "xml"),
	// css
	contenttype.NewFilterEquals("text", "css", ""),
	// images
	contenttype.NewFilterEquals("image", "gif", ""),
	contenttype.NewFilterEquals("image", "png", ""),
	contenttype.NewFilterEquals("image", "jpeg", ""),
	contenttype.NewFilterEquals("image", "pjpeg", ""),
	contenttype.NewFilterEquals("image", "webp", ""),
	contenttype.NewFilterEquals("image", "tiff", ""),
	contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
	contenttype.NewFilterEquals("image", "bmp", ""),
	contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
	contenttype.NewFilterEquals("image", "x-icon", ""),
	contenttype.NewFilterEquals("image", "svg", "xml"),
	// fonts
	contenttype.NewFilterEquals("application", "font-otf", ""),
	contenttype.NewFilterEquals("application", "font-ttf", ""),
	contenttype.NewFilterEquals("application", "font-woff", ""),
	contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
})
75
// ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER matches content types that are not in
// ALLOWED_CONTENTTYPE_FILTER but may still be relayed; ProcessUri forces a
// "Content-Disposition: attachment" for them. Anything matching neither
// filter is refused with a 403.
var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
	// texts
	contenttype.NewFilterEquals("text", "csv", ""),
	contenttype.NewFilterEquals("text", "tab-separated-values", ""),
	contenttype.NewFilterEquals("text", "plain", ""),
	// API
	contenttype.NewFilterEquals("application", "json", ""),
	// Documents
	contenttype.NewFilterEquals("application", "x-latex", ""),
	contenttype.NewFilterEquals("application", "pdf", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
	// Compressed archives
	contenttype.NewFilterEquals("application", "zip", ""),
	contenttype.NewFilterEquals("application", "gzip", ""),
	contenttype.NewFilterEquals("application", "x-compressed", ""),
	contenttype.NewFilterEquals("application", "x-gtar", ""),
	contenttype.NewFilterEquals("application", "x-compress", ""),
	// Generic binary
	contenttype.NewFilterEquals("application", "octet-stream", ""),
})
99
// ALLOWED_CONTENTTYPE_PARAMETERS is the parameter set ProcessUri passes to
// contentType.FilterParameters before emitting the Content-Type header.
var ALLOWED_CONTENTTYPE_PARAMETERS map[string]bool = map[string]bool{
	"charset": true,
}
103
// UNSAFE_ELEMENTS lists the tag names sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS [][]byte = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
113
// SAFE_ATTRIBUTES lists the attribute names sanitizeAttr copies to the output
// with their HTML-escaped value. Other attributes are either rewritten
// (src, href, action, style) or dropped.
var SAFE_ATTRIBUTES [][]byte = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
150
// LINK_REL_SAFE_VALUES lists the exact rel attribute values for which
// sanitizeLinkTag keeps a <link> element; any other value drops the element.
var LINK_REL_SAFE_VALUES [][]byte = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
172
// LINK_HTTP_EQUIV_SAFE_VALUES lists the lower-cased http-equiv values for
// which sanitizeMetaTag keeps a <meta> element.
var LINK_HTTP_EQUIV_SAFE_VALUES [][]byte = [][]byte{
	// X-UA-Compatible is added automatically (see HTML_HEAD_CONTENT_TYPE), so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
181
// CSS_URL_REGEXP matches CSS url(...) references. Capture group 1 is the
// optional opening quote, group 2 the URL itself (the group sanitizeCSS
// rewrites), group 3 the optional closing quote.
var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
183
// Proxy holds the request-handling settings.
type Proxy struct {
	// Key is the HMAC key used to verify and sign proxied URLs; nil disables verification.
	Key []byte
	// RequestTimeout is the timeout passed to the upstream client for each fetch.
	RequestTimeout time.Duration
	// FollowRedirect makes GET requests follow upstream redirects server-side.
	FollowRedirect bool
}
189
// RequestConfig carries the per-document state used while sanitizing.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs (nil: unsigned).
	Key []byte
	// BaseURL is the URL relative references are resolved against; a <base href>
	// encountered by sanitizeHTML replaces it.
	BaseURL *url.URL
	// BodyInjected is set once sanitizeHTML has written the header bar at </body>.
	BodyInjected bool
}
195
// HTMLBodyExtParam is the data passed to the HTML_BODY_EXTENSION template.
type HTMLBodyExtParam struct {
	BaseURL      string // URL of the proxied page, shown in the header bar
	HasYukariKey bool   // true when a key is configured; makes the URL input read-only
	URLParamName string // name of the URL request parameter
}
201
// HTMLFormExtParam is the data passed to the HTML_FORM_EXTENSION template,
// which adds hidden inputs to every proxied <form>.
type HTMLFormExtParam struct {
	BaseURL       string // resolved form target URL
	YukariHash    string // HMAC of BaseURL, empty when no key is configured
	URLParamName  string // name of the URL request parameter
	HashParamName string // name of the hash request parameter
}
// HTMLMainPageFormParam is the data passed to the HTML_MAIN_PAGE_FORM template.
type HTMLMainPageFormParam struct {
	URLParamName string // name of the URL request parameter
}
211
// Templates parsed once in init().
var HTML_FORM_EXTENSION *template.Template
var HTML_BODY_EXTENSION *template.Template
var HTML_MAIN_PAGE_FORM *template.Template

// HTML_HEAD_CONTENT_TYPE is written by sanitizeHTML right after each opening <head> tag.
var HTML_HEAD_CONTENT_TYPE string = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
219
// YUKARI_HTML_PAGE_START is the opening part of the pages Yukari renders
// itself (main page, error page, exit page); it is closed by
// YUKARI_HTML_PAGE_END.
//
// The link rule previously read "a { text-decoration: none; #2980b9; }", which
// is not a valid declaration (a value without a property), so browsers
// discarded it. The missing "color:" property is restored.
var YUKARI_HTML_PAGE_START string = `<!doctype html>
<html>
 <head>
 <title>Yukari's Gap</title>
 <meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
 <style>
 html { height: 100%; }
 body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #BC4BFC; background: #FAFAFA; margin: 0;
 padding: 0; font-size: 1.1em; }
 input { border: 1px solid #888; padding: 0.3em; color: #BC4BFC; background: #FFF; font-size: 1.1em; }
 input[placeholder] { width:80%; }
 a { text-decoration: none; color: #2980b9; }
 h1, h2 { font-weight: 200; margin-bottom: 2rem; }
 h1 { font-size: 3em; }
 .container { flex:1; min-height: 100%; margin-bottom: 1em; }
 .footer { margin: 1em; }
 .footer p { font-size: 0.8em; }
 </style>
 </head>
 <body>
 <div class="container">
 <h1>Yukari's Gap</h1>
`
243
// YUKARI_HTML_PAGE_END closes the container opened by YUKARI_HTML_PAGE_START
// and appends the footer. It contains non-ASCII (UTF-8) text.
var YUKARI_HTML_PAGE_END string = `
 </div>
 <div class="footer">
 <p>Yukari rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://git.chaotic.ninja/yakumo.izuru/yukari">view on 「混沌とした 忍者」Git</a>
 </p>
 </div>
</body>
</html>`
253
// FAVICON_BYTES is the image served for /favicon.ico; it is decoded in init().
var FAVICON_BYTES []byte
255
256func init() {
257 FaviconBase64 := "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"
258
259 FAVICON_BYTES, _ = base64.StdEncoding.DecodeString(FaviconBase64)
260 var err error
261 HTML_FORM_EXTENSION, err = template.New("html_form_extension").Parse(
262 `<input type="hidden" name="yukariurl" value="{{.BaseURL}}" />{{if .YukariHash}}<input type="hidden" name="yukarihash" value="{{.YukariHash}}" />{{end}}`)
263 if err != nil {
264 panic(err)
265 }
266 HTML_BODY_EXTENSION, err = template.New("html_body_extension").Parse(`
267<input type="checkbox" id="yukaritoggle" autocomplete="off" />
268<div id="yukariheader">
269 <form method="get">
270 <label for="yukaritoggle">hide</label>
271 <span><a href="/">Yukari's Gap</a></span>
272 <input type="url" value="{{.BaseURL}}" name="{{.URLParamName}}" {{if .HasYukariKey }}readonly="true"{{end}} />
[143]273 This is a <a href="https://git.chaotic.ninja/yakumo.izuru/yukari">proxified and sanitized</a> view of the page, visit <a href="{{.BaseURL}}" rel="noreferrer">original site</a>.
[142]274 </form>
275</div>
276<style>
277body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
278#yukariheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #AAAAAA; background: #FFF; padding: 4px; color: #444; height: 42px; }
279#yukariheader * { padding: 0; margin: 0; }
280#yukariheader p { padding: 0 0 0.7em 0; display: block; }
[143]281#yukariheader a { color: #8934DB; font-weight: bold; display: inline; }
[142]282#yukariheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
283#yukariheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
284input[type=checkbox]#yukaritoggle { display: none; }
285input[type=checkbox]#yukaritoggle:checked ~ div { display: none; visibility: hidden; }
286#yukariheader input[type=url] { width: 50%; padding: 4px; font-size: 16px; }
287</style>
288`)
289 if err != nil {
290 panic(err)
291 }
292 HTML_MAIN_PAGE_FORM, err = template.New("html_main_page_form").Parse(`
293 <form action="post">
294 Visit url: <input placeholder="https://url.." name="{{.URLParamName}}" autofocus />
295 <input type="submit" value="go" />
296 </form>`)
297 if err != nil {
298 panic(err)
299 }
300}
301
302func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
303
304 if appRequestHandler(ctx) {
305 return
306 }
307
308 requestHash := popRequestParam(ctx, []byte(cfg.HashParameter))
309
310 requestURI := popRequestParam(ctx, []byte(cfg.UrlParameter))
311
312 if requestURI == nil {
313 p.serveMainPage(ctx, 200, nil)
314 return
315 }
316
317 if p.Key != nil {
318 if !verifyRequestURI(requestURI, requestHash, p.Key) {
319 // HTTP status code 403 : Forbidden
320 error_message := fmt.Sprintf(`invalid "%s" parameter. hint: Hash URL Parameter`, cfg.HashParameter)
321 p.serveMainPage(ctx, 403, errors.New(error_message))
322 return
323 }
324 }
325
326 requestURIQuery := ctx.QueryArgs().QueryString()
327 if len(requestURIQuery) > 0 {
328 if bytes.ContainsRune(requestURI, '?') {
329 requestURI = append(requestURI, '&')
330 } else {
331 requestURI = append(requestURI, '?')
332 }
333 requestURI = append(requestURI, requestURIQuery...)
334 }
335
336 p.ProcessUri(ctx, string(requestURI), 0)
337}
338
339func (p *Proxy) ProcessUri(ctx *fasthttp.RequestCtx, requestURIStr string, redirectCount int) {
340 parsedURI, err := url.Parse(requestURIStr)
341
342 if err != nil {
343 // HTTP status code 500 : Internal Server Error
344 p.serveMainPage(ctx, 500, err)
345 return
346 }
347
348 if parsedURI.Scheme == "" {
349 requestURIStr = "https://" + requestURIStr
350 parsedURI, err = url.Parse(requestURIStr)
351 if err != nil {
352 p.serveMainPage(ctx, 500, err)
353 return
354 }
355 }
356
357 // Serve an intermediate page for protocols other than HTTP(S)
358 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
359 p.serveExitYukariPage(ctx, parsedURI)
360 return
361 }
362
363 req := fasthttp.AcquireRequest()
364 defer fasthttp.ReleaseRequest(req)
365 req.SetConnectionClose()
366
367 if cfg.Debug {
368 log.Println(string(ctx.Method()), requestURIStr)
369 }
370
371 req.SetRequestURI(requestURIStr)
372 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:112.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"))
373
374 resp := fasthttp.AcquireResponse()
375 defer fasthttp.ReleaseResponse(resp)
376
377 req.Header.SetMethodBytes(ctx.Method())
378 if ctx.IsPost() || ctx.IsPut() {
379 req.SetBody(ctx.PostBody())
380 }
381
382 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
383
384 if err != nil {
385 if err == fasthttp.ErrTimeout {
386 // HTTP status code 504 : Gateway Time-Out
387 p.serveMainPage(ctx, 504, err)
388 } else {
389 // HTTP status code 500 : Internal Server Error
390 p.serveMainPage(ctx, 500, err)
391 }
392 return
393 }
394
395 if resp.StatusCode() != 200 {
396 switch resp.StatusCode() {
397 case 301, 302, 303, 307, 308:
398 loc := resp.Header.Peek("Location")
399 if loc != nil {
400 if p.FollowRedirect && ctx.IsGet() {
401 // GET method: Yukari follows the redirect
402 if redirectCount < MAX_REDIRECT_COUNT {
403 if cfg.Debug {
404 log.Println("follow redirect to", string(loc))
405 }
406 p.ProcessUri(ctx, string(loc), redirectCount+1)
407 } else {
408 p.serveMainPage(ctx, 310, errors.New("Too many redirects"))
409 }
410 return
411 } else {
412 // Other HTTP methods: Yukari does NOT follow the redirect
413 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
414 url, err := rc.ProxifyURI(loc)
415 if err == nil {
416 ctx.SetStatusCode(resp.StatusCode())
417 ctx.Response.Header.Add("Location", url)
418 if cfg.Debug {
419 log.Println("redirect to", string(loc))
420 }
421 return
422 }
423 }
424 }
425 }
426 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
427 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
428 return
429 }
430
431 contentTypeBytes := resp.Header.Peek("Content-Type")
432
433 if contentTypeBytes == nil {
434 // HTTP status code 503 : Service Unavailable
435 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
436 return
437 }
438
439 contentTypeString := string(contentTypeBytes)
440
441 // decode Content-Type header
442 contentType, error := contenttype.ParseContentType(contentTypeString)
443 if error != nil {
444 // HTTP status code 503 : Service Unavailable
445 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
446 return
447 }
448
449 // content-disposition
450 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
451
452 // check content type
453 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
454 // it is not a usual content type
455 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
456 // force attachment for allowed content type
457 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
458 } else {
459 // deny access to forbidden content type
460 // HTTP status code 403 : Forbidden
461 p.serveMainPage(ctx, 403, errors.New("forbidden content type "+parsedURI.String()))
462 return
463 }
464 }
465
466 // HACK : replace */xhtml by text/html
467 if contentType.SubType == "xhtml" {
468 contentType.TopLevelType = "text"
469 contentType.SubType = "html"
470 contentType.Suffix = ""
471 }
472
473 // conversion to UTF-8
474 var responseBody []byte
475
476 if contentType.TopLevelType == "text" {
477 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
478 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
479 responseBody, err = e.NewDecoder().Bytes(resp.Body())
480 if err != nil {
481 // HTTP status code 503 : Service Unavailable
482 p.serveMainPage(ctx, 503, err)
483 return
484 }
485 } else {
486 responseBody = resp.Body()
487 }
488 // update the charset or specify it
489 contentType.Parameters["charset"] = "UTF-8"
490 } else {
491 responseBody = resp.Body()
492 }
493
494 //
495 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
496
497 // set the content type
498 ctx.SetContentType(contentType.String())
499
500 // output according to MIME type
501 switch {
502 case contentType.SubType == "css" && contentType.Suffix == "":
503 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
504 case contentType.SubType == "html" && contentType.Suffix == "":
505 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
506 sanitizeHTML(rc, ctx, responseBody)
507 if !rc.BodyInjected {
508 p := HTMLBodyExtParam{rc.BaseURL.String(), false, cfg.UrlParameter}
509 if len(rc.Key) > 0 {
510 p.HasYukariKey = true
511 }
512 err := HTML_BODY_EXTENSION.Execute(ctx, p)
513 if err != nil {
514 if cfg.Debug {
515 fmt.Println("failed to inject body extension", err)
516 }
517 }
518 }
519 default:
520 if contentDispositionBytes != nil {
521 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
522 }
523 ctx.Write(responseBody)
524 }
525}
526
// contentDispositionForceAttachment returns a Content-Disposition header value
// whose disposition type is always "attachment".
//
// Parameters of the incoming header (contentDispositionBytes, may be nil) are
// kept when it parses; an unparsable header is ignored. When no "filename"
// parameter is present, the last segment of url's path is used. Paths without
// a usable last segment (empty path or "/") add no filename at all rather than
// "." or "/".
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var contentDispositionParams map[string]string

	if contentDispositionBytes != nil {
		var err error
		_, contentDispositionParams, err = mime.ParseMediaType(string(contentDispositionBytes))
		if err != nil {
			contentDispositionParams = nil
		}
	}
	if contentDispositionParams == nil {
		contentDispositionParams = make(map[string]string)
	}

	if _, fileNameDefined := contentDispositionParams["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		if name != "." && name != "/" && name != string(filepath.Separator) {
			// Same (lower-case) key as the lookup above.
			contentDispositionParams["filename"] = name
		}
	}

	return []byte(mime.FormatMediaType("attachment", contentDispositionParams))
}
549
550func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
551 // serve robots.txt
552 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
553 ctx.SetContentType("text/plain")
554 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
555 return true
556 }
557
558 // server favicon.ico
559 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
560 ctx.SetContentType("image/png")
561 ctx.Write(FAVICON_BYTES)
562 return true
563 }
564
565 return false
566}
567
568func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
569 param := ctx.QueryArgs().PeekBytes(paramName)
570
571 if param == nil {
572 param = ctx.PostArgs().PeekBytes(paramName)
573 ctx.PostArgs().DelBytes(paramName)
574 }
575 ctx.QueryArgs().DelBytes(paramName)
576
577 return param
578}
579
580func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
581 // TODO
582
583 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
584
585 if urlSlices == nil {
586 out.Write(css)
587 return
588 }
589
590 startIndex := 0
591
592 for _, s := range urlSlices {
593 urlStart := s[4]
594 urlEnd := s[5]
595
596 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
597 out.Write(css[startIndex:urlStart])
598 out.Write([]byte(uri))
599 startIndex = urlEnd
600 } else if cfg.Debug {
601 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
602 }
603 }
604 if startIndex < len(css) {
605 out.Write(css[startIndex:len(css)])
606 }
607}
608
609func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
610 r := bytes.NewReader(htmlDoc)
611 decoder := html.NewTokenizer(r)
612 decoder.AllowCDATA(true)
613
614 unsafeElements := make([][]byte, 0, 8)
615 state := STATE_DEFAULT
616 for {
617 token := decoder.Next()
618 if token == html.ErrorToken {
619 err := decoder.Err()
620 if err != io.EOF {
621 log.Println("failed to parse HTML")
622 }
623 break
624 }
625
626 if len(unsafeElements) == 0 {
627
628 switch token {
629 case html.StartTagToken, html.SelfClosingTagToken:
630 tag, hasAttrs := decoder.TagName()
631 safe := !inArray(tag, UNSAFE_ELEMENTS)
632 if !safe {
633 if token != html.SelfClosingTagToken {
634 var unsafeTag []byte = make([]byte, len(tag))
635 copy(unsafeTag, tag)
636 unsafeElements = append(unsafeElements, unsafeTag)
637 }
638 break
639 }
640 if bytes.Equal(tag, []byte("base")) {
641 for {
642 attrName, attrValue, moreAttr := decoder.TagAttr()
643 if bytes.Equal(attrName, []byte("href")) {
644 parsedURI, err := url.Parse(string(attrValue))
645 if err == nil {
646 rc.BaseURL = parsedURI
647 }
648 }
649 if !moreAttr {
650 break
651 }
652 }
653 break
654 }
655 if bytes.Equal(tag, []byte("noscript")) {
656 state = STATE_IN_NOSCRIPT
657 break
658 }
659 var attrs [][][]byte
660 if hasAttrs {
661 for {
662 attrName, attrValue, moreAttr := decoder.TagAttr()
663 attrs = append(attrs, [][]byte{
664 attrName,
665 attrValue,
666 []byte(html.EscapeString(string(attrValue))),
667 })
668 if !moreAttr {
669 break
670 }
671 }
672 }
673 if bytes.Equal(tag, []byte("link")) {
674 sanitizeLinkTag(rc, out, attrs)
675 break
676 }
677
678 if bytes.Equal(tag, []byte("meta")) {
679 sanitizeMetaTag(rc, out, attrs)
680 break
681 }
682
683 fmt.Fprintf(out, "<%s", tag)
684
685 if hasAttrs {
686 sanitizeAttrs(rc, out, attrs)
687 }
688
689 if token == html.SelfClosingTagToken {
690 fmt.Fprintf(out, " />")
691 } else {
692 fmt.Fprintf(out, ">")
693 if bytes.Equal(tag, []byte("style")) {
694 state = STATE_IN_STYLE
695 }
696 }
697
698 if bytes.Equal(tag, []byte("head")) {
699 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
700 }
701
702 if bytes.Equal(tag, []byte("form")) {
703 var formURL *url.URL
704 for _, attr := range attrs {
705 if bytes.Equal(attr[0], []byte("action")) {
706 formURL, _ = url.Parse(string(attr[1]))
707 formURL = mergeURIs(rc.BaseURL, formURL)
708 break
709 }
710 }
711 if formURL == nil {
712 formURL = rc.BaseURL
713 }
714 urlStr := formURL.String()
715 var key string
716 if rc.Key != nil {
717 key = hash(urlStr, rc.Key)
718 }
719 err := HTML_FORM_EXTENSION.Execute(out, HTMLFormExtParam{urlStr, key, cfg.UrlParameter, cfg.HashParameter})
720 if err != nil {
721 if cfg.Debug {
722 fmt.Println("failed to inject body extension", err)
723 }
724 }
725 }
726
727 case html.EndTagToken:
728 tag, _ := decoder.TagName()
729 writeEndTag := true
730 switch string(tag) {
731 case "body":
732 p := HTMLBodyExtParam{rc.BaseURL.String(), false, cfg.UrlParameter}
733 if len(rc.Key) > 0 {
734 p.HasYukariKey = true
735 }
736 err := HTML_BODY_EXTENSION.Execute(out, p)
737 if err != nil {
738 if cfg.Debug {
739 fmt.Println("failed to inject body extension", err)
740 }
741 }
742 rc.BodyInjected = true
743 case "style":
744 state = STATE_DEFAULT
745 case "noscript":
746 state = STATE_DEFAULT
747 writeEndTag = false
748 }
749 // skip noscript tags - only the tag, not the content, because javascript is sanitized
750 if writeEndTag {
751 fmt.Fprintf(out, "</%s>", tag)
752 }
753
754 case html.TextToken:
755 switch state {
756 case STATE_DEFAULT:
757 fmt.Fprintf(out, "%s", decoder.Raw())
758 case STATE_IN_STYLE:
759 sanitizeCSS(rc, out, decoder.Raw())
760 case STATE_IN_NOSCRIPT:
761 sanitizeHTML(rc, out, decoder.Raw())
762 }
763
764 case html.CommentToken:
765 // ignore comment. TODO : parse IE conditional comment
766
767 case html.DoctypeToken:
768 out.Write(decoder.Raw())
769 }
770 } else {
771 switch token {
772 case html.StartTagToken, html.SelfClosingTagToken:
773 tag, _ := decoder.TagName()
774 if inArray(tag, UNSAFE_ELEMENTS) {
775 unsafeElements = append(unsafeElements, tag)
776 }
777
778 case html.EndTagToken:
779 tag, _ := decoder.TagName()
780 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
781 unsafeElements = unsafeElements[:len(unsafeElements)-1]
782 }
783 }
784 }
785 }
786}
787
788func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
789 exclude := false
790 for _, attr := range attrs {
791 attrName := attr[0]
792 attrValue := attr[1]
793 if bytes.Equal(attrName, []byte("rel")) {
794 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
795 exclude = true
796 break
797 }
798 }
799 if bytes.Equal(attrName, []byte("as")) {
800 if bytes.Equal(attrValue, []byte("script")) {
801 exclude = true
802 break
803 }
804 }
805 }
806
807 if !exclude {
808 out.Write([]byte("<link"))
809 for _, attr := range attrs {
810 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
811 }
812 out.Write([]byte(">"))
813 }
814}
815
816func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
817 var http_equiv []byte
818 var content []byte
819
820 for _, attr := range attrs {
821 attrName := attr[0]
822 attrValue := attr[1]
823 if bytes.Equal(attrName, []byte("http-equiv")) {
824 http_equiv = bytes.ToLower(attrValue)
825 // exclude some <meta http-equiv="..." ..>
826 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
827 return
828 }
829 }
830 if bytes.Equal(attrName, []byte("content")) {
831 content = attrValue
832 }
833 if bytes.Equal(attrName, []byte("charset")) {
834 // exclude <meta charset="...">
835 return
836 }
837 }
838
839 out.Write([]byte("<meta"))
840 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
841 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
842 contentUrl := content[urlIndex+4:]
843 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
844 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
845 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
846 contentUrl = contentUrl[1 : len(contentUrl)-1]
847 }
848 }
849 // output proxify result
850 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
851 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
852 }
853 } else {
854 if len(http_equiv) > 0 {
855 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
856 }
857 sanitizeAttrs(rc, out, attrs)
858 }
859 out.Write([]byte(">"))
860}
861
862func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
863 for _, attr := range attrs {
864 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
865 }
866}
867
868func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
869 if inArray(attrName, SAFE_ATTRIBUTES) {
870 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
871 return
872 }
873 switch string(attrName) {
874 case "src", "href", "action":
875 if uri, err := rc.ProxifyURI(attrValue); err == nil {
876 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
877 } else if cfg.Debug {
878 log.Println("cannot proxify uri:", string(attrValue))
879 }
880 case "style":
881 cssAttr := bytes.NewBuffer(nil)
882 sanitizeCSS(rc, cssAttr, attrValue)
883 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
884 }
885}
886
// mergeURIs resolves u2 as a reference relative to u1. A nil u2 yields u1
// itself.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	merged := u1
	if u2 != nil {
		merged = u1.ResolveReference(u2)
	}
	return merged
}
893
// sanitizeURI strips bytes <= 32 (control characters and space) from both ends
// of uri and extracts a lower-cased scheme.
//
// The scan stops at the first ':' (scheme found) or at the first '/', '?',
// '\\' or '#' (most probably a relative URI). Bytes <= 32 inside the scheme
// are removed and 'A'-'Z' are lower-cased; the cleaned scheme is written back
// into uri in place, so the caller's slice is modified.
//
// It returns the cleaned URI (a sub-slice of uri) and the scheme including the
// trailing ':', or "" when no scheme was found.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRightFunc(uri, func(r rune) bool { return r <= ' ' })

	cleaned := make([]byte, 0, 10) // scanned bytes, without bytes <= 32, lower-cased
	firstIndex := 0                // index of the first byte > 32
	colonIndex := -1               // index of the ':' ending the scheme

scan:
	for pos := 0; pos < len(uri); pos++ {
		ch := uri[pos]
		if ch <= ' ' {
			continue
		}
		if ch < utf8.RuneSelf && 'A' <= ch && ch <= 'Z' {
			ch += 'a' - 'A'
		}
		if len(cleaned) == 0 {
			firstIndex = pos
		}
		cleaned = append(cleaned, ch)

		switch ch {
		case ':':
			// colon found: everything scanned so far is the scheme
			colonIndex = pos
			break scan
		case '/', '?', '\\', '#':
			// special case : most probably a relative URI
			break scan
		}
	}

	if colonIndex == -1 {
		// scheme NOT found
		return uri[firstIndex:], ""
	}

	// Place the cleaned scheme so that it ends exactly on the ':' and return
	// the URI from there.
	schemeStart := colonIndex - len(cleaned) + 1
	copy(uri[schemeStart:], cleaned)
	return uri[schemeStart:], string(cleaned)
}
945
946func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
947 // sanitize URI
948 uri, scheme := sanitizeURI(uri)
949
950 // remove javascript protocol
951 if scheme == "javascript:" {
952 return "", nil
953 }
954
955 // TODO check malicious data: - e.g. data:script
956 if scheme == "data:" {
957 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
958 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
959 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
960 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
961 bytes.HasPrefix(uri, []byte("data:image/webp")) {
962 // should be safe
963 return string(uri), nil
964 } else {
965 // unsafe data
966 return "", nil
967 }
968 }
969
970 // parse the uri
971 u, err := url.Parse(string(uri))
972 if err != nil {
973 return "", err
974 }
975
976 // get the fragment (with the prefix "#")
977 fragment := ""
978 if len(u.Fragment) > 0 {
979 fragment = "#" + u.Fragment
980 }
981
982 // reset the fragment: it is not included in the yukariurl
983 u.Fragment = ""
984
985 // merge the URI with the document URI
986 u = mergeURIs(rc.BaseURL, u)
987
988 // simple internal link ?
989 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
990 if u.Scheme == rc.BaseURL.Scheme &&
991 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
992 u.Host == rc.BaseURL.Host &&
993 u.Path == rc.BaseURL.Path &&
994 u.RawQuery == rc.BaseURL.RawQuery {
995 // the fragment is the only difference between the document URI and the uri parameter
996 return fragment, nil
997 }
998
999 // return full URI and fragment (if not empty)
1000 yukari_uri := u.String()
1001
1002 if rc.Key == nil {
1003 return fmt.Sprintf("./?%s=%s%s", cfg.UrlParameter, url.QueryEscape(yukari_uri), fragment), nil
1004 }
1005 return fmt.Sprintf("./?%s=%s&%s=%s%s", cfg.HashParameter, hash(yukari_uri, rc.Key), cfg.UrlParameter, url.QueryEscape(yukari_uri), fragment), nil
1006}
1007
// inArray reports whether a contains an element byte-wise equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
1016
// hash returns the hex-encoded HMAC-SHA256 of msg under key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	io.WriteString(signer, msg)
	digest := signer.Sum(make([]byte, 0, sha256.Size))
	return hex.EncodeToString(digest)
}
1022
1023func verifyRequestURI(uri, hashMsg, key []byte) bool {
1024 h := make([]byte, hex.DecodedLen(len(hashMsg)))
1025 _, err := hex.Decode(h, hashMsg)
1026 if err != nil {
1027 if cfg.Debug {
1028 log.Println("hmac error:", err)
1029 }
1030 return false
1031 }
1032 mac := hmac.New(sha256.New, key)
1033 mac.Write(uri)
1034 return hmac.Equal(h, mac.Sum(nil))
1035}
1036
1037func (p *Proxy) serveExitYukariPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
1038 ctx.SetContentType("text/html")
1039 ctx.SetStatusCode(403)
1040 ctx.Write([]byte(YUKARI_HTML_PAGE_START))
1041 ctx.Write([]byte("<h2>You are about to exit YukariSukima</h2>"))
1042 ctx.Write([]byte("<p>Following</p><p><a href=\""))
1043 ctx.Write([]byte(html.EscapeString(uri.String())))
1044 ctx.Write([]byte("\" rel=\"noreferrer\">"))
1045 ctx.Write([]byte(html.EscapeString(uri.String())))
1046 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
1047 ctx.Write([]byte(YUKARI_HTML_PAGE_END))
1048}
1049
1050func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
1051 ctx.SetContentType("text/html; charset=UTF-8")
1052 ctx.SetStatusCode(statusCode)
1053 ctx.Write([]byte(YUKARI_HTML_PAGE_START))
1054 if err != nil {
1055 if cfg.Debug {
1056 log.Println("error:", err)
1057 }
1058 ctx.Write([]byte("<h2>Error: "))
1059 ctx.Write([]byte(html.EscapeString(err.Error())))
1060 ctx.Write([]byte("</h2>"))
1061 }
1062 if p.Key == nil {
1063 p := HTMLMainPageFormParam{cfg.UrlParameter}
1064 err := HTML_MAIN_PAGE_FORM.Execute(ctx, p)
1065 if err != nil {
1066 if cfg.Debug {
1067 fmt.Println("failed to inject main page form", err)
1068 }
1069 }
1070 } else {
1071 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
1072 }
1073 ctx.Write([]byte(YUKARI_HTML_PAGE_END))
1074}
1075
1076func main() {
1077 listenAddress := flag.String("listen", cfg.ListenAddress, "Listen address")
1078 key := flag.String("key", cfg.Key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
1079 IPV6 := flag.Bool("ipv6", cfg.IPV6, "Allow IPv6 HTTP requests")
1080 debug := flag.Bool("debug", cfg.Debug, "Debug mode")
1081 requestTimeout := flag.Uint("timeout", cfg.RequestTimeout, "Request timeout")
1082 followRedirect := flag.Bool("followredirect", cfg.FollowRedirect, "Follow HTTP GET redirect")
1083 proxyenv := flag.Bool("proxyenv", false, "Use a HTTP proxy as set in the environment (HTTP_PROXY, HTTPS_PROXY and NO_PROXY). Overrides -proxy, -socks5, -ipv6.")
1084 proxy := flag.String("proxy", "", "Use the specified HTTP proxy (ie: '[user:pass@]hostname:port'). Overrides -socks5, -ipv6.")
1085 socks5 := flag.String("socks5", "", "Use a SOCKS5 proxy (ie: 'hostname:port'). Overrides -ipv6.")
1086 urlParameter := flag.String("urlparam", cfg.UrlParameter, "user-defined requesting string URL parameter name (ie: '/?url=...' or '/?u=...')")
1087 hashParameter := flag.String("hashparam", cfg.HashParameter, "user-defined requesting string HASH parameter name (ie: '/?hash=...' or '/?h=...')")
1088 version := flag.Bool("version", false, "Show version")
1089 flag.Parse()
1090
1091 cfg.ListenAddress = *listenAddress
1092 cfg.Key = *key
1093 cfg.IPV6 = *IPV6
1094 cfg.Debug = *debug
1095 cfg.RequestTimeout = *requestTimeout
1096 cfg.FollowRedirect = *followRedirect
1097 cfg.UrlParameter = *urlParameter
1098 cfg.HashParameter = *hashParameter
1099
1100 if *version {
1101 fmt.Println(VERSION)
1102 return
1103 }
1104
1105 if *proxyenv && os.Getenv("HTTP_PROXY") == "" && os.Getenv("HTTPS_PROXY") == "" {
1106 log.Fatal("Error -proxyenv is used but no environment variables named 'HTTP_PROXY' and/or 'HTTPS_PROXY' could be found.")
1107 os.Exit(1)
1108 }
1109
1110 if *proxyenv {
1111 CLIENT.Dial = fasthttpproxy.FasthttpProxyHTTPDialer()
1112 log.Println("Using environment defined proxy(ies).")
1113 } else if *proxy != "" {
1114 CLIENT.Dial = fasthttpproxy.FasthttpHTTPDialer(*proxy)
1115 log.Println("Using custom HTTP proxy.")
1116 } else if *socks5 != "" {
1117 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1118 log.Println("Using Socks5 proxy.")
1119 } else if cfg.IPV6 {
1120 CLIENT.Dial = fasthttp.DialDualStack
1121 log.Println("Using dual stack (IPv4/IPv6) direct connections.")
1122 } else {
1123 CLIENT.Dial = fasthttp.Dial
1124 log.Println("Using IPv4 only direct connections.")
1125 }
1126
1127 p := &Proxy{RequestTimeout: time.Duration(cfg.RequestTimeout) * time.Second,
1128 FollowRedirect: cfg.FollowRedirect}
1129
1130 if cfg.Key != "" {
1131 var err error
1132 p.Key, err = base64.StdEncoding.DecodeString(cfg.Key)
1133 if err != nil {
1134 log.Fatal("Error parsing -key", err.Error())
1135 os.Exit(1)
1136 }
1137 }
1138
1139 log.Println("listening on", cfg.ListenAddress)
1140
1141 if err := fasthttp.ListenAndServe(cfg.ListenAddress, p.RequestHandler); err != nil {
1142 log.Fatal("Error in ListenAndServe:", err)
1143 }
1144}
Note: See TracBrowser for help on using the repository browser.