source: code/trunk/morty.go@ 117

Last change on this file since 117 was 117, checked in by asciimoo, 5 years ago

Merge pull request #93 from dalf/fix-selfclosingtag

[fix] fix self closing tag token

File size: 27.8 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
14 "mime"
15 "net/url"
16 "os"
17 "path/filepath"
18 "regexp"
19 "strings"
20 "time"
21 "unicode/utf8"
22
23 "github.com/valyala/fasthttp"
24 "github.com/valyala/fasthttp/fasthttpproxy"
25 "golang.org/x/net/html"
26 "golang.org/x/net/html/charset"
27 "golang.org/x/text/encoding"
28
29 "github.com/asciimoo/morty/contenttype"
30)
31
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text is copied to the output as-is.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is the body of a <style> element and is run through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is the body of a <noscript> element and is sanitized as HTML.
	STATE_IN_NOSCRIPT
)
37
// VERSION is the release string printed by the -version command line flag.
const VERSION = "v0.2.0"

// DEBUG enables verbose logging. It is true unless the DEBUG environment
// variable is exactly "false", so logging is also on when the variable is unset.
var DEBUG = os.Getenv("DEBUG") != "false"
41
42var CLIENT *fasthttp.Client = &fasthttp.Client{
43 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
44 ReadBufferSize: 16 * 1024, // 16K
45}
46
// CSS_URL_REGEXP matches CSS `url(...)` references. Submatch 1 is the optional
// opening quote, submatch 2 the URL itself (no spaces, quotes or closing
// parenthesis) and submatch 3 the optional closing quote.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
48
// ALLOWED_CONTENTTYPE_FILTER accepts the content types the proxy serves
// inline. RequestHandler sanitizes HTML and CSS responses and passes the other
// accepted types through unchanged.
// NOTE(review): the third NewFilterEquals argument looks like the structured
// syntax suffix (e.g. application/xhtml+xml) — confirm against the contenttype package.
var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
	// html
	contenttype.NewFilterEquals("text", "html", ""),
	contenttype.NewFilterEquals("application", "xhtml", "xml"),
	// css
	contenttype.NewFilterEquals("text", "css", ""),
	// images
	contenttype.NewFilterEquals("image", "gif", ""),
	contenttype.NewFilterEquals("image", "png", ""),
	contenttype.NewFilterEquals("image", "jpeg", ""),
	contenttype.NewFilterEquals("image", "pjpeg", ""),
	contenttype.NewFilterEquals("image", "webp", ""),
	contenttype.NewFilterEquals("image", "tiff", ""),
	contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
	contenttype.NewFilterEquals("image", "bmp", ""),
	contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
	contenttype.NewFilterEquals("image", "x-icon", ""),
	// fonts
	contenttype.NewFilterEquals("application", "font-otf", ""),
	contenttype.NewFilterEquals("application", "font-ttf", ""),
	contenttype.NewFilterEquals("application", "font-woff", ""),
	contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
})
72
// ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER accepts content types that are not
// served inline but may still be delivered: RequestHandler forces a
// "Content-Disposition: attachment" header for them. Types matched by neither
// filter are rejected with HTTP 403.
var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
	// texts
	contenttype.NewFilterEquals("text", "csv", ""),
	contenttype.NewFilterEquals("text", "tab-separated-values", ""),
	contenttype.NewFilterEquals("text", "plain", ""),
	// API
	contenttype.NewFilterEquals("application", "json", ""),
	// Documents
	contenttype.NewFilterEquals("application", "x-latex", ""),
	contenttype.NewFilterEquals("application", "pdf", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
	// Compressed archives
	contenttype.NewFilterEquals("application", "zip", ""),
	contenttype.NewFilterEquals("application", "gzip", ""),
	contenttype.NewFilterEquals("application", "x-compressed", ""),
	contenttype.NewFilterEquals("application", "x-gtar", ""),
	contenttype.NewFilterEquals("application", "x-compress", ""),
	// Generic binary
	contenttype.NewFilterEquals("application", "octet-stream", ""),
})
96
// ALLOWED_CONTENTTYPE_PARAMETERS is the set of Content-Type parameters that
// RequestHandler keeps on the outgoing response; every other parameter is
// filtered out.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
100
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML removes together with
// everything nested inside them.
var UNSAFE_ELEMENTS [][]byte = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
110
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with their HTML-escaped value. Attributes outside this list are
// dropped, except src/href/action (rewritten to proxy URLs) and style
// (passed through sanitizeCSS).
var SAFE_ATTRIBUTES [][]byte = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
147
// LINK_REL_SAFE_VALUES lists the rel attribute values for which sanitizeLinkTag
// keeps a <link> element. The comparison is an exact, case-sensitive match on
// the whole attribute value, which is why "shortcut icon" has its own entry.
var LINK_REL_SAFE_VALUES [][]byte = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
169
// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-cased) http-equiv values for
// which sanitizeMetaTag keeps a <meta> element; any other value causes the
// element to be dropped.
var LINK_HTTP_EQUIV_SAFE_VALUES [][]byte = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
178
// Proxy is the HTTP handler state of a morty instance.
type Proxy struct {
	// Key is the HMAC key used to verify the "mortyhash" request parameter.
	// When nil, hash verification is skipped and the main page offers a URL form.
	Key []byte
	// RequestTimeout bounds each upstream request issued by RequestHandler.
	RequestTimeout time.Duration
}
183
// RequestConfig carries the per-document settings needed to rewrite URLs.
type RequestConfig struct {
	// Key is the HMAC key used to sign proxified URLs; nil disables signing.
	Key []byte
	// BaseURL is the URL against which relative references are resolved.
	// sanitizeHTML replaces it when the document contains a <base href> tag.
	BaseURL *url.URL
}
188
// HTML_FORM_EXTENSION is a fmt template injected right after every <form>
// start tag: the first %s receives the form's target URL ("mortyurl") and the
// second its HMAC ("mortyhash", empty when no key is configured).
var HTML_FORM_EXTENSION string = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
190
// HTML_BODY_EXTENSION is a fmt template written just before each </body> end
// tag. It renders the hideable "proxified view" banner; the single %s receives
// the URL of the original page. Literal percent signs are doubled (%%) because
// the text is used as a format string.
var HTML_BODY_EXTENSION string = `
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
 <p>This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p>
</div>
<style>
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 15%%; left: 0; max-width: 140px; overflow: hidden; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; padding: 12px 12px 8px 8px; color: #444; }
#mortyheader * { box-sizing: content-box; margin: 0; border: none; padding: 0; overflow: hidden; z-index: 2147483647 !important; line-height: 1em; font-size: 12px !important; font-family: sans !important; font-weight: normal; text-align: left; text-decoration: none; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
</style>
`
206
// HTML_HEAD_CONTENT_TYPE is written right after every <head> start tag. It
// declares the UTF-8 charset, the IE compatibility mode and a no-referrer policy.
var HTML_HEAD_CONTENT_TYPE string = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
211
// MORTY_HTML_PAGE_START is the opening part of the pages generated by morty
// itself (main page, error pages, exit page): doctype, head with the inline
// stylesheet, and the opening of the main container. It is always followed by
// page-specific content and MORTY_HTML_PAGE_END.
//
// The link rule declares "color: #2980b9"; a bare "#2980b9;" without the
// property name is an invalid declaration that browsers ignore.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`
234
// MORTY_HTML_PAGE_END closes the container opened by MORTY_HTML_PAGE_START and
// appends the footer shown on every page generated by morty itself.
var MORTY_HTML_PAGE_END string = `
 </div>
 <div class="footer">
 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://github.com/asciimoo/morty">view on github</a>
 </p>
 </div>
</body>
</html>`
244
// FAVICON_BYTES holds the decoded PNG image served for /favicon.ico.
var FAVICON_BYTES []byte

// init decodes the embedded favicon.
//
// The literal below carries no '=' padding, which base64.StdEncoding rejects:
// it would return an error and only the bytes decoded so far, i.e. a truncated
// PNG. Decoding with RawStdEncoding (after trimming any padding) accepts both
// the padded and the unpadded form, and a failure is logged instead of being
// silently discarded.
func init() {
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	decoded, err := base64.RawStdEncoding.DecodeString(strings.TrimRight(faviconBase64, "="))
	if err != nil {
		// Best effort: the proxy still works without a favicon.
		log.Println("cannot decode embedded favicon:", err)
		return
	}
	FAVICON_BYTES = decoded
}
252
253func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
254
255 if appRequestHandler(ctx) {
256 return
257 }
258
259 requestHash := popRequestParam(ctx, []byte("mortyhash"))
260
261 requestURI := popRequestParam(ctx, []byte("mortyurl"))
262
263 if requestURI == nil {
264 p.serveMainPage(ctx, 200, nil)
265 return
266 }
267
268 if p.Key != nil {
269 if !verifyRequestURI(requestURI, requestHash, p.Key) {
270 // HTTP status code 403 : Forbidden
271 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
272 return
273 }
274 }
275
276 parsedURI, err := url.Parse(string(requestURI))
277
278 if err != nil {
279 // HTTP status code 500 : Internal Server Error
280 p.serveMainPage(ctx, 500, err)
281 return
282 }
283
284 // Serve an intermediate page for protocols other than HTTP(S)
285 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
286 p.serveExitMortyPage(ctx, parsedURI)
287 return
288 }
289
290 req := fasthttp.AcquireRequest()
291 defer fasthttp.ReleaseRequest(req)
292 req.SetConnectionClose()
293
294 requestURIStr := string(requestURI)
295
296 if DEBUG {
297 log.Println("getting", requestURIStr)
298 }
299
300 req.SetRequestURI(requestURIStr)
301 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
302
303 resp := fasthttp.AcquireResponse()
304 defer fasthttp.ReleaseResponse(resp)
305
306 req.Header.SetMethodBytes(ctx.Method())
307 if ctx.IsPost() || ctx.IsPut() {
308 req.SetBody(ctx.PostBody())
309 }
310
311 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
312
313 if err != nil {
314 if err == fasthttp.ErrTimeout {
315 // HTTP status code 504 : Gateway Time-Out
316 p.serveMainPage(ctx, 504, err)
317 } else {
318 // HTTP status code 500 : Internal Server Error
319 p.serveMainPage(ctx, 500, err)
320 }
321 return
322 }
323
324 if resp.StatusCode() != 200 {
325 switch resp.StatusCode() {
326 case 301, 302, 303, 307, 308:
327 loc := resp.Header.Peek("Location")
328 if loc != nil {
329 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
330 url, err := rc.ProxifyURI(loc)
331 if err == nil {
332 ctx.SetStatusCode(resp.StatusCode())
333 ctx.Response.Header.Add("Location", url)
334 if DEBUG {
335 log.Println("redirect to", string(loc))
336 }
337 return
338 }
339 }
340 }
341 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
342 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
343 return
344 }
345
346 contentTypeBytes := resp.Header.Peek("Content-Type")
347
348 if contentTypeBytes == nil {
349 // HTTP status code 503 : Service Unavailable
350 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
351 return
352 }
353
354 contentTypeString := string(contentTypeBytes)
355
356 // decode Content-Type header
357 contentType, error := contenttype.ParseContentType(contentTypeString)
358 if error != nil {
359 // HTTP status code 503 : Service Unavailable
360 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
361 return
362 }
363
364 // content-disposition
365 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
366
367 // check content type
368 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
369 // it is not a usual content type
370 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
371 // force attachment for allowed content type
372 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
373 } else {
374 // deny access to forbidden content type
375 // HTTP status code 403 : Forbidden
376 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
377 return
378 }
379 }
380
381 // HACK : replace */xhtml by text/html
382 if contentType.SubType == "xhtml" {
383 contentType.TopLevelType = "text"
384 contentType.SubType = "html"
385 contentType.Suffix = ""
386 }
387
388 // conversion to UTF-8
389 var responseBody []byte
390
391 if contentType.TopLevelType == "text" {
392 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
393 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
394 responseBody, err = e.NewDecoder().Bytes(resp.Body())
395 if err != nil {
396 // HTTP status code 503 : Service Unavailable
397 p.serveMainPage(ctx, 503, err)
398 return
399 }
400 } else {
401 responseBody = resp.Body()
402 }
403 // update the charset or specify it
404 contentType.Parameters["charset"] = "UTF-8"
405 } else {
406 responseBody = resp.Body()
407 }
408
409 //
410 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
411
412 // set the content type
413 ctx.SetContentType(contentType.String())
414
415 // output according to MIME type
416 switch {
417 case contentType.SubType == "css" && contentType.Suffix == "":
418 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
419 case contentType.SubType == "html" && contentType.Suffix == "":
420 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
421 default:
422 if contentDispositionBytes != nil {
423 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
424 }
425 ctx.Write(responseBody)
426 }
427}
428
// contentDispositionForceAttachment builds a Content-Disposition header value
// whose disposition type is always "attachment".
//
// Parameters of the upstream header (contentDispositionBytes, may be nil) are
// kept when it parses; a malformed header is ignored. When no "filename"
// parameter is present, the last element of the URL path is used, unless the
// path has no usable last element (empty or root path), in which case the
// parameter is omitted and the client chooses a name.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var params map[string]string

	if contentDispositionBytes != nil {
		var err error
		_, params, err = mime.ParseMediaType(string(contentDispositionBytes))
		if err != nil {
			params = nil
		}
	}
	if params == nil {
		params = make(map[string]string)
	}

	// mime.ParseMediaType lower-cases parameter names, so the lookup and the
	// insertion must both use the lower-case key.
	if _, fileNameDefined := params["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		// filepath.Base yields "." for an empty path and a bare separator for
		// a root path: neither is a meaningful file name.
		if name != "." && name != "/" && name != string(filepath.Separator) {
			params["filename"] = name
		}
	}

	formatted := mime.FormatMediaType("attachment", params)
	if formatted == "" {
		// FormatMediaType returns "" on a standard violation; never emit an
		// empty header value.
		formatted = "attachment"
	}
	return []byte(formatted)
}
451
452func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
453 // serve robots.txt
454 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
455 ctx.SetContentType("text/plain")
456 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
457 return true
458 }
459
460 // server favicon.ico
461 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
462 ctx.SetContentType("image/png")
463 ctx.Write(FAVICON_BYTES)
464 return true
465 }
466
467 return false
468}
469
470func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
471 param := ctx.QueryArgs().PeekBytes(paramName)
472
473 if param == nil {
474 param = ctx.PostArgs().PeekBytes(paramName)
475 if param != nil {
476 ctx.PostArgs().DelBytes(paramName)
477 }
478 } else {
479 ctx.QueryArgs().DelBytes(paramName)
480 }
481
482 return param
483}
484
485func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
486 // TODO
487
488 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
489
490 if urlSlices == nil {
491 out.Write(css)
492 return
493 }
494
495 startIndex := 0
496
497 for _, s := range urlSlices {
498 urlStart := s[4]
499 urlEnd := s[5]
500
501 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
502 out.Write(css[startIndex:urlStart])
503 out.Write([]byte(uri))
504 startIndex = urlEnd
505 } else if DEBUG {
506 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
507 }
508 }
509 if startIndex < len(css) {
510 out.Write(css[startIndex:len(css)])
511 }
512}
513
514func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
515 r := bytes.NewReader(htmlDoc)
516 decoder := html.NewTokenizer(r)
517 decoder.AllowCDATA(true)
518
519 unsafeElements := make([][]byte, 0, 8)
520 state := STATE_DEFAULT
521 for {
522 token := decoder.Next()
523 if token == html.ErrorToken {
524 err := decoder.Err()
525 if err != io.EOF {
526 log.Println("failed to parse HTML")
527 }
528 break
529 }
530
531 if len(unsafeElements) == 0 {
532
533 switch token {
534 case html.StartTagToken, html.SelfClosingTagToken:
535 tag, hasAttrs := decoder.TagName()
536 safe := !inArray(tag, UNSAFE_ELEMENTS)
537 if !safe {
538 if token != html.SelfClosingTagToken {
539 var unsafeTag []byte = make([]byte, len(tag))
540 copy(unsafeTag, tag)
541 unsafeElements = append(unsafeElements, unsafeTag)
542 }
543 break
544 }
545 if bytes.Equal(tag, []byte("base")) {
546 for {
547 attrName, attrValue, moreAttr := decoder.TagAttr()
548 if bytes.Equal(attrName, []byte("href")) {
549 parsedURI, err := url.Parse(string(attrValue))
550 if err == nil {
551 rc.BaseURL = parsedURI
552 }
553 }
554 if !moreAttr {
555 break
556 }
557 }
558 break
559 }
560 if bytes.Equal(tag, []byte("noscript")) {
561 state = STATE_IN_NOSCRIPT
562 break
563 }
564 var attrs [][][]byte
565 if hasAttrs {
566 for {
567 attrName, attrValue, moreAttr := decoder.TagAttr()
568 attrs = append(attrs, [][]byte{
569 attrName,
570 attrValue,
571 []byte(html.EscapeString(string(attrValue))),
572 })
573 if !moreAttr {
574 break
575 }
576 }
577 }
578 if bytes.Equal(tag, []byte("link")) {
579 sanitizeLinkTag(rc, out, attrs)
580 break
581 }
582
583 if bytes.Equal(tag, []byte("meta")) {
584 sanitizeMetaTag(rc, out, attrs)
585 break
586 }
587
588 fmt.Fprintf(out, "<%s", tag)
589
590 if hasAttrs {
591 sanitizeAttrs(rc, out, attrs)
592 }
593
594 if token == html.SelfClosingTagToken {
595 fmt.Fprintf(out, " />")
596 } else {
597 fmt.Fprintf(out, ">")
598 if bytes.Equal(tag, []byte("style")) {
599 state = STATE_IN_STYLE
600 }
601 }
602
603 if bytes.Equal(tag, []byte("head")) {
604 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
605 }
606
607 if bytes.Equal(tag, []byte("form")) {
608 var formURL *url.URL
609 for _, attr := range attrs {
610 if bytes.Equal(attr[0], []byte("action")) {
611 formURL, _ = url.Parse(string(attr[1]))
612 formURL = mergeURIs(rc.BaseURL, formURL)
613 break
614 }
615 }
616 if formURL == nil {
617 formURL = rc.BaseURL
618 }
619 urlStr := formURL.String()
620 var key string
621 if rc.Key != nil {
622 key = hash(urlStr, rc.Key)
623 }
624 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
625
626 }
627
628 case html.EndTagToken:
629 tag, _ := decoder.TagName()
630 writeEndTag := true
631 switch string(tag) {
632 case "body":
633 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
634 case "style":
635 state = STATE_DEFAULT
636 case "noscript":
637 state = STATE_DEFAULT
638 writeEndTag = false
639 }
640 // skip noscript tags - only the tag, not the content, because javascript is sanitized
641 if writeEndTag {
642 fmt.Fprintf(out, "</%s>", tag)
643 }
644
645 case html.TextToken:
646 switch state {
647 case STATE_DEFAULT:
648 fmt.Fprintf(out, "%s", decoder.Raw())
649 case STATE_IN_STYLE:
650 sanitizeCSS(rc, out, decoder.Raw())
651 case STATE_IN_NOSCRIPT:
652 sanitizeHTML(rc, out, decoder.Raw())
653 }
654
655 case html.CommentToken:
656 // ignore comment. TODO : parse IE conditional comment
657
658 case html.DoctypeToken:
659 out.Write(decoder.Raw())
660 }
661 } else {
662 switch token {
663 case html.StartTagToken, html.SelfClosingTagToken:
664 tag, _ := decoder.TagName()
665 if inArray(tag, UNSAFE_ELEMENTS) {
666 unsafeElements = append(unsafeElements, tag)
667 }
668
669 case html.EndTagToken:
670 tag, _ := decoder.TagName()
671 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
672 unsafeElements = unsafeElements[:len(unsafeElements)-1]
673 }
674 }
675 }
676 }
677}
678
679func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
680 exclude := false
681 for _, attr := range attrs {
682 attrName := attr[0]
683 attrValue := attr[1]
684 if bytes.Equal(attrName, []byte("rel")) {
685 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
686 exclude = true
687 break
688 }
689 }
690 if bytes.Equal(attrName, []byte("as")) {
691 if bytes.Equal(attrValue, []byte("script")) {
692 exclude = true
693 break
694 }
695 }
696 }
697
698 if !exclude {
699 out.Write([]byte("<link"))
700 for _, attr := range attrs {
701 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
702 }
703 out.Write([]byte(">"))
704 }
705}
706
707func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
708 var http_equiv []byte
709 var content []byte
710
711 for _, attr := range attrs {
712 attrName := attr[0]
713 attrValue := attr[1]
714 if bytes.Equal(attrName, []byte("http-equiv")) {
715 http_equiv = bytes.ToLower(attrValue)
716 // exclude some <meta http-equiv="..." ..>
717 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
718 return
719 }
720 }
721 if bytes.Equal(attrName, []byte("content")) {
722 content = attrValue
723 }
724 if bytes.Equal(attrName, []byte("charset")) {
725 // exclude <meta charset="...">
726 return
727 }
728 }
729
730 out.Write([]byte("<meta"))
731 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
732 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
733 contentUrl := content[urlIndex+4:]
734 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
735 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
736 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
737 contentUrl = contentUrl[1 : len(contentUrl)-1]
738 }
739 }
740 // output proxify result
741 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
742 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
743 }
744 } else {
745 if len(http_equiv) > 0 {
746 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
747 }
748 sanitizeAttrs(rc, out, attrs)
749 }
750 out.Write([]byte(">"))
751}
752
753func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
754 for _, attr := range attrs {
755 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
756 }
757}
758
759func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
760 if inArray(attrName, SAFE_ATTRIBUTES) {
761 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
762 return
763 }
764 switch string(attrName) {
765 case "src", "href", "action":
766 if uri, err := rc.ProxifyURI(attrValue); err == nil {
767 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
768 } else if DEBUG {
769 log.Println("cannot proxify uri:", string(attrValue))
770 }
771 case "style":
772 cssAttr := bytes.NewBuffer(nil)
773 sanitizeCSS(rc, cssAttr, attrValue)
774 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
775 }
776}
777
// mergeURIs resolves the reference u2 against the base u1 and returns the
// result. A nil u2 yields u1 itself.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
784
// sanitizeURI normalizes a URI before it is parsed. It returns the cleaned URI
// and its scheme (lower case, including the trailing ':'), or "" when the URI
// has no scheme.
//
// Bytes <= 0x20 (controls and space) are removed from the end of the URI, from
// its beginning, and from inside the scheme; the scheme is lower-cased.
// To avoid allocating (except for the scheme), the cleaned scheme is written
// back into uri: the caller's slice is modified in place.
func sanitizeURI(uri []byte) ([]byte, string) {
	// strip trailing controls and spaces
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	prefix := make([]byte, 0, 10) // significant bytes seen so far, lower-cased
	start := -1                   // index of the first significant byte
	colon := -1                   // index of the ':' ending the scheme

scan:
	for i := 0; i < len(uri); i++ {
		c := uri[i]
		if c <= 32 {
			// controls and spaces are ignored
			continue
		}
		if c < utf8.RuneSelf && 'A' <= c && c <= 'Z' {
			c = c + 'a' - 'A'
		}
		prefix = append(prefix, c)
		if start < 0 {
			start = i
		}

		switch c {
		case ':':
			// everything collected so far is the scheme
			colon = i
			break scan
		case '/', '?', '\\', '#':
			// a path, query or fragment starts before any ':':
			// most probably a relative URI
			break scan
		}
	}

	if colon < 0 {
		// no scheme: only the leading junk is dropped
		if start < 0 {
			start = 0
		}
		return uri[start:], ""
	}

	// Right-align the cleaned scheme so that it ends on the ':' and return the
	// URI from there.
	from := colon - len(prefix) + 1
	copy(uri[from:], prefix)
	return uri[from:], string(prefix)
}
836
837func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
838 // sanitize URI
839 uri, scheme := sanitizeURI(uri)
840
841 // remove javascript protocol
842 if scheme == "javascript:" {
843 return "", nil
844 }
845
846 // TODO check malicious data: - e.g. data:script
847 if scheme == "data:" {
848 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
849 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
850 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
851 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
852 bytes.HasPrefix(uri, []byte("data:image/webp")) {
853 // should be safe
854 return string(uri), nil
855 } else {
856 // unsafe data
857 return "", nil
858 }
859 }
860
861 // parse the uri
862 u, err := url.Parse(string(uri))
863 if err != nil {
864 return "", err
865 }
866
867 // get the fragment (with the prefix "#")
868 fragment := ""
869 if len(u.Fragment) > 0 {
870 fragment = "#" + u.Fragment
871 }
872
873 // reset the fragment: it is not included in the mortyurl
874 u.Fragment = ""
875
876 // merge the URI with the document URI
877 u = mergeURIs(rc.BaseURL, u)
878
879 // simple internal link ?
880 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
881 if u.Scheme == rc.BaseURL.Scheme &&
882 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
883 u.Host == rc.BaseURL.Host &&
884 u.Path == rc.BaseURL.Path &&
885 u.RawQuery == rc.BaseURL.RawQuery {
886 // the fragment is the only difference between the document URI and the uri parameter
887 return fragment, nil
888 }
889
890 // return full URI and fragment (if not empty)
891 morty_uri := u.String()
892
893 if rc.Key == nil {
894 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
895 }
896 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
897}
898
// inArray reports whether b is byte-for-byte equal to one of the elements of a.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
907
// hash returns the HMAC-SHA256 of msg under key, as a lower-case hex string.
func hash(msg string, key []byte) string {
	digest := hmac.New(sha256.New, key)
	digest.Write([]byte(msg))
	sum := digest.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(sum)))
	hex.Encode(encoded, sum)
	return string(encoded)
}
913
914func verifyRequestURI(uri, hashMsg, key []byte) bool {
915 h := make([]byte, hex.DecodedLen(len(hashMsg)))
916 _, err := hex.Decode(h, hashMsg)
917 if err != nil {
918 if DEBUG {
919 log.Println("hmac error:", err)
920 }
921 return false
922 }
923 mac := hmac.New(sha256.New, key)
924 mac.Write(uri)
925 return hmac.Equal(h, mac.Sum(nil))
926}
927
928func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
929 ctx.SetContentType("text/html")
930 ctx.SetStatusCode(403)
931 ctx.Write([]byte(MORTY_HTML_PAGE_START))
932 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
933 ctx.Write([]byte("<p>Following</p><p><a href=\""))
934 ctx.Write([]byte(html.EscapeString(uri.String())))
935 ctx.Write([]byte("\" rel=\"noreferrer\">"))
936 ctx.Write([]byte(html.EscapeString(uri.String())))
937 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
938 ctx.Write([]byte(MORTY_HTML_PAGE_END))
939}
940
941func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
942 ctx.SetContentType("text/html; charset=UTF-8")
943 ctx.SetStatusCode(statusCode)
944 ctx.Write([]byte(MORTY_HTML_PAGE_START))
945 if err != nil {
946 if DEBUG {
947 log.Println("error:", err)
948 }
949 ctx.Write([]byte("<h2>Error: "))
950 ctx.Write([]byte(html.EscapeString(err.Error())))
951 ctx.Write([]byte("</h2>"))
952 }
953 if p.Key == nil {
954 ctx.Write([]byte(`
955 <form action="post">
956 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
957 <input type="submit" value="go" />
958 </form>`))
959 } else {
960 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
961 }
962 ctx.Write([]byte(MORTY_HTML_PAGE_END))
963}
964
965func main() {
966 default_listen_addr := os.Getenv("MORTY_ADDRESS")
967 if default_listen_addr == "" {
968 default_listen_addr = "127.0.0.1:3000"
969 }
970 default_key := os.Getenv("MORTY_KEY")
971 listen := flag.String("listen", default_listen_addr, "Listen address")
972 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
973 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
974 version := flag.Bool("version", false, "Show version")
975 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
976 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
977 flag.Parse()
978
979 if *version {
980 fmt.Println(VERSION)
981 return
982 }
983
984 if *ipv6 {
985 CLIENT.DialDualStack = true
986 }
987
988 if *socks5 != "" {
989 // this disables CLIENT.DialDualStack
990 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
991 }
992
993 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
994
995 if *key != "" {
996 var err error
997 p.Key, err = base64.StdEncoding.DecodeString(*key)
998 if err != nil {
999 log.Fatal("Error parsing -key", err.Error())
1000 os.Exit(1)
1001 }
1002 }
1003
1004 log.Println("listening on", *listen)
1005
1006 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1007 log.Fatal("Error in ListenAndServe:", err)
1008 }
1009}
Note: See TracBrowser for help on using the repository browser.