source: code/trunk/morty.go@ 118

Last change on this file since 118 was 118, checked in by asciimoo, 5 years ago

[enh] handle get form parameters

File size: 28.0 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
14 "mime"
15 "net/url"
16 "os"
17 "path/filepath"
18 "regexp"
19 "strings"
20 "time"
21 "unicode/utf8"
22
23 "github.com/valyala/fasthttp"
24 "github.com/valyala/fasthttp/fasthttpproxy"
25 "golang.org/x/net/html"
26 "golang.org/x/net/html/charset"
27 "golang.org/x/text/encoding"
28
29 "github.com/asciimoo/morty/contenttype"
30)
31
// Parser states tracked by sanitizeHTML to decide how text tokens are handled.
const (
	// STATE_DEFAULT : text is copied through unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE : text is CSS and goes through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT : text is HTML and goes through sanitizeHTML again.
	STATE_IN_NOSCRIPT
)
37
// VERSION is the release identifier printed by the -version flag.
const VERSION = "v0.2.0"

// DEBUG turns on verbose logging. It is enabled unless the DEBUG
// environment variable is set to exactly "false".
var DEBUG = !(os.Getenv("DEBUG") == "false")
41
42var CLIENT *fasthttp.Client = &fasthttp.Client{
43 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
44 ReadBufferSize: 16 * 1024, // 16K
45}
46
// CSS_URL_REGEXP matches CSS "url(" references. Submatch 1 and 3 are the
// optional opening and closing quote, submatch 2 is the URI itself.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
48
49var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
50 // html
51 contenttype.NewFilterEquals("text", "html", ""),
52 contenttype.NewFilterEquals("application", "xhtml", "xml"),
53 // css
54 contenttype.NewFilterEquals("text", "css", ""),
55 // images
56 contenttype.NewFilterEquals("image", "gif", ""),
57 contenttype.NewFilterEquals("image", "png", ""),
58 contenttype.NewFilterEquals("image", "jpeg", ""),
59 contenttype.NewFilterEquals("image", "pjpeg", ""),
60 contenttype.NewFilterEquals("image", "webp", ""),
61 contenttype.NewFilterEquals("image", "tiff", ""),
62 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
63 contenttype.NewFilterEquals("image", "bmp", ""),
64 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
65 contenttype.NewFilterEquals("image", "x-icon", ""),
66 // fonts
67 contenttype.NewFilterEquals("application", "font-otf", ""),
68 contenttype.NewFilterEquals("application", "font-ttf", ""),
69 contenttype.NewFilterEquals("application", "font-woff", ""),
70 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
71})
72
73var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
74 // texts
75 contenttype.NewFilterEquals("text", "csv", ""),
76 contenttype.NewFilterEquals("text", "tab-separated-values", ""),
77 contenttype.NewFilterEquals("text", "plain", ""),
78 // API
79 contenttype.NewFilterEquals("application", "json", ""),
80 // Documents
81 contenttype.NewFilterEquals("application", "x-latex", ""),
82 contenttype.NewFilterEquals("application", "pdf", ""),
83 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
84 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
85 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
86 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
87 // Compressed archives
88 contenttype.NewFilterEquals("application", "zip", ""),
89 contenttype.NewFilterEquals("application", "gzip", ""),
90 contenttype.NewFilterEquals("application", "x-compressed", ""),
91 contenttype.NewFilterEquals("application", "x-gtar", ""),
92 contenttype.NewFilterEquals("application", "x-compress", ""),
93 // Generic binary
94 contenttype.NewFilterEquals("application", "octet-stream", ""),
95})
96
// ALLOWED_CONTENTTYPE_PARAMETERS lists the Content-Type parameters kept on
// the proxied response; RequestHandler filters the parameters with this map.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
100
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
110
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with their (HTML-escaped) value and no further rewriting.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
147
// LINK_REL_SAFE_VALUES lists the rel values for which sanitizeLinkTag keeps
// a <link> element; any other rel value drops the whole element.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
169
// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-cased) http-equiv values for
// which sanitizeMetaTag keeps a <meta> element.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
178
// Proxy carries the server-wide settings used by RequestHandler.
type Proxy struct {
	// Key is the HMAC key checked against the "mortyhash" parameter;
	// a nil Key disables the check.
	Key []byte

	// RequestTimeout is the timeout passed to the upstream HTTP client.
	RequestTimeout time.Duration
}
183
// RequestConfig is the per-document context used while rewriting URIs.
type RequestConfig struct {
	// Key is the HMAC key used to sign proxified URIs; nil means unsigned.
	Key []byte

	// BaseURL is the URL against which relative references are resolved.
	BaseURL *url.URL
}
188
// HTML_FORM_EXTENSION is the fmt template for the two hidden inputs appended
// to every rewritten <form>: the target URL and its hash.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
190
// HTML_BODY_EXTENSION is the fmt template for the banner injected before
// </body>. Its single %s verb receives the original page URL; literal
// percent signs in the CSS are therefore written as %%.
var HTML_BODY_EXTENSION = `
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
 <p>This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p>
</div>
<style>
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 15%%; left: 0; max-width: 140px; overflow: hidden; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; padding: 12px 12px 8px 8px; color: #444; }
#mortyheader * { box-sizing: content-box; margin: 0; border: none; padding: 0; overflow: hidden; z-index: 2147483647 !important; line-height: 1em; font-size: 12px !important; font-family: sans !important; font-weight: normal; text-align: left; text-decoration: none; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
</style>
`
206
// HTML_HEAD_CONTENT_TYPE is the block of <meta> elements written right
// after every <head> start tag of a sanitized document.
var HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
211
// MORTY_HTML_PAGE_START is the opening part (doctype through the page
// heading) of the pages morty generates itself: the main page, error pages
// and the exit page. It is written verbatim, not through fmt, so percent
// signs are literal.
var MORTY_HTML_PAGE_START = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`
234
// MORTY_HTML_PAGE_END closes the markup opened by MORTY_HTML_PAGE_START and
// adds the footer shown on every page morty generates itself.
var MORTY_HTML_PAGE_END = `
 </div>
 <div class="footer">
 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://github.com/asciimoo/morty">view on github</a>
 </p>
 </div>
</body>
</html>`
244
// FAVICON_BYTES holds the decoded PNG image served for /favicon.ico.
var FAVICON_BYTES []byte

// init decodes the embedded favicon once at start-up.
//
// The base64 literal carries no trailing "=" padding, so it has to be
// decoded with the unpadded alphabet. The padded StdEncoding rejects the
// final 3-character group and returns the image without its last two bytes.
func init() {
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	decoded, err := base64.RawStdEncoding.DecodeString(faviconBase64)
	if err != nil {
		// The input is a compile-time constant: a failure here is a
		// programming error, not a runtime condition.
		panic("morty: invalid embedded favicon: " + err.Error())
	}
	FAVICON_BYTES = decoded
}
252
253func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
254
255 if appRequestHandler(ctx) {
256 return
257 }
258
259 requestHash := popRequestParam(ctx, []byte("mortyhash"))
260
261 requestURI := popRequestParam(ctx, []byte("mortyurl"))
262
263 if requestURI == nil {
264 p.serveMainPage(ctx, 200, nil)
265 return
266 }
267
268 if p.Key != nil {
269 if !verifyRequestURI(requestURI, requestHash, p.Key) {
270 // HTTP status code 403 : Forbidden
271 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
272 return
273 }
274 }
275
276 requestURIQuery := ctx.QueryArgs().QueryString()
277 if len(requestURIQuery) > 0 {
278 requestURI = append(requestURI, '?')
279 requestURI = append(requestURI, requestURIQuery...)
280 }
281
282 parsedURI, err := url.Parse(string(requestURI))
283
284 if err != nil {
285 // HTTP status code 500 : Internal Server Error
286 p.serveMainPage(ctx, 500, err)
287 return
288 }
289
290 // Serve an intermediate page for protocols other than HTTP(S)
291 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
292 p.serveExitMortyPage(ctx, parsedURI)
293 return
294 }
295
296 req := fasthttp.AcquireRequest()
297 defer fasthttp.ReleaseRequest(req)
298 req.SetConnectionClose()
299
300 requestURIStr := string(requestURI)
301
302 if DEBUG {
303 log.Println("getting", requestURIStr)
304 }
305
306 req.SetRequestURI(requestURIStr)
307 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
308
309 resp := fasthttp.AcquireResponse()
310 defer fasthttp.ReleaseResponse(resp)
311
312 req.Header.SetMethodBytes(ctx.Method())
313 if ctx.IsPost() || ctx.IsPut() {
314 req.SetBody(ctx.PostBody())
315 }
316
317 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
318
319 if err != nil {
320 if err == fasthttp.ErrTimeout {
321 // HTTP status code 504 : Gateway Time-Out
322 p.serveMainPage(ctx, 504, err)
323 } else {
324 // HTTP status code 500 : Internal Server Error
325 p.serveMainPage(ctx, 500, err)
326 }
327 return
328 }
329
330 if resp.StatusCode() != 200 {
331 switch resp.StatusCode() {
332 case 301, 302, 303, 307, 308:
333 loc := resp.Header.Peek("Location")
334 if loc != nil {
335 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
336 url, err := rc.ProxifyURI(loc)
337 if err == nil {
338 ctx.SetStatusCode(resp.StatusCode())
339 ctx.Response.Header.Add("Location", url)
340 if DEBUG {
341 log.Println("redirect to", string(loc))
342 }
343 return
344 }
345 }
346 }
347 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
348 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
349 return
350 }
351
352 contentTypeBytes := resp.Header.Peek("Content-Type")
353
354 if contentTypeBytes == nil {
355 // HTTP status code 503 : Service Unavailable
356 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
357 return
358 }
359
360 contentTypeString := string(contentTypeBytes)
361
362 // decode Content-Type header
363 contentType, error := contenttype.ParseContentType(contentTypeString)
364 if error != nil {
365 // HTTP status code 503 : Service Unavailable
366 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
367 return
368 }
369
370 // content-disposition
371 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
372
373 // check content type
374 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
375 // it is not a usual content type
376 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
377 // force attachment for allowed content type
378 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
379 } else {
380 // deny access to forbidden content type
381 // HTTP status code 403 : Forbidden
382 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
383 return
384 }
385 }
386
387 // HACK : replace */xhtml by text/html
388 if contentType.SubType == "xhtml" {
389 contentType.TopLevelType = "text"
390 contentType.SubType = "html"
391 contentType.Suffix = ""
392 }
393
394 // conversion to UTF-8
395 var responseBody []byte
396
397 if contentType.TopLevelType == "text" {
398 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
399 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
400 responseBody, err = e.NewDecoder().Bytes(resp.Body())
401 if err != nil {
402 // HTTP status code 503 : Service Unavailable
403 p.serveMainPage(ctx, 503, err)
404 return
405 }
406 } else {
407 responseBody = resp.Body()
408 }
409 // update the charset or specify it
410 contentType.Parameters["charset"] = "UTF-8"
411 } else {
412 responseBody = resp.Body()
413 }
414
415 //
416 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
417
418 // set the content type
419 ctx.SetContentType(contentType.String())
420
421 // output according to MIME type
422 switch {
423 case contentType.SubType == "css" && contentType.Suffix == "":
424 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
425 case contentType.SubType == "html" && contentType.Suffix == "":
426 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
427 default:
428 if contentDispositionBytes != nil {
429 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
430 }
431 ctx.Write(responseBody)
432 }
433}
434
// contentDispositionForceAttachment returns a Content-Disposition header
// value whose disposition type is always "attachment".
//
// Parameters of the incoming header (contentDispositionBytes, may be nil) are
// preserved when the header can be parsed; an unparsable header is treated as
// absent. When no "filename" parameter is present, the last element of the
// URL path is used as file name, unless the path has no usable last element
// (empty or root path), in which case the choice is left to the client.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var params map[string]string

	if contentDispositionBytes != nil {
		if _, parsed, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil {
			params = parsed
		}
	}
	if params == nil {
		params = make(map[string]string)
	}

	if _, fileNameDefined := params["filename"]; !fileNameDefined && url != nil {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		// filepath.Base yields "." for an empty path and the separator for a
		// root path: neither is a meaningful file name.
		if name != "." && name != string(filepath.Separator) {
			params["filename"] = name
		}
	}

	return []byte(mime.FormatMediaType("attachment", params))
}
457
458func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
459 // serve robots.txt
460 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
461 ctx.SetContentType("text/plain")
462 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
463 return true
464 }
465
466 // server favicon.ico
467 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
468 ctx.SetContentType("image/png")
469 ctx.Write(FAVICON_BYTES)
470 return true
471 }
472
473 return false
474}
475
476func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
477 param := ctx.QueryArgs().PeekBytes(paramName)
478
479 if param == nil {
480 param = ctx.PostArgs().PeekBytes(paramName)
481 if param != nil {
482 ctx.PostArgs().DelBytes(paramName)
483 }
484 } else {
485 ctx.QueryArgs().DelBytes(paramName)
486 }
487
488 return param
489}
490
491func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
492 // TODO
493
494 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
495
496 if urlSlices == nil {
497 out.Write(css)
498 return
499 }
500
501 startIndex := 0
502
503 for _, s := range urlSlices {
504 urlStart := s[4]
505 urlEnd := s[5]
506
507 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
508 out.Write(css[startIndex:urlStart])
509 out.Write([]byte(uri))
510 startIndex = urlEnd
511 } else if DEBUG {
512 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
513 }
514 }
515 if startIndex < len(css) {
516 out.Write(css[startIndex:len(css)])
517 }
518}
519
520func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
521 r := bytes.NewReader(htmlDoc)
522 decoder := html.NewTokenizer(r)
523 decoder.AllowCDATA(true)
524
525 unsafeElements := make([][]byte, 0, 8)
526 state := STATE_DEFAULT
527 for {
528 token := decoder.Next()
529 if token == html.ErrorToken {
530 err := decoder.Err()
531 if err != io.EOF {
532 log.Println("failed to parse HTML")
533 }
534 break
535 }
536
537 if len(unsafeElements) == 0 {
538
539 switch token {
540 case html.StartTagToken, html.SelfClosingTagToken:
541 tag, hasAttrs := decoder.TagName()
542 safe := !inArray(tag, UNSAFE_ELEMENTS)
543 if !safe {
544 if token != html.SelfClosingTagToken {
545 var unsafeTag []byte = make([]byte, len(tag))
546 copy(unsafeTag, tag)
547 unsafeElements = append(unsafeElements, unsafeTag)
548 }
549 break
550 }
551 if bytes.Equal(tag, []byte("base")) {
552 for {
553 attrName, attrValue, moreAttr := decoder.TagAttr()
554 if bytes.Equal(attrName, []byte("href")) {
555 parsedURI, err := url.Parse(string(attrValue))
556 if err == nil {
557 rc.BaseURL = parsedURI
558 }
559 }
560 if !moreAttr {
561 break
562 }
563 }
564 break
565 }
566 if bytes.Equal(tag, []byte("noscript")) {
567 state = STATE_IN_NOSCRIPT
568 break
569 }
570 var attrs [][][]byte
571 if hasAttrs {
572 for {
573 attrName, attrValue, moreAttr := decoder.TagAttr()
574 attrs = append(attrs, [][]byte{
575 attrName,
576 attrValue,
577 []byte(html.EscapeString(string(attrValue))),
578 })
579 if !moreAttr {
580 break
581 }
582 }
583 }
584 if bytes.Equal(tag, []byte("link")) {
585 sanitizeLinkTag(rc, out, attrs)
586 break
587 }
588
589 if bytes.Equal(tag, []byte("meta")) {
590 sanitizeMetaTag(rc, out, attrs)
591 break
592 }
593
594 fmt.Fprintf(out, "<%s", tag)
595
596 if hasAttrs {
597 sanitizeAttrs(rc, out, attrs)
598 }
599
600 if token == html.SelfClosingTagToken {
601 fmt.Fprintf(out, " />")
602 } else {
603 fmt.Fprintf(out, ">")
604 if bytes.Equal(tag, []byte("style")) {
605 state = STATE_IN_STYLE
606 }
607 }
608
609 if bytes.Equal(tag, []byte("head")) {
610 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
611 }
612
613 if bytes.Equal(tag, []byte("form")) {
614 var formURL *url.URL
615 for _, attr := range attrs {
616 if bytes.Equal(attr[0], []byte("action")) {
617 formURL, _ = url.Parse(string(attr[1]))
618 formURL = mergeURIs(rc.BaseURL, formURL)
619 break
620 }
621 }
622 if formURL == nil {
623 formURL = rc.BaseURL
624 }
625 urlStr := formURL.String()
626 var key string
627 if rc.Key != nil {
628 key = hash(urlStr, rc.Key)
629 }
630 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
631
632 }
633
634 case html.EndTagToken:
635 tag, _ := decoder.TagName()
636 writeEndTag := true
637 switch string(tag) {
638 case "body":
639 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
640 case "style":
641 state = STATE_DEFAULT
642 case "noscript":
643 state = STATE_DEFAULT
644 writeEndTag = false
645 }
646 // skip noscript tags - only the tag, not the content, because javascript is sanitized
647 if writeEndTag {
648 fmt.Fprintf(out, "</%s>", tag)
649 }
650
651 case html.TextToken:
652 switch state {
653 case STATE_DEFAULT:
654 fmt.Fprintf(out, "%s", decoder.Raw())
655 case STATE_IN_STYLE:
656 sanitizeCSS(rc, out, decoder.Raw())
657 case STATE_IN_NOSCRIPT:
658 sanitizeHTML(rc, out, decoder.Raw())
659 }
660
661 case html.CommentToken:
662 // ignore comment. TODO : parse IE conditional comment
663
664 case html.DoctypeToken:
665 out.Write(decoder.Raw())
666 }
667 } else {
668 switch token {
669 case html.StartTagToken, html.SelfClosingTagToken:
670 tag, _ := decoder.TagName()
671 if inArray(tag, UNSAFE_ELEMENTS) {
672 unsafeElements = append(unsafeElements, tag)
673 }
674
675 case html.EndTagToken:
676 tag, _ := decoder.TagName()
677 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
678 unsafeElements = unsafeElements[:len(unsafeElements)-1]
679 }
680 }
681 }
682 }
683}
684
685func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
686 exclude := false
687 for _, attr := range attrs {
688 attrName := attr[0]
689 attrValue := attr[1]
690 if bytes.Equal(attrName, []byte("rel")) {
691 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
692 exclude = true
693 break
694 }
695 }
696 if bytes.Equal(attrName, []byte("as")) {
697 if bytes.Equal(attrValue, []byte("script")) {
698 exclude = true
699 break
700 }
701 }
702 }
703
704 if !exclude {
705 out.Write([]byte("<link"))
706 for _, attr := range attrs {
707 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
708 }
709 out.Write([]byte(">"))
710 }
711}
712
713func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
714 var http_equiv []byte
715 var content []byte
716
717 for _, attr := range attrs {
718 attrName := attr[0]
719 attrValue := attr[1]
720 if bytes.Equal(attrName, []byte("http-equiv")) {
721 http_equiv = bytes.ToLower(attrValue)
722 // exclude some <meta http-equiv="..." ..>
723 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
724 return
725 }
726 }
727 if bytes.Equal(attrName, []byte("content")) {
728 content = attrValue
729 }
730 if bytes.Equal(attrName, []byte("charset")) {
731 // exclude <meta charset="...">
732 return
733 }
734 }
735
736 out.Write([]byte("<meta"))
737 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
738 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
739 contentUrl := content[urlIndex+4:]
740 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
741 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
742 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
743 contentUrl = contentUrl[1 : len(contentUrl)-1]
744 }
745 }
746 // output proxify result
747 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
748 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
749 }
750 } else {
751 if len(http_equiv) > 0 {
752 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
753 }
754 sanitizeAttrs(rc, out, attrs)
755 }
756 out.Write([]byte(">"))
757}
758
759func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
760 for _, attr := range attrs {
761 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
762 }
763}
764
765func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
766 if inArray(attrName, SAFE_ATTRIBUTES) {
767 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
768 return
769 }
770 switch string(attrName) {
771 case "src", "href", "action":
772 if uri, err := rc.ProxifyURI(attrValue); err == nil {
773 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
774 } else if DEBUG {
775 log.Println("cannot proxify uri:", string(attrValue))
776 }
777 case "style":
778 cssAttr := bytes.NewBuffer(nil)
779 sanitizeCSS(rc, cssAttr, attrValue)
780 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
781 }
782}
783
// mergeURIs resolves the reference u2 against the base u1. A nil u2 yields
// u1 itself.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
790
// sanitizeURI normalizes the start and the end of uri and extracts its scheme.
//
// Bytes <= 32 (control characters and space) are trimmed at the end. When a
// scheme is found, the bytes <= 32 located before or inside it are removed
// and the scheme is lower-cased; the returned scheme includes the trailing
// ":". When no scheme is found (a '/', '?', '\' or '#' comes first, or there
// is no ':' at all), only the leading bytes <= 32 are skipped and the
// returned scheme is "".
//
// The returned slice shares its memory with uri, and uri is modified in
// place when a scheme is rewritten: the only allocation is for the scheme.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	const controlsAndSpace = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20"
	uri = bytes.TrimRight(uri, controlsAndSpace)

	cleaned := make([]byte, 0, 10) // lower-cased prefix without bytes <= 32
	firstVisible := -1             // index of the first byte > 32
	colonAt := -1                  // index of the ':' closing the scheme

scan:
	for i := 0; i < len(uri); i++ {
		c := uri[i]
		if c <= 32 {
			// ignore special characters and space
			continue
		}
		if c < utf8.RuneSelf && c >= 'A' && c <= 'Z' {
			c += 'a' - 'A'
		}
		cleaned = append(cleaned, c)

		if firstVisible < 0 {
			firstVisible = i
		}

		switch c {
		case ':':
			// end of the scheme
			colonAt = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colonAt < 0 {
		// scheme NOT found
		if firstVisible < 0 {
			firstVisible = 0
		}
		return uri[firstVisible:], ""
	}

	// scheme found: write the cleaned scheme right before the rest of the
	// URI, so that it ends on the original ':' position
	start := colonAt - len(cleaned) + 1
	copy(uri[start:], cleaned)
	return uri[start:], string(cleaned)
}
842
843func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
844 // sanitize URI
845 uri, scheme := sanitizeURI(uri)
846
847 // remove javascript protocol
848 if scheme == "javascript:" {
849 return "", nil
850 }
851
852 // TODO check malicious data: - e.g. data:script
853 if scheme == "data:" {
854 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
855 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
856 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
857 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
858 bytes.HasPrefix(uri, []byte("data:image/webp")) {
859 // should be safe
860 return string(uri), nil
861 } else {
862 // unsafe data
863 return "", nil
864 }
865 }
866
867 // parse the uri
868 u, err := url.Parse(string(uri))
869 if err != nil {
870 return "", err
871 }
872
873 // get the fragment (with the prefix "#")
874 fragment := ""
875 if len(u.Fragment) > 0 {
876 fragment = "#" + u.Fragment
877 }
878
879 // reset the fragment: it is not included in the mortyurl
880 u.Fragment = ""
881
882 // merge the URI with the document URI
883 u = mergeURIs(rc.BaseURL, u)
884
885 // simple internal link ?
886 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
887 if u.Scheme == rc.BaseURL.Scheme &&
888 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
889 u.Host == rc.BaseURL.Host &&
890 u.Path == rc.BaseURL.Path &&
891 u.RawQuery == rc.BaseURL.RawQuery {
892 // the fragment is the only difference between the document URI and the uri parameter
893 return fragment, nil
894 }
895
896 // return full URI and fragment (if not empty)
897 morty_uri := u.String()
898
899 if rc.Key == nil {
900 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
901 }
902 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
903}
904
// inArray reports whether a contains an element whose bytes equal b.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
913
// hash returns the HMAC-SHA256 of msg computed with key, encoded as a
// lower-case hexadecimal string.
func hash(msg string, key []byte) string {
	h := hmac.New(sha256.New, key)
	h.Write([]byte(msg))

	sum := h.Sum(nil)
	encoded := make([]byte, hex.EncodedLen(len(sum)))
	hex.Encode(encoded, sum)
	return string(encoded)
}
919
920func verifyRequestURI(uri, hashMsg, key []byte) bool {
921 h := make([]byte, hex.DecodedLen(len(hashMsg)))
922 _, err := hex.Decode(h, hashMsg)
923 if err != nil {
924 if DEBUG {
925 log.Println("hmac error:", err)
926 }
927 return false
928 }
929 mac := hmac.New(sha256.New, key)
930 mac.Write(uri)
931 return hmac.Equal(h, mac.Sum(nil))
932}
933
934func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
935 ctx.SetContentType("text/html")
936 ctx.SetStatusCode(403)
937 ctx.Write([]byte(MORTY_HTML_PAGE_START))
938 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
939 ctx.Write([]byte("<p>Following</p><p><a href=\""))
940 ctx.Write([]byte(html.EscapeString(uri.String())))
941 ctx.Write([]byte("\" rel=\"noreferrer\">"))
942 ctx.Write([]byte(html.EscapeString(uri.String())))
943 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
944 ctx.Write([]byte(MORTY_HTML_PAGE_END))
945}
946
947func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
948 ctx.SetContentType("text/html; charset=UTF-8")
949 ctx.SetStatusCode(statusCode)
950 ctx.Write([]byte(MORTY_HTML_PAGE_START))
951 if err != nil {
952 if DEBUG {
953 log.Println("error:", err)
954 }
955 ctx.Write([]byte("<h2>Error: "))
956 ctx.Write([]byte(html.EscapeString(err.Error())))
957 ctx.Write([]byte("</h2>"))
958 }
959 if p.Key == nil {
960 ctx.Write([]byte(`
961 <form action="post">
962 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
963 <input type="submit" value="go" />
964 </form>`))
965 } else {
966 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
967 }
968 ctx.Write([]byte(MORTY_HTML_PAGE_END))
969}
970
971func main() {
972 default_listen_addr := os.Getenv("MORTY_ADDRESS")
973 if default_listen_addr == "" {
974 default_listen_addr = "127.0.0.1:3000"
975 }
976 default_key := os.Getenv("MORTY_KEY")
977 listen := flag.String("listen", default_listen_addr, "Listen address")
978 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
979 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
980 version := flag.Bool("version", false, "Show version")
981 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
982 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
983 flag.Parse()
984
985 if *version {
986 fmt.Println(VERSION)
987 return
988 }
989
990 if *ipv6 {
991 CLIENT.DialDualStack = true
992 }
993
994 if *socks5 != "" {
995 // this disables CLIENT.DialDualStack
996 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
997 }
998
999 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
1000
1001 if *key != "" {
1002 var err error
1003 p.Key, err = base64.StdEncoding.DecodeString(*key)
1004 if err != nil {
1005 log.Fatal("Error parsing -key", err.Error())
1006 os.Exit(1)
1007 }
1008 }
1009
1010 log.Println("listening on", *listen)
1011
1012 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1013 log.Fatal("Error in ListenAndServe:", err)
1014 }
1015}
Note: See TracBrowser for help on using the repository browser.