source: code/trunk/morty.go@ 92

Last change on this file since 92 was 92, checked in by alex, 6 years ago

Dockerfile : Various modifications

  • the key is now base64-encoded /!\ this breaks compatibility; see searx project PR #1629
  • make sure to use golang:1.12-alpine to build morty so that TLS 1.3 is supported (from morty to external sites)
File size: 27.7 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
14 "mime"
15 "net/url"
16 "os"
17 "path/filepath"
18 "regexp"
19 "strings"
20 "time"
21 "unicode/utf8"
22
23 "github.com/valyala/fasthttp"
24 "golang.org/x/net/html"
25 "golang.org/x/net/html/charset"
26 "golang.org/x/text/encoding"
27
28 "github.com/asciimoo/morty/contenttype"
29)
30
31const (
32 STATE_DEFAULT int = 0
33 STATE_IN_STYLE int = 1
34 STATE_IN_NOSCRIPT int = 2
35)
36
// VERSION is the release string printed by the -version flag.
const VERSION = "v0.2.0"
38
39var CLIENT *fasthttp.Client = &fasthttp.Client{
40 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
41}
42
// CSS_URL_REGEXP matches CSS url(...) references.
//
// Submatch 1 is the optional opening quote, submatch 2 the URL itself and
// submatch 3 the optional closing quote; sanitizeCSS relies on submatch 2.
// The match is case-insensitive (CSS function names are) and tolerates
// whitespace between the parenthesis and the opening quote, so forms such as
// `URL("x")` and `url( 'x' )` are rewritten too instead of leaking through.
var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile("(?i)url\\([ \\t\\f\\r\\n]*(['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
44
45var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
46 // html
47 contenttype.NewFilterEquals("text", "html", ""),
48 contenttype.NewFilterEquals("application", "xhtml", "xml"),
49 // css
50 contenttype.NewFilterEquals("text", "css", ""),
51 // images
52 contenttype.NewFilterEquals("image", "gif", ""),
53 contenttype.NewFilterEquals("image", "png", ""),
54 contenttype.NewFilterEquals("image", "jpeg", ""),
55 contenttype.NewFilterEquals("image", "pjpeg", ""),
56 contenttype.NewFilterEquals("image", "webp", ""),
57 contenttype.NewFilterEquals("image", "tiff", ""),
58 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
59 contenttype.NewFilterEquals("image", "bmp", ""),
60 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
61 contenttype.NewFilterEquals("image", "x-icon", ""),
62 // fonts
63 contenttype.NewFilterEquals("application", "font-otf", ""),
64 contenttype.NewFilterEquals("application", "font-ttf", ""),
65 contenttype.NewFilterEquals("application", "font-woff", ""),
66 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
67})
68
69var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
70 // texts
71 contenttype.NewFilterEquals("text", "csv", ""),
72 contenttype.NewFilterEquals("text", "tab-separated-value", ""),
73 contenttype.NewFilterEquals("text", "plain", ""),
74 // API
75 contenttype.NewFilterEquals("application", "json", ""),
76 // Documents
77 contenttype.NewFilterEquals("application", "x-latex", ""),
78 contenttype.NewFilterEquals("application", "pdf", ""),
79 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
80 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
81 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
82 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
83 // Compressed archives
84 contenttype.NewFilterEquals("application", "zip", ""),
85 contenttype.NewFilterEquals("application", "gzip", ""),
86 contenttype.NewFilterEquals("application", "x-compressed", ""),
87 contenttype.NewFilterEquals("application", "x-gtar", ""),
88 contenttype.NewFilterEquals("application", "x-compress", ""),
89 // Generic binary
90 contenttype.NewFilterEquals("application", "octet-stream", ""),
91})
92
// ALLOWED_CONTENTTYPE_PARAMETERS is the set of Content-Type parameters kept
// on the response; it is handed to contentType.FilterParameters.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
96
// UNSAFE_ELEMENTS lists the tags that sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"), []byte("canvas"), []byte("embed"),
	//[]byte("iframe"),
	[]byte("math"), []byte("script"), []byte("svg"),
}
106
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output as-is (with the HTML-escaped value).
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"), []byte("accesskey"), []byte("align"), []byte("alt"),
	[]byte("as"), []byte("autocomplete"), []byte("charset"), []byte("checked"),
	[]byte("class"), []byte("content"), []byte("contenteditable"), []byte("contextmenu"),
	[]byte("dir"), []byte("for"), []byte("height"), []byte("hidden"),
	[]byte("hreflang"), []byte("id"), []byte("lang"), []byte("media"),
	[]byte("method"), []byte("name"), []byte("nowrap"), []byte("placeholder"),
	[]byte("property"), []byte("rel"), []byte("spellcheck"), []byte("tabindex"),
	[]byte("target"), []byte("title"), []byte("translate"), []byte("type"),
	[]byte("value"), []byte("width"),
}
143
// SELF_CLOSING_ELEMENTS lists the void elements: tags that never have a
// matching end tag. sanitizeHTML uses it to avoid waiting for a closing tag
// after dropping an unsafe void element.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"), []byte("base"), []byte("br"), []byte("col"), []byte("embed"),
	[]byte("hr"), []byte("img"), []byte("input"), []byte("keygen"), []byte("link"),
	[]byte("meta"), []byte("param"), []byte("source"), []byte("track"), []byte("wbr"),
}
161
// LINK_REL_SAFE_VALUES lists the rel attribute values for which
// sanitizeLinkTag keeps a <link> element; any other value drops the element.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"), []byte("archives"), []byte("author"), []byte("copyright"),
	[]byte("first"), []byte("help"), []byte("icon"), []byte("index"),
	[]byte("last"), []byte("license"), []byte("manifest"), []byte("next"),
	[]byte("pingback"), []byte("prev"), []byte("publisher"), []byte("search"),
	[]byte("shortcut icon"), []byte("stylesheet"), []byte("up"),
}
183
// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-cased) http-equiv values for
// which sanitizeMetaTag keeps a <meta> element.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible is added automatically, so it can be skipped here
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // its URL gets rewritten
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
192
type (
	// Proxy holds the server-wide settings used by RequestHandler.
	Proxy struct {
		// Key is the HMAC key used to validate "mortyhash"; nil disables validation.
		Key []byte
		// RequestTimeout bounds each upstream request.
		RequestTimeout time.Duration
	}

	// RequestConfig carries the per-document state needed to rewrite URLs.
	RequestConfig struct {
		// Key is the HMAC key used to sign proxified URLs; nil means unsigned.
		Key []byte
		// BaseURL is the URL that relative references are resolved against.
		BaseURL *url.URL
	}
)
202
// HTML_FORM_EXTENSION is the format string for the hidden inputs appended to
// every <form>: the first verb receives the target URL, the second its hash.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
204
// HTML_BODY_EXTENSION is the format string for the banner that sanitizeHTML
// inserts right before </body>. Its single %s verb receives the URL of the
// original page; literal percent signs are doubled because the string goes
// through fmt.Fprintf.
var HTML_BODY_EXTENSION = `
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
 <p>This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p>
</div>
<style>
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 15%%; left: 0; max-width: 140px; overflow: hidden; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; padding: 12px 12px 8px 8px; color: #444; }
#mortyheader * { box-sizing: content-box; margin: 0; border: none; padding: 0; overflow: hidden; z-index: 2147483647 !important; line-height: 1em; font-size: 12px !important; font-family: sans !important; font-weight: normal; text-align: left; text-decoration: none; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
</style>
`
220
// HTML_HEAD_CONTENT_TYPE is written right after every <head> start tag: it
// declares UTF-8, IE edge mode and a no-referrer policy.
var HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
225
// MORTY_HTML_PAGE_START opens every page generated by morty itself (main page,
// error pages, exit page). It is written verbatim, not used as a format
// string, so percent signs are single.
//
// The anchor rule previously read "#2980b9;" without a property name, which
// browsers discard; it now sets the link colour as intended.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`
248
// MORTY_HTML_PAGE_END closes the container opened by MORTY_HTML_PAGE_START
// and appends the footer of morty's own pages.
var MORTY_HTML_PAGE_END = `
 </div>
 <div class="footer">
 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://github.com/asciimoo/morty">view on github</a>
 </p>
 </div>
</body>
</html>`
258
// FAVICON_BYTES is the PNG served for /favicon.ico; it is filled in by init.
var FAVICON_BYTES []byte

// init decodes the embedded favicon.
//
// The literal carries no trailing '=' padding, so it must be decoded with the
// unpadded alphabet: the padded decoder reports an error on the final group
// and returns the image without its last two bytes.
func init() {
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	decoded, err := base64.RawStdEncoding.DecodeString(faviconBase64)
	if err != nil {
		// the input is a compile-time constant: failing here is a programming error
		panic("invalid embedded favicon: " + err.Error())
	}
	FAVICON_BYTES = decoded
}
266
267func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
268
269 if appRequestHandler(ctx) {
270 return
271 }
272
273 requestHash := popRequestParam(ctx, []byte("mortyhash"))
274
275 requestURI := popRequestParam(ctx, []byte("mortyurl"))
276
277 if requestURI == nil {
278 p.serveMainPage(ctx, 200, nil)
279 return
280 }
281
282 if p.Key != nil {
283 if !verifyRequestURI(requestURI, requestHash, p.Key) {
284 // HTTP status code 403 : Forbidden
285 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
286 return
287 }
288 }
289
290 parsedURI, err := url.Parse(string(requestURI))
291
292 if err != nil {
293 // HTTP status code 500 : Internal Server Error
294 p.serveMainPage(ctx, 500, err)
295 return
296 }
297
298 // Serve an intermediate page for protocols other than HTTP(S)
299 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
300 p.serveExitMortyPage(ctx, parsedURI)
301 return
302 }
303
304 req := fasthttp.AcquireRequest()
305 defer fasthttp.ReleaseRequest(req)
306 req.SetConnectionClose()
307
308 requestURIStr := string(requestURI)
309
310 log.Println("getting", requestURIStr)
311
312 req.SetRequestURI(requestURIStr)
313 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"))
314
315 resp := fasthttp.AcquireResponse()
316 defer fasthttp.ReleaseResponse(resp)
317
318 req.Header.SetMethodBytes(ctx.Method())
319 if ctx.IsPost() || ctx.IsPut() {
320 req.SetBody(ctx.PostBody())
321 }
322
323 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
324
325 if err != nil {
326 if err == fasthttp.ErrTimeout {
327 // HTTP status code 504 : Gateway Time-Out
328 p.serveMainPage(ctx, 504, err)
329 } else {
330 // HTTP status code 500 : Internal Server Error
331 p.serveMainPage(ctx, 500, err)
332 }
333 return
334 }
335
336 if resp.StatusCode() != 200 {
337 switch resp.StatusCode() {
338 case 301, 302, 303, 307, 308:
339 loc := resp.Header.Peek("Location")
340 if loc != nil {
341 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
342 url, err := rc.ProxifyURI(loc)
343 if err == nil {
344 ctx.SetStatusCode(resp.StatusCode())
345 ctx.Response.Header.Add("Location", url)
346 log.Println("redirect to", string(loc))
347 return
348 }
349 }
350 }
351 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
352 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
353 return
354 }
355
356 contentTypeBytes := resp.Header.Peek("Content-Type")
357
358 if contentTypeBytes == nil {
359 // HTTP status code 503 : Service Unavailable
360 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
361 return
362 }
363
364 contentTypeString := string(contentTypeBytes)
365
366 // decode Content-Type header
367 contentType, error := contenttype.ParseContentType(contentTypeString)
368 if error != nil {
369 // HTTP status code 503 : Service Unavailable
370 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
371 return
372 }
373
374 // content-disposition
375 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
376
377 // check content type
378 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
379 // it is not a usual content type
380 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
381 // force attachment for allowed content type
382 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
383 } else {
384 // deny access to forbidden content type
385 // HTTP status code 403 : Forbidden
386 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
387 return
388 }
389 }
390
391 // HACK : replace */xhtml by text/html
392 if contentType.SubType == "xhtml" {
393 contentType.TopLevelType = "text"
394 contentType.SubType = "html"
395 contentType.Suffix = ""
396 }
397
398 // conversion to UTF-8
399 var responseBody []byte
400
401 if contentType.TopLevelType == "text" {
402 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
403 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
404 responseBody, err = e.NewDecoder().Bytes(resp.Body())
405 if err != nil {
406 // HTTP status code 503 : Service Unavailable
407 p.serveMainPage(ctx, 503, err)
408 return
409 }
410 } else {
411 responseBody = resp.Body()
412 }
413 // update the charset or specify it
414 contentType.Parameters["charset"] = "UTF-8"
415 } else {
416 responseBody = resp.Body()
417 }
418
419 //
420 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
421
422 // set the content type
423 ctx.SetContentType(contentType.String())
424
425 // output according to MIME type
426 switch {
427 case contentType.SubType == "css" && contentType.Suffix == "":
428 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
429 case contentType.SubType == "html" && contentType.Suffix == "":
430 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
431 default:
432 if contentDispositionBytes != nil {
433 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
434 }
435 ctx.Write(responseBody)
436 }
437}
438
// contentDispositionForceAttachment builds a Content-Disposition header value
// whose disposition type is always "attachment".
//
// contentDispositionBytes is the upstream header value (may be nil); its
// parameters are kept when it parses, and discarded otherwise. When no
// "filename" parameter is present, one is derived from the last element of
// url's path, unless the path has no usable last element (empty or root).
// If the value cannot be formatted, a bare "attachment" is returned so the
// download is still forced.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var params map[string]string

	if contentDispositionBytes != nil {
		// only the parameters matter: the disposition type is replaced below
		if _, parsed, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil {
			params = parsed
		}
	}
	if params == nil {
		params = make(map[string]string)
	}

	if _, fileNameDefined := params["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		// Base returns "." for an empty path and the separator for a root path
		if name != "." && name != string(filepath.Separator) {
			params["filename"] = name
		}
	}

	if value := mime.FormatMediaType("attachment", params); value != "" {
		return []byte(value)
	}
	return []byte("attachment")
}
461
462func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
463 // serve robots.txt
464 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
465 ctx.SetContentType("text/plain")
466 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
467 return true
468 }
469
470 // server favicon.ico
471 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
472 ctx.SetContentType("image/png")
473 ctx.Write(FAVICON_BYTES)
474 return true
475 }
476
477 return false
478}
479
480func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
481 param := ctx.QueryArgs().PeekBytes(paramName)
482
483 if param == nil {
484 param = ctx.PostArgs().PeekBytes(paramName)
485 if param != nil {
486 ctx.PostArgs().DelBytes(paramName)
487 }
488 } else {
489 ctx.QueryArgs().DelBytes(paramName)
490 }
491
492 return param
493}
494
495func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
496 // TODO
497
498 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
499
500 if urlSlices == nil {
501 out.Write(css)
502 return
503 }
504
505 startIndex := 0
506
507 for _, s := range urlSlices {
508 urlStart := s[4]
509 urlEnd := s[5]
510
511 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
512 out.Write(css[startIndex:urlStart])
513 out.Write([]byte(uri))
514 startIndex = urlEnd
515 } else {
516 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
517 }
518 }
519 if startIndex < len(css) {
520 out.Write(css[startIndex:len(css)])
521 }
522}
523
524func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
525 r := bytes.NewReader(htmlDoc)
526 decoder := html.NewTokenizer(r)
527 decoder.AllowCDATA(true)
528
529 unsafeElements := make([][]byte, 0, 8)
530 state := STATE_DEFAULT
531 for {
532 token := decoder.Next()
533 if token == html.ErrorToken {
534 err := decoder.Err()
535 if err != io.EOF {
536 log.Println("failed to parse HTML:")
537 }
538 break
539 }
540
541 if len(unsafeElements) == 0 {
542
543 switch token {
544 case html.StartTagToken, html.SelfClosingTagToken:
545 tag, hasAttrs := decoder.TagName()
546 safe := !inArray(tag, UNSAFE_ELEMENTS)
547 if !safe {
548 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
549 var unsafeTag []byte = make([]byte, len(tag))
550 copy(unsafeTag, tag)
551 unsafeElements = append(unsafeElements, unsafeTag)
552 }
553 break
554 }
555 if bytes.Equal(tag, []byte("base")) {
556 for {
557 attrName, attrValue, moreAttr := decoder.TagAttr()
558 if bytes.Equal(attrName, []byte("href")) {
559 parsedURI, err := url.Parse(string(attrValue))
560 if err == nil {
561 rc.BaseURL = parsedURI
562 }
563 }
564 if !moreAttr {
565 break
566 }
567 }
568 break
569 }
570 if bytes.Equal(tag, []byte("noscript")) {
571 state = STATE_IN_NOSCRIPT
572 break
573 }
574 var attrs [][][]byte
575 if hasAttrs {
576 for {
577 attrName, attrValue, moreAttr := decoder.TagAttr()
578 attrs = append(attrs, [][]byte{
579 attrName,
580 attrValue,
581 []byte(html.EscapeString(string(attrValue))),
582 })
583 if !moreAttr {
584 break
585 }
586 }
587 }
588 if bytes.Equal(tag, []byte("link")) {
589 sanitizeLinkTag(rc, out, attrs)
590 break
591 }
592
593 if bytes.Equal(tag, []byte("meta")) {
594 sanitizeMetaTag(rc, out, attrs)
595 break
596 }
597
598 fmt.Fprintf(out, "<%s", tag)
599
600 if hasAttrs {
601 sanitizeAttrs(rc, out, attrs)
602 }
603
604 if token == html.SelfClosingTagToken {
605 fmt.Fprintf(out, " />")
606 } else {
607 fmt.Fprintf(out, ">")
608 if bytes.Equal(tag, []byte("style")) {
609 state = STATE_IN_STYLE
610 }
611 }
612
613 if bytes.Equal(tag, []byte("head")) {
614 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
615 }
616
617 if bytes.Equal(tag, []byte("form")) {
618 var formURL *url.URL
619 for _, attr := range attrs {
620 if bytes.Equal(attr[0], []byte("action")) {
621 formURL, _ = url.Parse(string(attr[1]))
622 formURL = mergeURIs(rc.BaseURL, formURL)
623 break
624 }
625 }
626 if formURL == nil {
627 formURL = rc.BaseURL
628 }
629 urlStr := formURL.String()
630 var key string
631 if rc.Key != nil {
632 key = hash(urlStr, rc.Key)
633 }
634 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
635
636 }
637
638 case html.EndTagToken:
639 tag, _ := decoder.TagName()
640 writeEndTag := true
641 switch string(tag) {
642 case "body":
643 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
644 case "style":
645 state = STATE_DEFAULT
646 case "noscript":
647 state = STATE_DEFAULT
648 writeEndTag = false
649 }
650 // skip noscript tags - only the tag, not the content, because javascript is sanitized
651 if writeEndTag {
652 fmt.Fprintf(out, "</%s>", tag)
653 }
654
655 case html.TextToken:
656 switch state {
657 case STATE_DEFAULT:
658 fmt.Fprintf(out, "%s", decoder.Raw())
659 case STATE_IN_STYLE:
660 sanitizeCSS(rc, out, decoder.Raw())
661 case STATE_IN_NOSCRIPT:
662 sanitizeHTML(rc, out, decoder.Raw())
663 }
664
665 case html.CommentToken:
666 // ignore comment. TODO : parse IE conditional comment
667
668 case html.DoctypeToken:
669 out.Write(decoder.Raw())
670 }
671 } else {
672 switch token {
673 case html.StartTagToken:
674 tag, _ := decoder.TagName()
675 if inArray(tag, UNSAFE_ELEMENTS) {
676 unsafeElements = append(unsafeElements, tag)
677 }
678
679 case html.EndTagToken:
680 tag, _ := decoder.TagName()
681 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
682 unsafeElements = unsafeElements[:len(unsafeElements)-1]
683 }
684 }
685 }
686 }
687}
688
689func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
690 exclude := false
691 for _, attr := range attrs {
692 attrName := attr[0]
693 attrValue := attr[1]
694 if bytes.Equal(attrName, []byte("rel")) {
695 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
696 exclude = true
697 break
698 }
699 }
700 if bytes.Equal(attrName, []byte("as")) {
701 if bytes.Equal(attrValue, []byte("script")) {
702 exclude = true
703 break
704 }
705 }
706 }
707
708 if !exclude {
709 out.Write([]byte("<link"))
710 for _, attr := range attrs {
711 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
712 }
713 out.Write([]byte(">"))
714 }
715}
716
717func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
718 var http_equiv []byte
719 var content []byte
720
721 for _, attr := range attrs {
722 attrName := attr[0]
723 attrValue := attr[1]
724 if bytes.Equal(attrName, []byte("http-equiv")) {
725 http_equiv = bytes.ToLower(attrValue)
726 // exclude some <meta http-equiv="..." ..>
727 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
728 return
729 }
730 }
731 if bytes.Equal(attrName, []byte("content")) {
732 content = attrValue
733 }
734 if bytes.Equal(attrName, []byte("charset")) {
735 // exclude <meta charset="...">
736 return
737 }
738 }
739
740 out.Write([]byte("<meta"))
741 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
742 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
743 contentUrl := content[urlIndex+4:]
744 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
745 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
746 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
747 contentUrl = contentUrl[1 : len(contentUrl)-1]
748 }
749 }
750 // output proxify result
751 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
752 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
753 }
754 } else {
755 if len(http_equiv) > 0 {
756 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
757 }
758 sanitizeAttrs(rc, out, attrs)
759 }
760 out.Write([]byte(">"))
761}
762
763func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
764 for _, attr := range attrs {
765 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
766 }
767}
768
769func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
770 if inArray(attrName, SAFE_ATTRIBUTES) {
771 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
772 return
773 }
774 switch string(attrName) {
775 case "src", "href", "action":
776 if uri, err := rc.ProxifyURI(attrValue); err == nil {
777 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
778 } else {
779 log.Println("cannot proxify uri:", string(attrValue))
780 }
781 case "style":
782 cssAttr := bytes.NewBuffer(nil)
783 sanitizeCSS(rc, cssAttr, attrValue)
784 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
785 }
786}
787
// mergeURIs resolves the reference u2 against the base u1.
// When u2 is nil, u1 itself is returned.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
794
// sanitizeURI strips the bytes <= 32 (controls and space) found at both ends
// of uri and detects its scheme.
//
// It returns the cleaned URI and the lower-cased scheme including the trailing
// ':' ("" when the URI has no scheme). Bytes <= 32 located inside the scheme
// are removed as well, so "java\tscript:" is reported as "javascript:".
//
// The cleaned URI shares its storage with uri: when a scheme is found, the
// normalized scheme is written back into the input slice.
func sanitizeURI(uri []byte) ([]byte, string) {
	const controlsAndSpace = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20"

	// drop the trailing garbage first
	uri = bytes.TrimRight(uri, controlsAndSpace)

	var (
		normalized = bytes.NewBuffer(make([]byte, 0, 10)) // lower-cased bytes seen so far
		start      = 0                                    // offset of the first byte > 32
		started    = false
		colonAt    = -1 // offset of the ':' ending the scheme
	)

scan:
	for idx := 0; idx < len(uri); idx++ {
		ch := uri[idx]
		if ch <= 32 {
			// controls and spaces are skipped wherever they are
			continue
		}
		if ch < utf8.RuneSelf && ch >= 'A' && ch <= 'Z' {
			ch += 'a' - 'A'
		}
		normalized.WriteByte(ch)

		if !started {
			start, started = idx, true
		}

		switch ch {
		case ':':
			// end of the scheme
			colonAt = idx
			break scan
		case '/', '?', '\\', '#':
			// no scheme: most probably a relative URI
			break scan
		}
	}

	if colonAt == -1 {
		return uri[start:], ""
	}

	// Overwrite the tail of the original scheme with its normalized form so
	// that the result is one contiguous slice ending where uri ends.
	begin := colonAt - normalized.Len() + 1
	copy(uri[begin:], normalized.Bytes())
	return uri[begin:], normalized.String()
}
846
847func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
848 // sanitize URI
849 uri, scheme := sanitizeURI(uri)
850
851 // remove javascript protocol
852 if scheme == "javascript:" {
853 return "", nil
854 }
855
856 // TODO check malicious data: - e.g. data:script
857 if scheme == "data:" {
858 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
859 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
860 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
861 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
862 bytes.HasPrefix(uri, []byte("data:image/webp")) {
863 // should be safe
864 return string(uri), nil
865 } else {
866 // unsafe data
867 return "", nil
868 }
869 }
870
871 // parse the uri
872 u, err := url.Parse(string(uri))
873 if err != nil {
874 return "", err
875 }
876
877 // get the fragment (with the prefix "#")
878 fragment := ""
879 if len(u.Fragment) > 0 {
880 fragment = "#" + u.Fragment
881 }
882
883 // reset the fragment: it is not included in the mortyurl
884 u.Fragment = ""
885
886 // merge the URI with the document URI
887 u = mergeURIs(rc.BaseURL, u)
888
889 // simple internal link ?
890 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
891 if u.Scheme == rc.BaseURL.Scheme &&
892 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
893 u.Host == rc.BaseURL.Host &&
894 u.Path == rc.BaseURL.Path &&
895 u.RawQuery == rc.BaseURL.RawQuery {
896 // the fragment is the only difference between the document URI and the uri parameter
897 return fragment, nil
898 }
899
900 // return full URI and fragment (if not empty)
901 morty_uri := u.String()
902
903 if rc.Key == nil {
904 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
905 }
906 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
907}
908
// inArray reports whether a contains an element byte-for-byte equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
917
// hash returns the HMAC-SHA256 of msg under key, hex encoded.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}
923
// verifyRequestURI reports whether hashMsg is the hex-encoded HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hex is logged and rejected.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	expected, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	signer := hmac.New(sha256.New, key)
	signer.Write(uri)
	// constant-time comparison
	return hmac.Equal(expected, signer.Sum(nil))
}
935
936func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
937 ctx.SetContentType("text/html")
938 ctx.SetStatusCode(403)
939 ctx.Write([]byte(MORTY_HTML_PAGE_START))
940 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
941 ctx.Write([]byte("<p>Following</p><p><a href=\""))
942 ctx.Write([]byte(html.EscapeString(uri.String())))
943 ctx.Write([]byte("\" rel=\"noreferrer\">"))
944 ctx.Write([]byte(html.EscapeString(uri.String())))
945 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
946 ctx.Write([]byte(MORTY_HTML_PAGE_END))
947}
948
949func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
950 ctx.SetContentType("text/html; charset=UTF-8")
951 ctx.SetStatusCode(statusCode)
952 ctx.Write([]byte(MORTY_HTML_PAGE_START))
953 if err != nil {
954 log.Println("error:", err)
955 ctx.Write([]byte("<h2>Error: "))
956 ctx.Write([]byte(html.EscapeString(err.Error())))
957 ctx.Write([]byte("</h2>"))
958 }
959 if p.Key == nil {
960 ctx.Write([]byte(`
961 <form action="post">
962 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
963 <input type="submit" value="go" />
964 </form>`))
965 } else {
966 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
967 }
968 ctx.Write([]byte(MORTY_HTML_PAGE_END))
969}
970
971func main() {
972 default_listen_addr := os.Getenv("MORTY_ADDRESS")
973 if default_listen_addr == "" {
974 default_listen_addr = "127.0.0.1:3000"
975 }
976 default_key := os.Getenv("MORTY_KEY")
977 listen := flag.String("listen", default_listen_addr, "Listen address")
978 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
979 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
980 version := flag.Bool("version", false, "Show version")
981 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
982 flag.Parse()
983
984 if *version {
985 fmt.Println(VERSION)
986 return
987 }
988
989 if *ipv6 {
990 CLIENT.Dial = fasthttp.DialDualStack
991 }
992
993 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
994
995 if *key != "" {
996 var err error
997 p.Key, err = base64.StdEncoding.DecodeString(*key)
998 if (err != nil) {
999 log.Fatal("Error parsing -key", err.Error())
1000 os.Exit(1)
1001 }
1002 }
1003
1004 log.Println("listening on", *listen)
1005
1006 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1007 log.Fatal("Error in ListenAndServe:", err)
1008 }
1009}
Note: See TracBrowser for help on using the repository browser.