source: code/trunk/morty.go@ 85

Last change on this file since 85 was 78, checked in by asciimoo, 7 years ago

[enh] add environment variable support to configure listen address and secret key - closes #56

File size: 27.5 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
14 "mime"
15 "net/url"
16 "os"
17 "path/filepath"
18 "regexp"
19 "strings"
20 "time"
21 "unicode/utf8"
22
23 "github.com/valyala/fasthttp"
24 "golang.org/x/net/html"
25 "golang.org/x/net/html/charset"
26 "golang.org/x/text/encoding"
27
28 "github.com/asciimoo/morty/contenttype"
29)
30
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text tokens are copied to the output as they are.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text tokens are CSS and go through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text tokens are HTML and go through sanitizeHTML.
	STATE_IN_NOSCRIPT
)

// VERSION is the version string printed by the -version flag.
const VERSION = "v0.2.0"
38
39var CLIENT *fasthttp.Client = &fasthttp.Client{
40 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
41}
42
// CSS_URL_REGEXP matches CSS url(...) references. Capture group 1 is the
// optional opening quote, group 2 the URL itself and group 3 the optional
// closing quote; sanitizeCSS rewrites group 2 only.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
44
45var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
46 // html
47 contenttype.NewFilterEquals("text", "html", ""),
48 contenttype.NewFilterEquals("application", "xhtml", "xml"),
49 // css
50 contenttype.NewFilterEquals("text", "css", ""),
51 // images
52 contenttype.NewFilterEquals("image", "gif", ""),
53 contenttype.NewFilterEquals("image", "png", ""),
54 contenttype.NewFilterEquals("image", "jpeg", ""),
55 contenttype.NewFilterEquals("image", "pjpeg", ""),
56 contenttype.NewFilterEquals("image", "webp", ""),
57 contenttype.NewFilterEquals("image", "tiff", ""),
58 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
59 contenttype.NewFilterEquals("image", "bmp", ""),
60 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
61 // fonts
62 contenttype.NewFilterEquals("application", "font-otf", ""),
63 contenttype.NewFilterEquals("application", "font-ttf", ""),
64 contenttype.NewFilterEquals("application", "font-woff", ""),
65 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
66})
67
68var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
69 // texts
70 contenttype.NewFilterEquals("text", "csv", ""),
71 contenttype.NewFilterEquals("text", "tab-separated-value", ""),
72 contenttype.NewFilterEquals("text", "plain", ""),
73 // API
74 contenttype.NewFilterEquals("application", "json", ""),
75 // Documents
76 contenttype.NewFilterEquals("application", "x-latex", ""),
77 contenttype.NewFilterEquals("application", "pdf", ""),
78 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
79 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
80 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
81 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
82 // Compressed archives
83 contenttype.NewFilterEquals("application", "zip", ""),
84 contenttype.NewFilterEquals("application", "gzip", ""),
85 contenttype.NewFilterEquals("application", "x-compressed", ""),
86 contenttype.NewFilterEquals("application", "x-gtar", ""),
87 contenttype.NewFilterEquals("application", "x-compress", ""),
88 // Generic binary
89 contenttype.NewFilterEquals("application", "octet-stream", ""),
90})
91
// ALLOWED_CONTENTTYPE_PARAMETERS is the set of Content-Type parameters that
// RequestHandler keeps when it filters the upstream content type.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
95
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML removes together with
// everything they contain.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	// "iframe" is deliberately left enabled.
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
105
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with their (HTML-escaped) value and no further rewriting.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
142
// SELF_CLOSING_ELEMENTS lists tag names that have no closing tag. sanitizeHTML
// consults it so that an unsafe element from this list does not start a
// "skip until the matching end tag" section.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
160
// LINK_REL_SAFE_VALUES lists the values of the "rel" attribute for which
// sanitizeLinkTag keeps a <link> element. The comparison is an exact byte
// match against the whole attribute value.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
182
// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-cased) http-equiv values for
// which sanitizeMetaTag keeps a <meta> element.
//
// X-UA-Compatible is not listed because it is added automatically through
// HTML_HEAD_CONTENT_TYPE.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // the target URL is rewritten by sanitizeMetaTag
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
191
// Proxy holds the server-wide settings used by RequestHandler.
type Proxy struct {
	// Key is the HMAC key used to validate the "mortyhash" parameter;
	// validation is skipped when it is nil.
	Key []byte
	// RequestTimeout is the timeout passed to the upstream request.
	RequestTimeout time.Duration
}
196
// RequestConfig carries the per-document settings used while rewriting URLs.
type RequestConfig struct {
	// Key is the HMAC key used to sign proxified URLs; nil disables signing.
	Key []byte
	// BaseURL is the URL against which the document's references are resolved.
	// sanitizeHTML replaces it when the document contains a <base href>.
	BaseURL *url.URL
}
201
// HTML_FORM_EXTENSION is the format string for the hidden inputs appended to
// every <form>: the first verb is the form target URL ("mortyurl"), the second
// its HMAC ("mortyhash").
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
203
// HTML_BODY_EXTENSION is the format string for the banner that sanitizeHTML
// inserts just before </body>. Its single %s verb receives the URL of the
// original page; literal percent signs in the CSS are written as %%.
var HTML_BODY_EXTENSION = `
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
  <p>This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p>
</div>
<style>
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 15%%; left: 0; max-width: 140px; overflow: hidden; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; padding: 12px 12px 8px 8px; color: #444; }
#mortyheader * { box-sizing: content-box; margin: 0; border: none; padding: 0; overflow: hidden; z-index: 2147483647 !important; line-height: 1em; font-size: 12px !important; font-family: sans !important; font-weight: normal; text-align: left; text-decoration: none; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
</style>
`
219
// HTML_HEAD_CONTENT_TYPE is the markup that sanitizeHTML writes right after an
// opening <head> tag: it declares the UTF-8 charset, the X-UA-Compatible
// directive and a no-referrer policy.
var HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
224
// MORTY_HTML_PAGE_START is the opening part of every page generated by morty
// itself (main page, error pages, exit page). It is written verbatim, not used
// as a format string, and must be followed by MORTY_HTML_PAGE_END.
//
// The link rule needs the "color:" property name: a bare "#2980b9;" is an
// invalid declaration that browsers drop, leaving links in the default color.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
	<div class="container">
		<h1>MortyProxy</h1>
`
247
// MORTY_HTML_PAGE_END closes the container opened by MORTY_HTML_PAGE_START and
// appends the footer shown on every page generated by morty itself.
var MORTY_HTML_PAGE_END = `
	</div>
	<div class="footer">
		<p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
		<a href="https://github.com/asciimoo/morty">view on github</a>
		</p>
	</div>
</body>
</html>`
257
// FAVICON_BYTES is the PNG image served by appRequestHandler for /favicon.ico.
var FAVICON_BYTES []byte

// init decodes the embedded favicon once at start-up.
func init() {
	// The literal carries no '=' padding (159 characters), so it has to be
	// decoded with RawStdEncoding. StdEncoding requires padding: it reports an
	// error on the last, incomplete quantum and returns a PNG that lacks its
	// final two bytes.
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	favicon, err := base64.RawStdEncoding.DecodeString(faviconBase64)
	if err != nil {
		// The input is a compile-time constant: a failure here is a
		// programming error, not a runtime condition.
		panic("morty: invalid embedded favicon: " + err.Error())
	}
	FAVICON_BYTES = favicon
}
265
266func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
267
268 if appRequestHandler(ctx) {
269 return
270 }
271
272 requestHash := popRequestParam(ctx, []byte("mortyhash"))
273
274 requestURI := popRequestParam(ctx, []byte("mortyurl"))
275
276 if requestURI == nil {
277 p.serveMainPage(ctx, 200, nil)
278 return
279 }
280
281 if p.Key != nil {
282 if !verifyRequestURI(requestURI, requestHash, p.Key) {
283 // HTTP status code 403 : Forbidden
284 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
285 return
286 }
287 }
288
289 parsedURI, err := url.Parse(string(requestURI))
290
291 if err != nil {
292 // HTTP status code 500 : Internal Server Error
293 p.serveMainPage(ctx, 500, err)
294 return
295 }
296
297 // Serve an intermediate page for protocols other than HTTP(S)
298 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
299 p.serveExitMortyPage(ctx, parsedURI)
300 return
301 }
302
303 req := fasthttp.AcquireRequest()
304 defer fasthttp.ReleaseRequest(req)
305 req.SetConnectionClose()
306
307 requestURIStr := string(requestURI)
308
309 log.Println("getting", requestURIStr)
310
311 req.SetRequestURI(requestURIStr)
312 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"))
313
314 resp := fasthttp.AcquireResponse()
315 defer fasthttp.ReleaseResponse(resp)
316
317 req.Header.SetMethodBytes(ctx.Method())
318 if ctx.IsPost() || ctx.IsPut() {
319 req.SetBody(ctx.PostBody())
320 }
321
322 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
323
324 if err != nil {
325 if err == fasthttp.ErrTimeout {
326 // HTTP status code 504 : Gateway Time-Out
327 p.serveMainPage(ctx, 504, err)
328 } else {
329 // HTTP status code 500 : Internal Server Error
330 p.serveMainPage(ctx, 500, err)
331 }
332 return
333 }
334
335 if resp.StatusCode() != 200 {
336 switch resp.StatusCode() {
337 case 301, 302, 303, 307, 308:
338 loc := resp.Header.Peek("Location")
339 if loc != nil {
340 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
341 url, err := rc.ProxifyURI(loc)
342 if err == nil {
343 ctx.SetStatusCode(resp.StatusCode())
344 ctx.Response.Header.Add("Location", url)
345 log.Println("redirect to", string(loc))
346 return
347 }
348 }
349 }
350 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
351 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
352 return
353 }
354
355 contentTypeBytes := resp.Header.Peek("Content-Type")
356
357 if contentTypeBytes == nil {
358 // HTTP status code 503 : Service Unavailable
359 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
360 return
361 }
362
363 contentTypeString := string(contentTypeBytes)
364
365 // decode Content-Type header
366 contentType, error := contenttype.ParseContentType(contentTypeString)
367 if error != nil {
368 // HTTP status code 503 : Service Unavailable
369 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
370 return
371 }
372
373 // content-disposition
374 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
375
376 // check content type
377 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
378 // it is not a usual content type
379 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
380 // force attachment for allowed content type
381 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
382 } else {
383 // deny access to forbidden content type
384 // HTTP status code 403 : Forbidden
385 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
386 return
387 }
388 }
389
390 // HACK : replace */xhtml by text/html
391 if contentType.SubType == "xhtml" {
392 contentType.TopLevelType = "text"
393 contentType.SubType = "html"
394 contentType.Suffix = ""
395 }
396
397 // conversion to UTF-8
398 var responseBody []byte
399
400 if contentType.TopLevelType == "text" {
401 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
402 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
403 responseBody, err = e.NewDecoder().Bytes(resp.Body())
404 if err != nil {
405 // HTTP status code 503 : Service Unavailable
406 p.serveMainPage(ctx, 503, err)
407 return
408 }
409 } else {
410 responseBody = resp.Body()
411 }
412 // update the charset or specify it
413 contentType.Parameters["charset"] = "UTF-8"
414 } else {
415 responseBody = resp.Body()
416 }
417
418 //
419 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
420
421 // set the content type
422 ctx.SetContentType(contentType.String())
423
424 // output according to MIME type
425 switch {
426 case contentType.SubType == "css" && contentType.Suffix == "":
427 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
428 case contentType.SubType == "html" && contentType.Suffix == "":
429 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
430 default:
431 if contentDispositionBytes != nil {
432 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
433 }
434 ctx.Write(responseBody)
435 }
436}
437
// contentDispositionForceAttachment builds a Content-Disposition header value
// whose disposition type is always "attachment".
//
// contentDispositionBytes is the existing header value (may be nil); its
// parameters are kept when it can be parsed. When it does not define a
// "filename" parameter, the last element of url.Path is used, unless the path
// has no usable last element (empty path or "/"). The result is never empty:
// if the parameters cannot be formatted, a bare "attachment" is returned so
// that the content is still downloaded rather than displayed.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	params := make(map[string]string)

	if contentDispositionBytes != nil {
		// Parameters of an unparsable header are discarded.
		if _, parsed, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil && parsed != nil {
			params = parsed
		}
	}

	if _, fileNameDefined := params["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		// filepath.Base yields "." for an empty path and the separator for a
		// path made only of separators: neither is a file name.
		if name := filepath.Base(url.Path); name != "." && name != string(filepath.Separator) {
			params["filename"] = name
		}
	}

	// FormatMediaType returns "" when a parameter cannot be represented.
	if formatted := mime.FormatMediaType("attachment", params); formatted != "" {
		return []byte(formatted)
	}
	return []byte("attachment")
}
460
461func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
462 // serve robots.txt
463 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
464 ctx.SetContentType("text/plain")
465 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
466 return true
467 }
468
469 // server favicon.ico
470 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
471 ctx.SetContentType("image/png")
472 ctx.Write(FAVICON_BYTES)
473 return true
474 }
475
476 return false
477}
478
479func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
480 param := ctx.QueryArgs().PeekBytes(paramName)
481
482 if param == nil {
483 param = ctx.PostArgs().PeekBytes(paramName)
484 if param != nil {
485 ctx.PostArgs().DelBytes(paramName)
486 }
487 } else {
488 ctx.QueryArgs().DelBytes(paramName)
489 }
490
491 return param
492}
493
494func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
495 // TODO
496
497 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
498
499 if urlSlices == nil {
500 out.Write(css)
501 return
502 }
503
504 startIndex := 0
505
506 for _, s := range urlSlices {
507 urlStart := s[4]
508 urlEnd := s[5]
509
510 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
511 out.Write(css[startIndex:urlStart])
512 out.Write([]byte(uri))
513 startIndex = urlEnd
514 } else {
515 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
516 }
517 }
518 if startIndex < len(css) {
519 out.Write(css[startIndex:len(css)])
520 }
521}
522
523func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
524 r := bytes.NewReader(htmlDoc)
525 decoder := html.NewTokenizer(r)
526 decoder.AllowCDATA(true)
527
528 unsafeElements := make([][]byte, 0, 8)
529 state := STATE_DEFAULT
530 for {
531 token := decoder.Next()
532 if token == html.ErrorToken {
533 err := decoder.Err()
534 if err != io.EOF {
535 log.Println("failed to parse HTML:")
536 }
537 break
538 }
539
540 if len(unsafeElements) == 0 {
541
542 switch token {
543 case html.StartTagToken, html.SelfClosingTagToken:
544 tag, hasAttrs := decoder.TagName()
545 safe := !inArray(tag, UNSAFE_ELEMENTS)
546 if !safe {
547 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
548 var unsafeTag []byte = make([]byte, len(tag))
549 copy(unsafeTag, tag)
550 unsafeElements = append(unsafeElements, unsafeTag)
551 }
552 break
553 }
554 if bytes.Equal(tag, []byte("base")) {
555 for {
556 attrName, attrValue, moreAttr := decoder.TagAttr()
557 if bytes.Equal(attrName, []byte("href")) {
558 parsedURI, err := url.Parse(string(attrValue))
559 if err == nil {
560 rc.BaseURL = parsedURI
561 }
562 }
563 if !moreAttr {
564 break
565 }
566 }
567 break
568 }
569 if bytes.Equal(tag, []byte("noscript")) {
570 state = STATE_IN_NOSCRIPT
571 break
572 }
573 var attrs [][][]byte
574 if hasAttrs {
575 for {
576 attrName, attrValue, moreAttr := decoder.TagAttr()
577 attrs = append(attrs, [][]byte{
578 attrName,
579 attrValue,
580 []byte(html.EscapeString(string(attrValue))),
581 })
582 if !moreAttr {
583 break
584 }
585 }
586 }
587 if bytes.Equal(tag, []byte("link")) {
588 sanitizeLinkTag(rc, out, attrs)
589 break
590 }
591
592 if bytes.Equal(tag, []byte("meta")) {
593 sanitizeMetaTag(rc, out, attrs)
594 break
595 }
596
597 fmt.Fprintf(out, "<%s", tag)
598
599 if hasAttrs {
600 sanitizeAttrs(rc, out, attrs)
601 }
602
603 if token == html.SelfClosingTagToken {
604 fmt.Fprintf(out, " />")
605 } else {
606 fmt.Fprintf(out, ">")
607 if bytes.Equal(tag, []byte("style")) {
608 state = STATE_IN_STYLE
609 }
610 }
611
612 if bytes.Equal(tag, []byte("head")) {
613 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
614 }
615
616 if bytes.Equal(tag, []byte("form")) {
617 var formURL *url.URL
618 for _, attr := range attrs {
619 if bytes.Equal(attr[0], []byte("action")) {
620 formURL, _ = url.Parse(string(attr[1]))
621 formURL = mergeURIs(rc.BaseURL, formURL)
622 break
623 }
624 }
625 if formURL == nil {
626 formURL = rc.BaseURL
627 }
628 urlStr := formURL.String()
629 var key string
630 if rc.Key != nil {
631 key = hash(urlStr, rc.Key)
632 }
633 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
634
635 }
636
637 case html.EndTagToken:
638 tag, _ := decoder.TagName()
639 writeEndTag := true
640 switch string(tag) {
641 case "body":
642 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
643 case "style":
644 state = STATE_DEFAULT
645 case "noscript":
646 state = STATE_DEFAULT
647 writeEndTag = false
648 }
649 // skip noscript tags - only the tag, not the content, because javascript is sanitized
650 if writeEndTag {
651 fmt.Fprintf(out, "</%s>", tag)
652 }
653
654 case html.TextToken:
655 switch state {
656 case STATE_DEFAULT:
657 fmt.Fprintf(out, "%s", decoder.Raw())
658 case STATE_IN_STYLE:
659 sanitizeCSS(rc, out, decoder.Raw())
660 case STATE_IN_NOSCRIPT:
661 sanitizeHTML(rc, out, decoder.Raw())
662 }
663
664 case html.CommentToken:
665 // ignore comment. TODO : parse IE conditional comment
666
667 case html.DoctypeToken:
668 out.Write(decoder.Raw())
669 }
670 } else {
671 switch token {
672 case html.StartTagToken:
673 tag, _ := decoder.TagName()
674 if inArray(tag, UNSAFE_ELEMENTS) {
675 unsafeElements = append(unsafeElements, tag)
676 }
677
678 case html.EndTagToken:
679 tag, _ := decoder.TagName()
680 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
681 unsafeElements = unsafeElements[:len(unsafeElements)-1]
682 }
683 }
684 }
685 }
686}
687
688func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
689 exclude := false
690 for _, attr := range attrs {
691 attrName := attr[0]
692 attrValue := attr[1]
693 if bytes.Equal(attrName, []byte("rel")) {
694 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
695 exclude = true
696 break
697 }
698 }
699 if bytes.Equal(attrName, []byte("as")) {
700 if bytes.Equal(attrValue, []byte("script")) {
701 exclude = true
702 break
703 }
704 }
705 }
706
707 if !exclude {
708 out.Write([]byte("<link"))
709 for _, attr := range attrs {
710 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
711 }
712 out.Write([]byte(">"))
713 }
714}
715
716func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
717 var http_equiv []byte
718 var content []byte
719
720 for _, attr := range attrs {
721 attrName := attr[0]
722 attrValue := attr[1]
723 if bytes.Equal(attrName, []byte("http-equiv")) {
724 http_equiv = bytes.ToLower(attrValue)
725 // exclude some <meta http-equiv="..." ..>
726 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
727 return
728 }
729 }
730 if bytes.Equal(attrName, []byte("content")) {
731 content = attrValue
732 }
733 if bytes.Equal(attrName, []byte("charset")) {
734 // exclude <meta charset="...">
735 return
736 }
737 }
738
739 out.Write([]byte("<meta"))
740 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
741 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
742 contentUrl := content[urlIndex+4:]
743 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
744 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
745 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
746 contentUrl = contentUrl[1 : len(contentUrl)-1]
747 }
748 }
749 // output proxify result
750 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
751 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
752 }
753 } else {
754 if len(http_equiv) > 0 {
755 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
756 }
757 sanitizeAttrs(rc, out, attrs)
758 }
759 out.Write([]byte(">"))
760}
761
762func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
763 for _, attr := range attrs {
764 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
765 }
766}
767
768func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
769 if inArray(attrName, SAFE_ATTRIBUTES) {
770 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
771 return
772 }
773 switch string(attrName) {
774 case "src", "href", "action":
775 if uri, err := rc.ProxifyURI(attrValue); err == nil {
776 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
777 } else {
778 log.Println("cannot proxify uri:", string(attrValue))
779 }
780 case "style":
781 cssAttr := bytes.NewBuffer(nil)
782 sanitizeCSS(rc, cssAttr, attrValue)
783 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
784 }
785}
786
// mergeURIs resolves u2 relative to u1 and returns the result. When u2 is nil,
// u1 is returned unchanged.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
793
// sanitizeURI normalizes the beginning and the end of uri and extracts its
// scheme.
//
// Bytes <= 32 (control characters and space) are trimmed at the end of uri.
// The start of uri is then scanned, skipping bytes <= 32 and lower-casing
// ASCII letters, up to the first ':' (a scheme was found) or the first '/',
// '?', '\' or '#' (most probably a relative URI).
//
// It returns the sanitized URI and the scheme, colon included (for instance
// "javascript:"); the scheme is "" when none was found. When a scheme is
// found, it is rewritten in place, lower-cased and without the skipped bytes,
// so uri is modified and the result shares its memory.
func sanitizeURI(uri []byte) ([]byte, string) {
	const controlsAndSpace = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20"

	uri = bytes.TrimRight(uri, controlsAndSpace)

	var (
		cleaned = make([]byte, 0, 10) // lower-cased bytes kept by the scan
		start   = 0                   // index of the first kept byte
		colonAt = -1                  // index of the scheme's ':' in uri
	)

scan:
	for idx := 0; idx < len(uri); idx++ {
		ch := uri[idx]
		if ch <= 32 {
			continue
		}
		if ch < utf8.RuneSelf && ch >= 'A' && ch <= 'Z' {
			ch += 'a' - 'A'
		}
		if len(cleaned) == 0 {
			start = idx
		}
		cleaned = append(cleaned, ch)

		switch ch {
		case ':':
			colonAt = idx
			break scan
		case '/', '?', '\\', '#':
			break scan
		}
	}

	if colonAt < 0 {
		return uri[start:], ""
	}

	// Write the cleaned scheme so that it ends exactly on the ':' of uri.
	from := colonAt - len(cleaned) + 1
	copy(uri[from:], cleaned)
	return uri[from:], string(cleaned)
}
845
846func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
847 // sanitize URI
848 uri, scheme := sanitizeURI(uri)
849
850 // remove javascript protocol
851 if scheme == "javascript:" {
852 return "", nil
853 }
854
855 // TODO check malicious data: - e.g. data:script
856 if scheme == "data:" {
857 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
858 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
859 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
860 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
861 bytes.HasPrefix(uri, []byte("data:image/webp")) {
862 // should be safe
863 return string(uri), nil
864 } else {
865 // unsafe data
866 return "", nil
867 }
868 }
869
870 // parse the uri
871 u, err := url.Parse(string(uri))
872 if err != nil {
873 return "", err
874 }
875
876 // get the fragment (with the prefix "#")
877 fragment := ""
878 if len(u.Fragment) > 0 {
879 fragment = "#" + u.Fragment
880 }
881
882 // reset the fragment: it is not included in the mortyurl
883 u.Fragment = ""
884
885 // merge the URI with the document URI
886 u = mergeURIs(rc.BaseURL, u)
887
888 // simple internal link ?
889 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
890 if u.Scheme == rc.BaseURL.Scheme &&
891 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
892 u.Host == rc.BaseURL.Host &&
893 u.Path == rc.BaseURL.Path &&
894 u.RawQuery == rc.BaseURL.RawQuery {
895 // the fragment is the only difference between the document URI and the uri parameter
896 return fragment, nil
897 }
898
899 // return full URI and fragment (if not empty)
900 morty_uri := u.String()
901
902 if rc.Key == nil {
903 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
904 }
905 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
906}
907
// inArray reports whether a contains an element equal, byte for byte, to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
916
// hash returns the HMAC-SHA256 of msg under key, as a lower-case hexadecimal
// string.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
922
// verifyRequestURI reports whether hashMsg is the hexadecimal HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hexadecimal is logged and
// rejected. The digests are compared with hmac.Equal.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	received, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	signer := hmac.New(sha256.New, key)
	signer.Write(uri)
	expected := signer.Sum(nil)

	return hmac.Equal(received, expected)
}
934
935func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
936 ctx.SetContentType("text/html")
937 ctx.SetStatusCode(403)
938 ctx.Write([]byte(MORTY_HTML_PAGE_START))
939 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
940 ctx.Write([]byte("<p>Following</p><p><a href=\""))
941 ctx.Write([]byte(html.EscapeString(uri.String())))
942 ctx.Write([]byte("\" rel=\"noreferrer\">"))
943 ctx.Write([]byte(html.EscapeString(uri.String())))
944 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
945 ctx.Write([]byte(MORTY_HTML_PAGE_END))
946}
947
948func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
949 ctx.SetContentType("text/html; charset=UTF-8")
950 ctx.SetStatusCode(statusCode)
951 ctx.Write([]byte(MORTY_HTML_PAGE_START))
952 if err != nil {
953 log.Println("error:", err)
954 ctx.Write([]byte("<h2>Error: "))
955 ctx.Write([]byte(html.EscapeString(err.Error())))
956 ctx.Write([]byte("</h2>"))
957 }
958 if p.Key == nil {
959 ctx.Write([]byte(`
960 <form action="post">
961 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
962 <input type="submit" value="go" />
963 </form>`))
964 } else {
965 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
966 }
967 ctx.Write([]byte(MORTY_HTML_PAGE_END))
968}
969
970func main() {
971 default_listen_addr := os.Getenv("MORTY_ADDRESS")
972 if default_listen_addr == "" {
973 default_listen_addr = "127.0.0.1:3000"
974 }
975 default_key := os.Getenv("MORTY_KEY")
976 listen := flag.String("listen", default_listen_addr, "Listen address")
977 key := flag.String("key", default_key, "HMAC url validation key (hexadecimal encoded) - leave blank to disable validation")
978 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
979 version := flag.Bool("version", false, "Show version")
980 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
981 flag.Parse()
982
983 if *version {
984 fmt.Println(VERSION)
985 return
986 }
987
988 if *ipv6 {
989 CLIENT.Dial = fasthttp.DialDualStack
990 }
991
992 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
993
994 if *key != "" {
995 p.Key = []byte(*key)
996 }
997
998 log.Println("listening on", *listen)
999
1000 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1001 log.Fatal("Error in ListenAndServe:", err)
1002 }
1003}
Note: See TracBrowser for help on using the repository browser.