source: code/trunk/morty.go@ 74

Last change on this file since 74 was 74, checked in by asciimoo, 7 years ago

[enh] v0.1.0

File size: 27.3 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
14 "mime"
15 "net/url"
16 "path/filepath"
17 "regexp"
18 "strings"
19 "time"
20 "unicode/utf8"
21
22 "github.com/valyala/fasthttp"
23 "golang.org/x/net/html"
24 "golang.org/x/net/html/charset"
25 "golang.org/x/text/encoding"
26
27 "github.com/asciimoo/morty/contenttype"
28)
29
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	STATE_DEFAULT     int = iota // text is copied as-is
	STATE_IN_STYLE               // text is CSS and goes through sanitizeCSS
	STATE_IN_NOSCRIPT            // text is HTML and goes through sanitizeHTML
)

// VERSION is the string printed by the -version flag.
const VERSION = "v0.1.0"
37
38var CLIENT *fasthttp.Client = &fasthttp.Client{
39 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
40}
41
// CSS_URL_REGEXP finds url(...) references in CSS text. Submatch 2 is the URL
// itself; submatches 1 and 3 capture the optional quote before and after it.
var CSS_URL_REGEXP = regexp.MustCompile(
	"url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?",
)
43
// ALLOWED_CONTENTTYPE_FILTER matches the content types that RequestHandler
// serves directly (HTML and CSS are sanitized, the others are written as-is).
var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
	// html
	contenttype.NewFilterEquals("text", "html", ""),
	contenttype.NewFilterEquals("application", "xhtml", "xml"),
	// css
	contenttype.NewFilterEquals("text", "css", ""),
	// images
	contenttype.NewFilterEquals("image", "gif", ""),
	contenttype.NewFilterEquals("image", "png", ""),
	contenttype.NewFilterEquals("image", "jpeg", ""),
	contenttype.NewFilterEquals("image", "pjpeg", ""),
	contenttype.NewFilterEquals("image", "webp", ""),
	contenttype.NewFilterEquals("image", "tiff", ""),
	contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
	contenttype.NewFilterEquals("image", "bmp", ""),
	contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
	// fonts
	contenttype.NewFilterEquals("application", "font-otf", ""),
	contenttype.NewFilterEquals("application", "font-ttf", ""),
	contenttype.NewFilterEquals("application", "font-woff", ""),
	contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
})
66
67var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
68 // texts
69 contenttype.NewFilterEquals("text", "csv", ""),
70 contenttype.NewFilterEquals("text", "tab-separated-value", ""),
71 contenttype.NewFilterEquals("text", "plain", ""),
72 // API
73 contenttype.NewFilterEquals("application", "json", ""),
74 // Documents
75 contenttype.NewFilterEquals("application", "x-latex", ""),
76 contenttype.NewFilterEquals("application", "pdf", ""),
77 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
78 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
79 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
80 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
81 // Compressed archives
82 contenttype.NewFilterEquals("application", "zip", ""),
83 contenttype.NewFilterEquals("application", "gzip", ""),
84 contenttype.NewFilterEquals("application", "x-compressed", ""),
85 contenttype.NewFilterEquals("application", "x-gtar", ""),
86 contenttype.NewFilterEquals("application", "x-compress", ""),
87 // Generic binary
88 contenttype.NewFilterEquals("application", "octet-stream", ""),
89})
90
// ALLOWED_CONTENTTYPE_PARAMETERS is passed to contentType.FilterParameters by
// RequestHandler before the Content-Type header is sent to the client.
var ALLOWED_CONTENTTYPE_PARAMETERS map[string]bool = map[string]bool{
	"charset": true,
}
94
// UNSAFE_ELEMENTS lists the tags that sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
104
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with their (HTML-escaped) value unchanged.
var SAFE_ATTRIBUTES = bytes.Fields([]byte(`
	abbr accesskey align alt as autocomplete
	charset checked class content contenteditable contextmenu
	dir for height hidden hreflang id
	lang media method name nowrap
	placeholder property rel spellcheck
	tabindex target title translate type
	value width
`))
141
// SELF_CLOSING_ELEMENTS lists the tags that never have an end tag; sanitizeHTML
// does not wait for a closing tag when it drops one of them.
var SELF_CLOSING_ELEMENTS = bytes.Fields([]byte(
	"area base br col embed hr img input keygen link meta param source track wbr",
))
159
// LINK_REL_SAFE_VALUES lists the rel values accepted by sanitizeLinkTag; a
// <link> whose rel attribute is not exactly one of them is dropped.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
181
// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-cased) http-equiv values that
// sanitizeMetaTag keeps; a <meta> with any other http-equiv is dropped.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
190
// Proxy holds the settings shared by every request handled by the server.
type Proxy struct {
	// Key is the HMAC key checked against the "mortyhash" parameter;
	// when nil, the hash is not verified.
	Key []byte
	// RequestTimeout is the timeout passed to CLIENT.DoTimeout for the
	// upstream request.
	RequestTimeout time.Duration
}
195
// RequestConfig carries what is needed to rewrite the URLs of one document.
type RequestConfig struct {
	// Key is the HMAC key used to sign proxified URLs; when nil, the
	// generated URLs carry no "mortyhash" parameter.
	Key []byte
	// BaseURL is the URL that relative references are resolved against.
	BaseURL *url.URL
}
200
// HTML_FORM_EXTENSION is the format string for the hidden inputs appended
// after every <form> start tag: the first verb receives the form target URL,
// the second its hash.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" />` +
	`<input type="hidden" name="mortyhash" value="%s" />`
202
// HTML_BODY_EXTENSION is the format string for the banner that sanitizeHTML
// writes just before </body>. Its single %s verb receives the URL of the
// original page; literal percent signs in the CSS are doubled for that reason.
var HTML_BODY_EXTENSION string = `
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
 <p>This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p>
</div>
<style>
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 15%%; left: 0; max-width: 140px; overflow: hidden; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; padding: 12px 12px 8px 8px; color: #444; }
#mortyheader * { box-sizing: content-box; margin: 0; border: none; padding: 0; overflow: hidden; z-index: 2147483647 !important; line-height: 1em; font-size: 12px !important; font-family: sans !important; font-weight: normal; text-align: left; text-decoration: none; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
</style>
`
218
// HTML_HEAD_CONTENT_TYPE is written by sanitizeHTML right after the <head>
// start tag: it declares the UTF-8 charset and a no-referrer policy.
var HTML_HEAD_CONTENT_TYPE string = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
223
// MORTY_HTML_PAGE_START is the opening part of the pages generated by the
// proxy itself (main page, error page, exit page); it is followed by the page
// content and then by MORTY_HTML_PAGE_END.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`
246
// MORTY_HTML_PAGE_END closes the pages generated by the proxy itself: it ends
// the container opened by MORTY_HTML_PAGE_START and adds the footer.
var MORTY_HTML_PAGE_END string = `
 </div>
 <div class="footer">
 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://github.com/asciimoo/morty">view on github</a>
 </p>
 </div>
</body>
</html>`
256
// FAVICON_BYTES is the decoded PNG served for /favicon.ico.
var FAVICON_BYTES []byte

// init decodes the embedded favicon once at start-up.
func init() {
	// The literal carries no '=' padding, so it must be decoded with the raw
	// (unpadded) encoding: StdEncoding reports an error on it and silently
	// drops the last two bytes of the image.
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	var err error
	FAVICON_BYTES, err = base64.RawStdEncoding.DecodeString(faviconBase64)
	if err != nil {
		// best effort: keep whatever was decoded, but do not hide the problem
		log.Println("cannot decode embedded favicon:", err)
	}
}
264
265func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
266
267 if appRequestHandler(ctx) {
268 return
269 }
270
271 requestHash := popRequestParam(ctx, []byte("mortyhash"))
272
273 requestURI := popRequestParam(ctx, []byte("mortyurl"))
274
275 if requestURI == nil {
276 p.serveMainPage(ctx, 200, nil)
277 return
278 }
279
280 if p.Key != nil {
281 if !verifyRequestURI(requestURI, requestHash, p.Key) {
282 // HTTP status code 403 : Forbidden
283 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
284 return
285 }
286 }
287
288 parsedURI, err := url.Parse(string(requestURI))
289
290 if err != nil {
291 // HTTP status code 500 : Internal Server Error
292 p.serveMainPage(ctx, 500, err)
293 return
294 }
295
296 // Serve an intermediate page for protocols other than HTTP(S)
297 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
298 p.serveExitMortyPage(ctx, parsedURI)
299 return
300 }
301
302 req := fasthttp.AcquireRequest()
303 defer fasthttp.ReleaseRequest(req)
304 req.SetConnectionClose()
305
306 requestURIStr := string(requestURI)
307
308 log.Println("getting", requestURIStr)
309
310 req.SetRequestURI(requestURIStr)
311 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"))
312
313 resp := fasthttp.AcquireResponse()
314 defer fasthttp.ReleaseResponse(resp)
315
316 req.Header.SetMethodBytes(ctx.Method())
317 if ctx.IsPost() || ctx.IsPut() {
318 req.SetBody(ctx.PostBody())
319 }
320
321 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
322
323 if err != nil {
324 if err == fasthttp.ErrTimeout {
325 // HTTP status code 504 : Gateway Time-Out
326 p.serveMainPage(ctx, 504, err)
327 } else {
328 // HTTP status code 500 : Internal Server Error
329 p.serveMainPage(ctx, 500, err)
330 }
331 return
332 }
333
334 if resp.StatusCode() != 200 {
335 switch resp.StatusCode() {
336 case 301, 302, 303, 307, 308:
337 loc := resp.Header.Peek("Location")
338 if loc != nil {
339 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
340 url, err := rc.ProxifyURI(loc)
341 if err == nil {
342 ctx.SetStatusCode(resp.StatusCode())
343 ctx.Response.Header.Add("Location", url)
344 log.Println("redirect to", string(loc))
345 return
346 }
347 }
348 }
349 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
350 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
351 return
352 }
353
354 contentTypeBytes := resp.Header.Peek("Content-Type")
355
356 if contentTypeBytes == nil {
357 // HTTP status code 503 : Service Unavailable
358 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
359 return
360 }
361
362 contentTypeString := string(contentTypeBytes)
363
364 // decode Content-Type header
365 contentType, error := contenttype.ParseContentType(contentTypeString)
366 if error != nil {
367 // HTTP status code 503 : Service Unavailable
368 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
369 return
370 }
371
372 // content-disposition
373 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
374
375 // check content type
376 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
377 // it is not a usual content type
378 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
379 // force attachment for allowed content type
380 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
381 } else {
382 // deny access to forbidden content type
383 // HTTP status code 403 : Forbidden
384 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
385 return
386 }
387 }
388
389 // HACK : replace */xhtml by text/html
390 if contentType.SubType == "xhtml" {
391 contentType.TopLevelType = "text"
392 contentType.SubType = "html"
393 contentType.Suffix = ""
394 }
395
396 // conversion to UTF-8
397 var responseBody []byte
398
399 if contentType.TopLevelType == "text" {
400 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
401 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
402 responseBody, err = e.NewDecoder().Bytes(resp.Body())
403 if err != nil {
404 // HTTP status code 503 : Service Unavailable
405 p.serveMainPage(ctx, 503, err)
406 return
407 }
408 } else {
409 responseBody = resp.Body()
410 }
411 // update the charset or specify it
412 contentType.Parameters["charset"] = "UTF-8"
413 } else {
414 responseBody = resp.Body()
415 }
416
417 //
418 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
419
420 // set the content type
421 ctx.SetContentType(contentType.String())
422
423 // output according to MIME type
424 switch {
425 case contentType.SubType == "css" && contentType.Suffix == "":
426 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
427 case contentType.SubType == "html" && contentType.Suffix == "":
428 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
429 default:
430 if contentDispositionBytes != nil {
431 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
432 }
433 ctx.Write(responseBody)
434 }
435}
436
// contentDispositionForceAttachment builds a Content-Disposition header value
// whose disposition type is always "attachment".
//
// The parameters of contentDispositionBytes are kept when the value parses;
// otherwise (nil or malformed value) it starts from an empty parameter set.
// When no "filename" parameter is present, the last element of url.Path is
// used, unless the path has no usable last element ("." or a lone separator),
// in which case no filename is emitted.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	contentDispositionParams := make(map[string]string)

	if contentDispositionBytes != nil {
		_, params, err := mime.ParseMediaType(string(contentDispositionBytes))
		if err == nil && params != nil {
			contentDispositionParams = params
		}
	}

	if _, fileNameDefined := contentDispositionParams["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		// same (lower-case) key as the one tested above
		name := filepath.Base(url.Path)
		if name != "." && name != string(filepath.Separator) {
			contentDispositionParams["filename"] = name
		}
	}

	return []byte(mime.FormatMediaType("attachment", contentDispositionParams))
}
459
460func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
461 // serve robots.txt
462 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
463 ctx.SetContentType("text/plain")
464 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
465 return true
466 }
467
468 // server favicon.ico
469 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
470 ctx.SetContentType("image/png")
471 ctx.Write(FAVICON_BYTES)
472 return true
473 }
474
475 return false
476}
477
478func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
479 param := ctx.QueryArgs().PeekBytes(paramName)
480
481 if param == nil {
482 param = ctx.PostArgs().PeekBytes(paramName)
483 if param != nil {
484 ctx.PostArgs().DelBytes(paramName)
485 }
486 } else {
487 ctx.QueryArgs().DelBytes(paramName)
488 }
489
490 return param
491}
492
493func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
494 // TODO
495
496 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
497
498 if urlSlices == nil {
499 out.Write(css)
500 return
501 }
502
503 startIndex := 0
504
505 for _, s := range urlSlices {
506 urlStart := s[4]
507 urlEnd := s[5]
508
509 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
510 out.Write(css[startIndex:urlStart])
511 out.Write([]byte(uri))
512 startIndex = urlEnd
513 } else {
514 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
515 }
516 }
517 if startIndex < len(css) {
518 out.Write(css[startIndex:len(css)])
519 }
520}
521
522func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
523 r := bytes.NewReader(htmlDoc)
524 decoder := html.NewTokenizer(r)
525 decoder.AllowCDATA(true)
526
527 unsafeElements := make([][]byte, 0, 8)
528 state := STATE_DEFAULT
529 for {
530 token := decoder.Next()
531 if token == html.ErrorToken {
532 err := decoder.Err()
533 if err != io.EOF {
534 log.Println("failed to parse HTML:")
535 }
536 break
537 }
538
539 if len(unsafeElements) == 0 {
540
541 switch token {
542 case html.StartTagToken, html.SelfClosingTagToken:
543 tag, hasAttrs := decoder.TagName()
544 safe := !inArray(tag, UNSAFE_ELEMENTS)
545 if !safe {
546 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
547 var unsafeTag []byte = make([]byte, len(tag))
548 copy(unsafeTag, tag)
549 unsafeElements = append(unsafeElements, unsafeTag)
550 }
551 break
552 }
553 if bytes.Equal(tag, []byte("base")) {
554 for {
555 attrName, attrValue, moreAttr := decoder.TagAttr()
556 if bytes.Equal(attrName, []byte("href")) {
557 parsedURI, err := url.Parse(string(attrValue))
558 if err == nil {
559 rc.BaseURL = parsedURI
560 }
561 }
562 if !moreAttr {
563 break
564 }
565 }
566 break
567 }
568 if bytes.Equal(tag, []byte("noscript")) {
569 state = STATE_IN_NOSCRIPT
570 break
571 }
572 var attrs [][][]byte
573 if hasAttrs {
574 for {
575 attrName, attrValue, moreAttr := decoder.TagAttr()
576 attrs = append(attrs, [][]byte{
577 attrName,
578 attrValue,
579 []byte(html.EscapeString(string(attrValue))),
580 })
581 if !moreAttr {
582 break
583 }
584 }
585 }
586 if bytes.Equal(tag, []byte("link")) {
587 sanitizeLinkTag(rc, out, attrs)
588 break
589 }
590
591 if bytes.Equal(tag, []byte("meta")) {
592 sanitizeMetaTag(rc, out, attrs)
593 break
594 }
595
596 fmt.Fprintf(out, "<%s", tag)
597
598 if hasAttrs {
599 sanitizeAttrs(rc, out, attrs)
600 }
601
602 if token == html.SelfClosingTagToken {
603 fmt.Fprintf(out, " />")
604 } else {
605 fmt.Fprintf(out, ">")
606 if bytes.Equal(tag, []byte("style")) {
607 state = STATE_IN_STYLE
608 }
609 }
610
611 if bytes.Equal(tag, []byte("head")) {
612 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
613 }
614
615 if bytes.Equal(tag, []byte("form")) {
616 var formURL *url.URL
617 for _, attr := range attrs {
618 if bytes.Equal(attr[0], []byte("action")) {
619 formURL, _ = url.Parse(string(attr[1]))
620 formURL = mergeURIs(rc.BaseURL, formURL)
621 break
622 }
623 }
624 if formURL == nil {
625 formURL = rc.BaseURL
626 }
627 urlStr := formURL.String()
628 var key string
629 if rc.Key != nil {
630 key = hash(urlStr, rc.Key)
631 }
632 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
633
634 }
635
636 case html.EndTagToken:
637 tag, _ := decoder.TagName()
638 writeEndTag := true
639 switch string(tag) {
640 case "body":
641 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
642 case "style":
643 state = STATE_DEFAULT
644 case "noscript":
645 state = STATE_DEFAULT
646 writeEndTag = false
647 }
648 // skip noscript tags - only the tag, not the content, because javascript is sanitized
649 if writeEndTag {
650 fmt.Fprintf(out, "</%s>", tag)
651 }
652
653 case html.TextToken:
654 switch state {
655 case STATE_DEFAULT:
656 fmt.Fprintf(out, "%s", decoder.Raw())
657 case STATE_IN_STYLE:
658 sanitizeCSS(rc, out, decoder.Raw())
659 case STATE_IN_NOSCRIPT:
660 sanitizeHTML(rc, out, decoder.Raw())
661 }
662
663 case html.CommentToken:
664 // ignore comment. TODO : parse IE conditional comment
665
666 case html.DoctypeToken:
667 out.Write(decoder.Raw())
668 }
669 } else {
670 switch token {
671 case html.StartTagToken:
672 tag, _ := decoder.TagName()
673 if inArray(tag, UNSAFE_ELEMENTS) {
674 unsafeElements = append(unsafeElements, tag)
675 }
676
677 case html.EndTagToken:
678 tag, _ := decoder.TagName()
679 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
680 unsafeElements = unsafeElements[:len(unsafeElements)-1]
681 }
682 }
683 }
684 }
685}
686
687func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
688 exclude := false
689 for _, attr := range attrs {
690 attrName := attr[0]
691 attrValue := attr[1]
692 if bytes.Equal(attrName, []byte("rel")) {
693 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
694 exclude = true
695 break
696 }
697 }
698 if bytes.Equal(attrName, []byte("as")) {
699 if bytes.Equal(attrValue, []byte("script")) {
700 exclude = true
701 break
702 }
703 }
704 }
705
706 if !exclude {
707 out.Write([]byte("<link"))
708 for _, attr := range attrs {
709 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
710 }
711 out.Write([]byte(">"))
712 }
713}
714
715func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
716 var http_equiv []byte
717 var content []byte
718
719 for _, attr := range attrs {
720 attrName := attr[0]
721 attrValue := attr[1]
722 if bytes.Equal(attrName, []byte("http-equiv")) {
723 http_equiv = bytes.ToLower(attrValue)
724 // exclude some <meta http-equiv="..." ..>
725 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
726 return
727 }
728 }
729 if bytes.Equal(attrName, []byte("content")) {
730 content = attrValue
731 }
732 if bytes.Equal(attrName, []byte("charset")) {
733 // exclude <meta charset="...">
734 return
735 }
736 }
737
738 out.Write([]byte("<meta"))
739 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
740 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
741 contentUrl := content[urlIndex+4:]
742 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
743 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
744 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
745 contentUrl = contentUrl[1 : len(contentUrl)-1]
746 }
747 }
748 // output proxify result
749 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
750 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
751 }
752 } else {
753 if len(http_equiv) > 0 {
754 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
755 }
756 sanitizeAttrs(rc, out, attrs)
757 }
758 out.Write([]byte(">"))
759}
760
761func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
762 for _, attr := range attrs {
763 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
764 }
765}
766
767func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
768 if inArray(attrName, SAFE_ATTRIBUTES) {
769 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
770 return
771 }
772 switch string(attrName) {
773 case "src", "href", "action":
774 if uri, err := rc.ProxifyURI(attrValue); err == nil {
775 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
776 } else {
777 log.Println("cannot proxify uri:", string(attrValue))
778 }
779 case "style":
780 cssAttr := bytes.NewBuffer(nil)
781 sanitizeCSS(rc, cssAttr, attrValue)
782 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
783 }
784}
785
// mergeURIs resolves u2 against u1 and returns the result; when u2 is nil,
// u1 itself is returned.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
792
// sanitizeURI strips the bytes <= 32 (control characters and space) found at
// both ends of uri and inside its scheme, and lower-cases the scheme.
//
// It returns the cleaned URI and the scheme including its trailing ':' (empty
// when the URI has no scheme). The cleaned URI is a sub-slice of uri: when a
// scheme is found, uri is modified in place.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	lowered := make([]byte, 0, 10) // kept bytes seen so far, lower-cased
	start := -1                    // index of the first kept byte
	colon := -1                    // index of the ':' ending the scheme

scan:
	for i, c := range uri {
		if c <= 32 {
			// special characters and space are skipped
			continue
		}
		if 'A' <= c && c <= 'Z' && c < utf8.RuneSelf {
			c += 'a' - 'A'
		}
		lowered = append(lowered, c)
		if start < 0 {
			start = i
		}

		switch c {
		case ':':
			// end of the scheme
			colon = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI: there is no scheme
			break scan
		}
	}

	if colon < 0 {
		// scheme NOT found
		if start < 0 {
			start = 0
		}
		return uri[start:], ""
	}

	// scheme found: write its cleaned, lower-cased form right before the
	// remainder of the URI and return from there
	begin := colon - len(lowered) + 1
	copy(uri[begin:], lowered)
	return uri[begin:], string(lowered)
}
844
845func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
846 // sanitize URI
847 uri, scheme := sanitizeURI(uri)
848
849 // remove javascript protocol
850 if scheme == "javascript:" {
851 return "", nil
852 }
853
854 // TODO check malicious data: - e.g. data:script
855 if scheme == "data:" {
856 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
857 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
858 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
859 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
860 bytes.HasPrefix(uri, []byte("data:image/webp")) {
861 // should be safe
862 return string(uri), nil
863 } else {
864 // unsafe data
865 return "", nil
866 }
867 }
868
869 // parse the uri
870 u, err := url.Parse(string(uri))
871 if err != nil {
872 return "", err
873 }
874
875 // get the fragment (with the prefix "#")
876 fragment := ""
877 if len(u.Fragment) > 0 {
878 fragment = "#" + u.Fragment
879 }
880
881 // reset the fragment: it is not included in the mortyurl
882 u.Fragment = ""
883
884 // merge the URI with the document URI
885 u = mergeURIs(rc.BaseURL, u)
886
887 // simple internal link ?
888 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
889 if u.Scheme == rc.BaseURL.Scheme &&
890 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
891 u.Host == rc.BaseURL.Host &&
892 u.Path == rc.BaseURL.Path &&
893 u.RawQuery == rc.BaseURL.RawQuery {
894 // the fragment is the only difference between the document URI and the uri parameter
895 return fragment, nil
896 }
897
898 // return full URI and fragment (if not empty)
899 morty_uri := u.String()
900
901 if rc.Key == nil {
902 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
903 }
904 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
905}
906
// inArray reports whether a contains an element equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
915
// hash returns the hexadecimal HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	mac := hmac.New(sha256.New, key)
	mac.Write([]byte(msg))
	digest := mac.Sum(nil)
	return hex.EncodeToString(digest)
}
921
// verifyRequestURI reports whether hashMsg is the hexadecimal HMAC-SHA256 of
// uri computed with key. A hashMsg that is not valid hexadecimal is logged
// and rejected.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	received, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	mac := hmac.New(sha256.New, key)
	mac.Write(uri)
	expected := mac.Sum(nil)

	// constant-time comparison
	return hmac.Equal(received, expected)
}
933
934func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
935 ctx.SetContentType("text/html")
936 ctx.SetStatusCode(403)
937 ctx.Write([]byte(MORTY_HTML_PAGE_START))
938 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
939 ctx.Write([]byte("<p>Following</p><p><a href=\""))
940 ctx.Write([]byte(html.EscapeString(uri.String())))
941 ctx.Write([]byte("\" rel=\"noreferrer\">"))
942 ctx.Write([]byte(html.EscapeString(uri.String())))
943 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
944 ctx.Write([]byte(MORTY_HTML_PAGE_END))
945}
946
947func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
948 ctx.SetContentType("text/html; charset=UTF-8")
949 ctx.SetStatusCode(statusCode)
950 ctx.Write([]byte(MORTY_HTML_PAGE_START))
951 if err != nil {
952 log.Println("error:", err)
953 ctx.Write([]byte("<h2>Error: "))
954 ctx.Write([]byte(html.EscapeString(err.Error())))
955 ctx.Write([]byte("</h2>"))
956 }
957 if p.Key == nil {
958 ctx.Write([]byte(`
959 <form action="post">
960 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
961 <input type="submit" value="go" />
962 </form>`))
963 } else {
964 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
965 }
966 ctx.Write([]byte(MORTY_HTML_PAGE_END))
967}
968
969func main() {
970
971 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
972 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
973 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
974 version := flag.Bool("version", false, "Show version")
975 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
976 flag.Parse()
977
978 if *version {
979 fmt.Println(VERSION)
980 return
981 }
982
983 if *ipv6 {
984 CLIENT.Dial = fasthttp.DialDualStack
985 }
986
987 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
988
989 if *key != "" {
990 p.Key = []byte(*key)
991 }
992
993 log.Println("listening on", *listen)
994
995 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
996 log.Fatal("Error in ListenAndServe:", err)
997 }
998}
Note: See TracBrowser for help on using the repository browser.