source: code/trunk/morty.go@ 119

Last change on this file since 119 was 119, checked in by asciimoo, 5 years ago

[enh] redesign popup message to a sticky top bar

File size: 28.2 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
14 "mime"
15 "net/url"
16 "os"
17 "path/filepath"
18 "regexp"
19 "strings"
20 "time"
21 "unicode/utf8"
22
23 "github.com/valyala/fasthttp"
24 "github.com/valyala/fasthttp/fasthttpproxy"
25 "golang.org/x/net/html"
26 "golang.org/x/net/html/charset"
27 "golang.org/x/text/encoding"
28
29 "github.com/asciimoo/morty/contenttype"
30)
31
// Parser states tracked by sanitizeHTML; the current state selects how the
// next text token is written out.
const (
	STATE_DEFAULT     int = 0 // text is copied through unchanged
	STATE_IN_STYLE    int = 1 // inside <style>: text goes through sanitizeCSS
	STATE_IN_NOSCRIPT int = 2 // inside <noscript>: text is sanitized again as HTML
)
37
// VERSION is the release string printed by the -version flag.
const VERSION = "v0.2.0"

// DEBUG turns on verbose logging. It is enabled unless the DEBUG environment
// variable is set to exactly "false" (so it is on by default).
var DEBUG = os.Getenv("DEBUG") != "false"
41
// CLIENT is the shared fasthttp client used for every upstream request.
// main may adjust its dialing options (dual stack, SOCKS5) before serving.
var CLIENT *fasthttp.Client = &fasthttp.Client{
	MaxResponseBodySize: 10 * 1024 * 1024, // 10M
	ReadBufferSize:      16 * 1024,        // 16K
}
46
// CSS_URL_REGEXP matches CSS url(...) references. Submatch 1 is the optional
// opening quote, submatch 2 the URL itself (the span sanitizeCSS rewrites) and
// submatch 3 the optional closing quote. The URL character class excludes
// space, both quote characters and ')'.
var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
48
// ALLOWED_CONTENTTYPE_FILTER accepts the content types RequestHandler serves
// directly. A response that does not match is only served if it matches
// ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER (as a forced download); otherwise it
// is rejected with 403.
// NOTE(review): the NewFilterEquals arguments look like (top-level type,
// subtype, suffix) — confirm against the contenttype package.
var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
	// html
	contenttype.NewFilterEquals("text", "html", ""),
	contenttype.NewFilterEquals("application", "xhtml", "xml"),
	// css
	contenttype.NewFilterEquals("text", "css", ""),
	// images
	contenttype.NewFilterEquals("image", "gif", ""),
	contenttype.NewFilterEquals("image", "png", ""),
	contenttype.NewFilterEquals("image", "jpeg", ""),
	contenttype.NewFilterEquals("image", "pjpeg", ""),
	contenttype.NewFilterEquals("image", "webp", ""),
	contenttype.NewFilterEquals("image", "tiff", ""),
	contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
	contenttype.NewFilterEquals("image", "bmp", ""),
	contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
	contenttype.NewFilterEquals("image", "x-icon", ""),
	// fonts
	contenttype.NewFilterEquals("application", "font-otf", ""),
	contenttype.NewFilterEquals("application", "font-ttf", ""),
	contenttype.NewFilterEquals("application", "font-woff", ""),
	contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
})
72
// ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER accepts content types that are not
// served inline but may still be delivered: RequestHandler forces a
// "Content-Disposition: attachment" header for them.
var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
	// texts
	contenttype.NewFilterEquals("text", "csv", ""),
	contenttype.NewFilterEquals("text", "tab-separated-values", ""),
	contenttype.NewFilterEquals("text", "plain", ""),
	// API
	contenttype.NewFilterEquals("application", "json", ""),
	// Documents
	contenttype.NewFilterEquals("application", "x-latex", ""),
	contenttype.NewFilterEquals("application", "pdf", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
	contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
	// Compressed archives
	contenttype.NewFilterEquals("application", "zip", ""),
	contenttype.NewFilterEquals("application", "gzip", ""),
	contenttype.NewFilterEquals("application", "x-compressed", ""),
	contenttype.NewFilterEquals("application", "x-gtar", ""),
	contenttype.NewFilterEquals("application", "x-compress", ""),
	// Generic binary
	contenttype.NewFilterEquals("application", "octet-stream", ""),
})
96
// ALLOWED_CONTENTTYPE_PARAMETERS is handed to contentType.FilterParameters in
// RequestHandler before the Content-Type header is written.
// NOTE(review): presumably these are the only parameter names kept on the
// outgoing Content-Type — confirm against the contenttype package.
var ALLOWED_CONTENTTYPE_PARAMETERS map[string]bool = map[string]bool{
	"charset": true,
}
100
// UNSAFE_ELEMENTS lists the tag names sanitizeHTML removes together with
// everything nested inside them.
var UNSAFE_ELEMENTS [][]byte = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
110
// SAFE_ATTRIBUTES lists the attribute names sanitizeAttr copies to the output
// with their HTML-escaped value. Attributes outside this list are dropped,
// except src/href/action (proxified) and style (passed through sanitizeCSS).
var SAFE_ATTRIBUTES [][]byte = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
147
// LINK_REL_SAFE_VALUES lists the rel values sanitizeLinkTag lets through. The
// comparison is an exact byte match, so a <link> whose rel is anything else
// (including a different letter case or several space-separated values other
// than "shortcut icon") is dropped entirely.
var LINK_REL_SAFE_VALUES [][]byte = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
169
// LINK_HTTP_EQUIV_SAFE_VALUES lists the <meta http-equiv="..."> values
// sanitizeMetaTag keeps; the value is lower-cased before it is compared.
var LINK_HTTP_EQUIV_SAFE_VALUES [][]byte = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
178
// Proxy holds the server-wide settings used by the request handler.
type Proxy struct {
	// Key is the HMAC key used to verify "mortyhash"; nil disables verification.
	Key []byte
	// RequestTimeout bounds each upstream request (passed to CLIENT.DoTimeout).
	RequestTimeout time.Duration
}
183
// RequestConfig carries the per-document state needed while rewriting URLs.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs; nil means unsigned links.
	Key []byte
	// BaseURL is the URL relative references are resolved against. sanitizeHTML
	// updates it when the document contains a <base href>.
	BaseURL *url.URL
}
188
// HTML_FORM_EXTENSION is the fmt template for the hidden inputs sanitizeHTML
// appends right after every <form> start tag. The first %s receives the
// resolved form target URL, the second its hash (empty when no key is set).
var HTML_FORM_EXTENSION string = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
190
// HTML_BODY_EXTENSION is the fmt template for the sticky top bar sanitizeHTML
// writes just before </body>. Both %s verbs receive the page's base URL (the
// read-only URL field and the "original site" link); "%%" is a literal percent
// sign in the embedded CSS.
var HTML_BODY_EXTENSION string = `
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
 <form method="get">
 <label for="mortytoggle">hide</label>
 <span><a href="/">Morty Proxy</a></span>
 <input type="url" value="%s" name="mortyurl" readonly="true" />
 This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page, visit <a href="%s" rel="noreferrer">original site</a>.
 </form>
</div>
<style>
body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #AAAAAA; background: #FFF; padding: 4px; color: #444; height: 42px; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
#mortyheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
#mortyheader input[type=url] { width: 50%%; padding: 4px; font-size: 16px; }
</style>
`
213
// HTML_HEAD_CONTENT_TYPE is written by sanitizeHTML immediately after the
// <head> start tag: it declares the UTF-8 charset, IE edge mode and a
// no-referrer policy for the sanitized page.
var HTML_HEAD_CONTENT_TYPE string = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
218
// MORTY_HTML_PAGE_START is the opening part of Morty's own pages (main page,
// error page, exit page): doctype, head with inline CSS, and the start of the
// container <div>. It is always followed by MORTY_HTML_PAGE_END.
//
// The anchor rule previously read "a { text-decoration: none; #2980b9; }",
// which is an invalid declaration that browsers drop; the missing "color:"
// property name has been restored.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`
241
// MORTY_HTML_PAGE_END closes the container opened by MORTY_HTML_PAGE_START and
// adds the footer of Morty's own pages.
var MORTY_HTML_PAGE_END string = `
 </div>
 <div class="footer">
 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://github.com/asciimoo/morty">view on github</a>
 </p>
 </div>
</body>
</html>`
251
// FAVICON_BYTES holds the decoded PNG image served for /favicon.ico.
var FAVICON_BYTES []byte

// init decodes the embedded favicon once at start-up.
//
// The literal below is stored WITHOUT base64 padding (159 characters), so it
// must be decoded with the unpadded alphabet. Decoding it with StdEncoding
// fails on the last quantum and silently yields a PNG truncated by two bytes.
// Any trailing '=' is stripped first so a padded literal keeps working too.
func init() {
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	decoded, err := base64.RawStdEncoding.DecodeString(strings.TrimRight(faviconBase64, "="))
	if err != nil {
		// The literal is a compile-time constant: a failure here is a programmer error.
		panic("invalid embedded favicon: " + err.Error())
	}
	FAVICON_BYTES = decoded
}
259
260func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
261
262 if appRequestHandler(ctx) {
263 return
264 }
265
266 requestHash := popRequestParam(ctx, []byte("mortyhash"))
267
268 requestURI := popRequestParam(ctx, []byte("mortyurl"))
269
270 if requestURI == nil {
271 p.serveMainPage(ctx, 200, nil)
272 return
273 }
274
275 if p.Key != nil {
276 if !verifyRequestURI(requestURI, requestHash, p.Key) {
277 // HTTP status code 403 : Forbidden
278 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
279 return
280 }
281 }
282
283 requestURIQuery := ctx.QueryArgs().QueryString()
284 if len(requestURIQuery) > 0 {
285 requestURI = append(requestURI, '?')
286 requestURI = append(requestURI, requestURIQuery...)
287 }
288
289 parsedURI, err := url.Parse(string(requestURI))
290
291 if err != nil {
292 // HTTP status code 500 : Internal Server Error
293 p.serveMainPage(ctx, 500, err)
294 return
295 }
296
297 // Serve an intermediate page for protocols other than HTTP(S)
298 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
299 p.serveExitMortyPage(ctx, parsedURI)
300 return
301 }
302
303 req := fasthttp.AcquireRequest()
304 defer fasthttp.ReleaseRequest(req)
305 req.SetConnectionClose()
306
307 requestURIStr := string(requestURI)
308
309 if DEBUG {
310 log.Println("getting", requestURIStr)
311 }
312
313 req.SetRequestURI(requestURIStr)
314 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
315
316 resp := fasthttp.AcquireResponse()
317 defer fasthttp.ReleaseResponse(resp)
318
319 req.Header.SetMethodBytes(ctx.Method())
320 if ctx.IsPost() || ctx.IsPut() {
321 req.SetBody(ctx.PostBody())
322 }
323
324 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
325
326 if err != nil {
327 if err == fasthttp.ErrTimeout {
328 // HTTP status code 504 : Gateway Time-Out
329 p.serveMainPage(ctx, 504, err)
330 } else {
331 // HTTP status code 500 : Internal Server Error
332 p.serveMainPage(ctx, 500, err)
333 }
334 return
335 }
336
337 if resp.StatusCode() != 200 {
338 switch resp.StatusCode() {
339 case 301, 302, 303, 307, 308:
340 loc := resp.Header.Peek("Location")
341 if loc != nil {
342 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
343 url, err := rc.ProxifyURI(loc)
344 if err == nil {
345 ctx.SetStatusCode(resp.StatusCode())
346 ctx.Response.Header.Add("Location", url)
347 if DEBUG {
348 log.Println("redirect to", string(loc))
349 }
350 return
351 }
352 }
353 }
354 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
355 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
356 return
357 }
358
359 contentTypeBytes := resp.Header.Peek("Content-Type")
360
361 if contentTypeBytes == nil {
362 // HTTP status code 503 : Service Unavailable
363 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
364 return
365 }
366
367 contentTypeString := string(contentTypeBytes)
368
369 // decode Content-Type header
370 contentType, error := contenttype.ParseContentType(contentTypeString)
371 if error != nil {
372 // HTTP status code 503 : Service Unavailable
373 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
374 return
375 }
376
377 // content-disposition
378 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
379
380 // check content type
381 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
382 // it is not a usual content type
383 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
384 // force attachment for allowed content type
385 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
386 } else {
387 // deny access to forbidden content type
388 // HTTP status code 403 : Forbidden
389 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
390 return
391 }
392 }
393
394 // HACK : replace */xhtml by text/html
395 if contentType.SubType == "xhtml" {
396 contentType.TopLevelType = "text"
397 contentType.SubType = "html"
398 contentType.Suffix = ""
399 }
400
401 // conversion to UTF-8
402 var responseBody []byte
403
404 if contentType.TopLevelType == "text" {
405 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
406 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
407 responseBody, err = e.NewDecoder().Bytes(resp.Body())
408 if err != nil {
409 // HTTP status code 503 : Service Unavailable
410 p.serveMainPage(ctx, 503, err)
411 return
412 }
413 } else {
414 responseBody = resp.Body()
415 }
416 // update the charset or specify it
417 contentType.Parameters["charset"] = "UTF-8"
418 } else {
419 responseBody = resp.Body()
420 }
421
422 //
423 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
424
425 // set the content type
426 ctx.SetContentType(contentType.String())
427
428 // output according to MIME type
429 switch {
430 case contentType.SubType == "css" && contentType.Suffix == "":
431 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
432 case contentType.SubType == "html" && contentType.Suffix == "":
433 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
434 default:
435 if contentDispositionBytes != nil {
436 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
437 }
438 ctx.Write(responseBody)
439 }
440}
441
// contentDispositionForceAttachment builds a Content-Disposition header value
// whose disposition type is always "attachment".
//
// contentDispositionBytes is the original header value (nil when absent); its
// parameters are kept when it parses. url is the requested URL: when the
// header carries no "filename" parameter, the last element of url.Path is used
// instead, unless the path has no usable last element.
//
// The returned value is never empty: if the parameters cannot be formatted
// (mime.FormatMediaType returns ""), a bare "attachment" is returned.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	params := make(map[string]string)

	if contentDispositionBytes != nil {
		// On a parse error the (possibly nil) parameters are ignored.
		if _, parsed, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil && parsed != nil {
			params = parsed
		}
	}

	if _, fileNameDefined := params["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		// filepath.Base yields "." for an empty path and the separator for a
		// path made of separators only; neither is a meaningful file name.
		if name := filepath.Base(url.Path); name != "." && name != string(filepath.Separator) {
			// Same key as the lookup above ("filename", not "fileName").
			params["filename"] = name
		}
	}

	formatted := mime.FormatMediaType("attachment", params)
	if formatted == "" {
		formatted = "attachment"
	}
	return []byte(formatted)
}
464
465func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
466 // serve robots.txt
467 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
468 ctx.SetContentType("text/plain")
469 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
470 return true
471 }
472
473 // server favicon.ico
474 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
475 ctx.SetContentType("image/png")
476 ctx.Write(FAVICON_BYTES)
477 return true
478 }
479
480 return false
481}
482
483func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
484 param := ctx.QueryArgs().PeekBytes(paramName)
485
486 if param == nil {
487 param = ctx.PostArgs().PeekBytes(paramName)
488 if param != nil {
489 ctx.PostArgs().DelBytes(paramName)
490 }
491 } else {
492 ctx.QueryArgs().DelBytes(paramName)
493 }
494
495 return param
496}
497
498func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
499 // TODO
500
501 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
502
503 if urlSlices == nil {
504 out.Write(css)
505 return
506 }
507
508 startIndex := 0
509
510 for _, s := range urlSlices {
511 urlStart := s[4]
512 urlEnd := s[5]
513
514 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
515 out.Write(css[startIndex:urlStart])
516 out.Write([]byte(uri))
517 startIndex = urlEnd
518 } else if DEBUG {
519 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
520 }
521 }
522 if startIndex < len(css) {
523 out.Write(css[startIndex:len(css)])
524 }
525}
526
527func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
528 r := bytes.NewReader(htmlDoc)
529 decoder := html.NewTokenizer(r)
530 decoder.AllowCDATA(true)
531
532 unsafeElements := make([][]byte, 0, 8)
533 state := STATE_DEFAULT
534 for {
535 token := decoder.Next()
536 if token == html.ErrorToken {
537 err := decoder.Err()
538 if err != io.EOF {
539 log.Println("failed to parse HTML")
540 }
541 break
542 }
543
544 if len(unsafeElements) == 0 {
545
546 switch token {
547 case html.StartTagToken, html.SelfClosingTagToken:
548 tag, hasAttrs := decoder.TagName()
549 safe := !inArray(tag, UNSAFE_ELEMENTS)
550 if !safe {
551 if token != html.SelfClosingTagToken {
552 var unsafeTag []byte = make([]byte, len(tag))
553 copy(unsafeTag, tag)
554 unsafeElements = append(unsafeElements, unsafeTag)
555 }
556 break
557 }
558 if bytes.Equal(tag, []byte("base")) {
559 for {
560 attrName, attrValue, moreAttr := decoder.TagAttr()
561 if bytes.Equal(attrName, []byte("href")) {
562 parsedURI, err := url.Parse(string(attrValue))
563 if err == nil {
564 rc.BaseURL = parsedURI
565 }
566 }
567 if !moreAttr {
568 break
569 }
570 }
571 break
572 }
573 if bytes.Equal(tag, []byte("noscript")) {
574 state = STATE_IN_NOSCRIPT
575 break
576 }
577 var attrs [][][]byte
578 if hasAttrs {
579 for {
580 attrName, attrValue, moreAttr := decoder.TagAttr()
581 attrs = append(attrs, [][]byte{
582 attrName,
583 attrValue,
584 []byte(html.EscapeString(string(attrValue))),
585 })
586 if !moreAttr {
587 break
588 }
589 }
590 }
591 if bytes.Equal(tag, []byte("link")) {
592 sanitizeLinkTag(rc, out, attrs)
593 break
594 }
595
596 if bytes.Equal(tag, []byte("meta")) {
597 sanitizeMetaTag(rc, out, attrs)
598 break
599 }
600
601 fmt.Fprintf(out, "<%s", tag)
602
603 if hasAttrs {
604 sanitizeAttrs(rc, out, attrs)
605 }
606
607 if token == html.SelfClosingTagToken {
608 fmt.Fprintf(out, " />")
609 } else {
610 fmt.Fprintf(out, ">")
611 if bytes.Equal(tag, []byte("style")) {
612 state = STATE_IN_STYLE
613 }
614 }
615
616 if bytes.Equal(tag, []byte("head")) {
617 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
618 }
619
620 if bytes.Equal(tag, []byte("form")) {
621 var formURL *url.URL
622 for _, attr := range attrs {
623 if bytes.Equal(attr[0], []byte("action")) {
624 formURL, _ = url.Parse(string(attr[1]))
625 formURL = mergeURIs(rc.BaseURL, formURL)
626 break
627 }
628 }
629 if formURL == nil {
630 formURL = rc.BaseURL
631 }
632 urlStr := formURL.String()
633 var key string
634 if rc.Key != nil {
635 key = hash(urlStr, rc.Key)
636 }
637 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
638
639 }
640
641 case html.EndTagToken:
642 tag, _ := decoder.TagName()
643 writeEndTag := true
644 switch string(tag) {
645 case "body":
646 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String(), rc.BaseURL.String())
647 case "style":
648 state = STATE_DEFAULT
649 case "noscript":
650 state = STATE_DEFAULT
651 writeEndTag = false
652 }
653 // skip noscript tags - only the tag, not the content, because javascript is sanitized
654 if writeEndTag {
655 fmt.Fprintf(out, "</%s>", tag)
656 }
657
658 case html.TextToken:
659 switch state {
660 case STATE_DEFAULT:
661 fmt.Fprintf(out, "%s", decoder.Raw())
662 case STATE_IN_STYLE:
663 sanitizeCSS(rc, out, decoder.Raw())
664 case STATE_IN_NOSCRIPT:
665 sanitizeHTML(rc, out, decoder.Raw())
666 }
667
668 case html.CommentToken:
669 // ignore comment. TODO : parse IE conditional comment
670
671 case html.DoctypeToken:
672 out.Write(decoder.Raw())
673 }
674 } else {
675 switch token {
676 case html.StartTagToken, html.SelfClosingTagToken:
677 tag, _ := decoder.TagName()
678 if inArray(tag, UNSAFE_ELEMENTS) {
679 unsafeElements = append(unsafeElements, tag)
680 }
681
682 case html.EndTagToken:
683 tag, _ := decoder.TagName()
684 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
685 unsafeElements = unsafeElements[:len(unsafeElements)-1]
686 }
687 }
688 }
689 }
690}
691
692func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
693 exclude := false
694 for _, attr := range attrs {
695 attrName := attr[0]
696 attrValue := attr[1]
697 if bytes.Equal(attrName, []byte("rel")) {
698 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
699 exclude = true
700 break
701 }
702 }
703 if bytes.Equal(attrName, []byte("as")) {
704 if bytes.Equal(attrValue, []byte("script")) {
705 exclude = true
706 break
707 }
708 }
709 }
710
711 if !exclude {
712 out.Write([]byte("<link"))
713 for _, attr := range attrs {
714 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
715 }
716 out.Write([]byte(">"))
717 }
718}
719
720func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
721 var http_equiv []byte
722 var content []byte
723
724 for _, attr := range attrs {
725 attrName := attr[0]
726 attrValue := attr[1]
727 if bytes.Equal(attrName, []byte("http-equiv")) {
728 http_equiv = bytes.ToLower(attrValue)
729 // exclude some <meta http-equiv="..." ..>
730 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
731 return
732 }
733 }
734 if bytes.Equal(attrName, []byte("content")) {
735 content = attrValue
736 }
737 if bytes.Equal(attrName, []byte("charset")) {
738 // exclude <meta charset="...">
739 return
740 }
741 }
742
743 out.Write([]byte("<meta"))
744 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
745 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
746 contentUrl := content[urlIndex+4:]
747 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
748 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
749 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
750 contentUrl = contentUrl[1 : len(contentUrl)-1]
751 }
752 }
753 // output proxify result
754 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
755 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
756 }
757 } else {
758 if len(http_equiv) > 0 {
759 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
760 }
761 sanitizeAttrs(rc, out, attrs)
762 }
763 out.Write([]byte(">"))
764}
765
766func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
767 for _, attr := range attrs {
768 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
769 }
770}
771
772func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
773 if inArray(attrName, SAFE_ATTRIBUTES) {
774 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
775 return
776 }
777 switch string(attrName) {
778 case "src", "href", "action":
779 if uri, err := rc.ProxifyURI(attrValue); err == nil {
780 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
781 } else if DEBUG {
782 log.Println("cannot proxify uri:", string(attrValue))
783 }
784 case "style":
785 cssAttr := bytes.NewBuffer(nil)
786 sanitizeCSS(rc, cssAttr, attrValue)
787 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
788 }
789}
790
// mergeURIs resolves the reference u2 against the base u1 and returns the
// result. When u2 is nil there is nothing to resolve and u1 itself is returned.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
797
// sanitizeURI normalizes the start of a URI so that its scheme can be checked
// reliably. It returns the cleaned URI and the scheme including the trailing
// colon (e.g. "javascript:"), or "" when no scheme was found.
//
// Trailing bytes <= 32 (control characters and space) are trimmed. The URI is
// then scanned up to the first ':', '/', '?', '\\' or '#'; bytes <= 32 met on
// the way are skipped and ASCII upper-case letters are lower-cased.
//   - If the scan stops at ':', the collected lower-case scheme is written
//     back into uri directly in front of the remainder, and the slice starting
//     there is returned.
//   - Otherwise the URI is treated as relative: only the leading bytes <= 32
//     are dropped and the rest is returned untouched.
//
// The work is done in place (uri may be modified) to avoid allocating a new
// URI; only the scheme is allocated.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	scheme := bytes.NewBuffer(make([]byte, 0, 10))
	firstIdx := -1 // index of the first byte > 32
	colonIdx := -1 // index of the ':' that ends the scheme

scan:
	for i := 0; i < len(uri); i++ {
		c := uri[i]
		if c <= 32 {
			continue
		}
		if c < utf8.RuneSelf && 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		scheme.WriteByte(c)

		if firstIdx < 0 {
			firstIdx = i
		}

		switch c {
		case ':':
			colonIdx = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colonIdx < 0 {
		// scheme NOT found
		if firstIdx < 0 {
			firstIdx = 0
		}
		return uri[firstIdx:], ""
	}

	// Place the cleaned scheme so that its ':' lands on the original ':'.
	start := colonIdx + 1 - scheme.Len()
	copy(uri[start:], scheme.Bytes())
	return uri[start:], scheme.String()
}
849
850func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
851 // sanitize URI
852 uri, scheme := sanitizeURI(uri)
853
854 // remove javascript protocol
855 if scheme == "javascript:" {
856 return "", nil
857 }
858
859 // TODO check malicious data: - e.g. data:script
860 if scheme == "data:" {
861 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
862 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
863 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
864 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
865 bytes.HasPrefix(uri, []byte("data:image/webp")) {
866 // should be safe
867 return string(uri), nil
868 } else {
869 // unsafe data
870 return "", nil
871 }
872 }
873
874 // parse the uri
875 u, err := url.Parse(string(uri))
876 if err != nil {
877 return "", err
878 }
879
880 // get the fragment (with the prefix "#")
881 fragment := ""
882 if len(u.Fragment) > 0 {
883 fragment = "#" + u.Fragment
884 }
885
886 // reset the fragment: it is not included in the mortyurl
887 u.Fragment = ""
888
889 // merge the URI with the document URI
890 u = mergeURIs(rc.BaseURL, u)
891
892 // simple internal link ?
893 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
894 if u.Scheme == rc.BaseURL.Scheme &&
895 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
896 u.Host == rc.BaseURL.Host &&
897 u.Path == rc.BaseURL.Path &&
898 u.RawQuery == rc.BaseURL.RawQuery {
899 // the fragment is the only difference between the document URI and the uri parameter
900 return fragment, nil
901 }
902
903 // return full URI and fragment (if not empty)
904 morty_uri := u.String()
905
906 if rc.Key == nil {
907 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
908 }
909 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
910}
911
// inArray reports whether a contains an element with exactly the same bytes
// as b.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
920
// hash returns the HMAC-SHA256 of msg under key as a lower-case hexadecimal
// string.
func hash(msg string, key []byte) string {
	mac := hmac.New(sha256.New, key)
	mac.Write([]byte(msg))
	sum := mac.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(sum)))
	hex.Encode(encoded, sum)
	return string(encoded)
}
926
927func verifyRequestURI(uri, hashMsg, key []byte) bool {
928 h := make([]byte, hex.DecodedLen(len(hashMsg)))
929 _, err := hex.Decode(h, hashMsg)
930 if err != nil {
931 if DEBUG {
932 log.Println("hmac error:", err)
933 }
934 return false
935 }
936 mac := hmac.New(sha256.New, key)
937 mac.Write(uri)
938 return hmac.Equal(h, mac.Sum(nil))
939}
940
941func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
942 ctx.SetContentType("text/html")
943 ctx.SetStatusCode(403)
944 ctx.Write([]byte(MORTY_HTML_PAGE_START))
945 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
946 ctx.Write([]byte("<p>Following</p><p><a href=\""))
947 ctx.Write([]byte(html.EscapeString(uri.String())))
948 ctx.Write([]byte("\" rel=\"noreferrer\">"))
949 ctx.Write([]byte(html.EscapeString(uri.String())))
950 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
951 ctx.Write([]byte(MORTY_HTML_PAGE_END))
952}
953
954func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
955 ctx.SetContentType("text/html; charset=UTF-8")
956 ctx.SetStatusCode(statusCode)
957 ctx.Write([]byte(MORTY_HTML_PAGE_START))
958 if err != nil {
959 if DEBUG {
960 log.Println("error:", err)
961 }
962 ctx.Write([]byte("<h2>Error: "))
963 ctx.Write([]byte(html.EscapeString(err.Error())))
964 ctx.Write([]byte("</h2>"))
965 }
966 if p.Key == nil {
967 ctx.Write([]byte(`
968 <form action="post">
969 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
970 <input type="submit" value="go" />
971 </form>`))
972 } else {
973 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
974 }
975 ctx.Write([]byte(MORTY_HTML_PAGE_END))
976}
977
978func main() {
979 default_listen_addr := os.Getenv("MORTY_ADDRESS")
980 if default_listen_addr == "" {
981 default_listen_addr = "127.0.0.1:3000"
982 }
983 default_key := os.Getenv("MORTY_KEY")
984 listen := flag.String("listen", default_listen_addr, "Listen address")
985 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
986 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
987 version := flag.Bool("version", false, "Show version")
988 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
989 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
990 flag.Parse()
991
992 if *version {
993 fmt.Println(VERSION)
994 return
995 }
996
997 if *ipv6 {
998 CLIENT.DialDualStack = true
999 }
1000
1001 if *socks5 != "" {
1002 // this disables CLIENT.DialDualStack
1003 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1004 }
1005
1006 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
1007
1008 if *key != "" {
1009 var err error
1010 p.Key, err = base64.StdEncoding.DecodeString(*key)
1011 if err != nil {
1012 log.Fatal("Error parsing -key", err.Error())
1013 os.Exit(1)
1014 }
1015 }
1016
1017 log.Println("listening on", *listen)
1018
1019 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1020 log.Fatal("Error in ListenAndServe:", err)
1021 }
1022}
Note: See TracBrowser for help on using the repository browser.