source: code/trunk/morty.go@ 122

Last change on this file since 122 was 122, checked in by asciimoo, 5 years ago

[fix] rename variable

File size: 29.0 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "html/template"
13 "io"
14 "log"
15 "mime"
16 "net/url"
17 "os"
18 "path/filepath"
19 "regexp"
20 "strings"
21 "time"
22 "unicode/utf8"
23
24 "github.com/valyala/fasthttp"
25 "github.com/valyala/fasthttp/fasthttpproxy"
26 "golang.org/x/net/html"
27 "golang.org/x/net/html/charset"
28 "golang.org/x/text/encoding"
29
30 "github.com/asciimoo/morty/contenttype"
31)
32
// Parser states used by sanitizeHTML to decide how the text inside the
// current element is processed.
const (
	STATE_DEFAULT     int = iota // ordinary text: written out as-is
	STATE_IN_STYLE               // inside <style>: text is passed to sanitizeCSS
	STATE_IN_NOSCRIPT            // inside <noscript>: text is passed to sanitizeHTML
)

// VERSION is the string printed by the -version flag.
const VERSION = "v0.2.0"

// DEBUG turns on diagnostic logging. It is enabled unless the DEBUG
// environment variable is set to exactly "false".
var DEBUG = os.Getenv("DEBUG") != "false"
42
43var CLIENT *fasthttp.Client = &fasthttp.Client{
44 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
45 ReadBufferSize: 16 * 1024, // 16K
46}
47
// CSS_URL_REGEXP finds url(...) references in CSS.
//
// Submatch 1 is the optional opening quote, submatch 2 the URL itself and
// submatch 3 the optional closing quote. The URL character class is tab plus
// printable ASCII without space, the two quote characters and ')'.
var CSS_URL_REGEXP = regexp.MustCompile(`url\((['"]?)[ \t\f]*([\x09\x21\x23-\x26\x28\x2a-\x7E]+)(['"]?)\)?`)
49
50var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
51 // html
52 contenttype.NewFilterEquals("text", "html", ""),
53 contenttype.NewFilterEquals("application", "xhtml", "xml"),
54 // css
55 contenttype.NewFilterEquals("text", "css", ""),
56 // images
57 contenttype.NewFilterEquals("image", "gif", ""),
58 contenttype.NewFilterEquals("image", "png", ""),
59 contenttype.NewFilterEquals("image", "jpeg", ""),
60 contenttype.NewFilterEquals("image", "pjpeg", ""),
61 contenttype.NewFilterEquals("image", "webp", ""),
62 contenttype.NewFilterEquals("image", "tiff", ""),
63 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
64 contenttype.NewFilterEquals("image", "bmp", ""),
65 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
66 contenttype.NewFilterEquals("image", "x-icon", ""),
67 // fonts
68 contenttype.NewFilterEquals("application", "font-otf", ""),
69 contenttype.NewFilterEquals("application", "font-ttf", ""),
70 contenttype.NewFilterEquals("application", "font-woff", ""),
71 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
72})
73
74var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
75 // texts
76 contenttype.NewFilterEquals("text", "csv", ""),
77 contenttype.NewFilterEquals("text", "tab-separated-values", ""),
78 contenttype.NewFilterEquals("text", "plain", ""),
79 // API
80 contenttype.NewFilterEquals("application", "json", ""),
81 // Documents
82 contenttype.NewFilterEquals("application", "x-latex", ""),
83 contenttype.NewFilterEquals("application", "pdf", ""),
84 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
85 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
86 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
87 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
88 // Compressed archives
89 contenttype.NewFilterEquals("application", "zip", ""),
90 contenttype.NewFilterEquals("application", "gzip", ""),
91 contenttype.NewFilterEquals("application", "x-compressed", ""),
92 contenttype.NewFilterEquals("application", "x-gtar", ""),
93 contenttype.NewFilterEquals("application", "x-compress", ""),
94 // Generic binary
95 contenttype.NewFilterEquals("application", "octet-stream", ""),
96})
97
// ALLOWED_CONTENTTYPE_PARAMETERS is the set of Content-Type parameters passed
// to contentType.FilterParameters before the header is sent to the client.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{
	"charset": true,
}
101
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
111
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with only HTML escaping applied to the value.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
148
// LINK_REL_SAFE_VALUES lists the rel values for which sanitizeLinkTag keeps a
// <link> element; a link with any other rel value is dropped.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
170
// LINK_HTTP_EQUIV_SAFE_VALUES lists the lower-cased http-equiv values for
// which sanitizeMetaTag keeps a <meta> element.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
179
// Proxy holds the settings of a running proxy instance.
type Proxy struct {
	// Key is the HMAC key used to check "mortyhash"; when nil, requests are
	// not verified and the main page offers a URL input form.
	Key []byte
	// RequestTimeout is the timeout passed to the HTTP client for each
	// upstream request.
	RequestTimeout time.Duration
}
184
// RequestConfig carries what is needed to rewrite the URLs of one document.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs; nil means unsigned.
	Key []byte
	// BaseURL is the URL that relative references are resolved against.
	BaseURL *url.URL
}
189
// HTMLBodyExtParam is the data passed to the HTML_BODY_EXTENSION template.
type HTMLBodyExtParam struct {
	// BaseURL is shown in the header's URL field and "original site" link.
	BaseURL string
	// HasMortyKey makes the header's URL field read-only when true.
	HasMortyKey bool
}
194
// HTMLFormExtParam is the data passed to the HTML_FORM_EXTENSION template.
type HTMLFormExtParam struct {
	// BaseURL becomes the value of the hidden "mortyurl" input.
	BaseURL string
	// MortyHash becomes the value of the hidden "mortyhash" input.
	MortyHash string
}
199
// HTML_FORM_EXTENSION renders the hidden inputs injected into every <form>;
// HTML_BODY_EXTENSION renders the proxy header injected before </body>.
// Both are parsed once in init.
var HTML_FORM_EXTENSION *template.Template
var HTML_BODY_EXTENSION *template.Template

// HTML_HEAD_CONTENT_TYPE is written right after every <head> start tag.
var HTML_HEAD_CONTENT_TYPE string = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`

// MORTY_HTML_PAGE_START opens the proxy's own pages (main page, exit page).
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`

// MORTY_HTML_PAGE_END closes a page started with MORTY_HTML_PAGE_START.
var MORTY_HTML_PAGE_END string = `
 </div>
 <div class="footer">
 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://github.com/asciimoo/morty">view on github</a>
 </p>
 </div>
</body>
</html>`

// FAVICON_BYTES is the PNG image served for /favicon.ico; filled in by init.
var FAVICON_BYTES []byte

// init decodes the embedded favicon and parses the two HTML templates.
// It panics if either step fails, because both inputs are compile-time
// constants and a failure is a programming error.
func init() {
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	// The literal carries no "=" padding, which StdEncoding requires; decoding
	// it with StdEncoding fails on the last group and drops the final bytes.
	// Strip any padding and use the unpadded encoding so both forms work.
	var err error
	FAVICON_BYTES, err = base64.RawStdEncoding.DecodeString(strings.TrimRight(faviconBase64, "="))
	if err != nil {
		panic(err)
	}

	HTML_FORM_EXTENSION, err = template.New("html_form_extension").Parse(
		`<input type="hidden" name="mortyurl" value="{{.BaseURL}}" /><input type="hidden" name="mortyhash" value="{{.MortyHash}}" />`)
	if err != nil {
		panic(err)
	}

	HTML_BODY_EXTENSION, err = template.New("html_body_extension").Parse(`
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
 <form method="get">
 <label for="mortytoggle">hide</label>
 <span><a href="/">Morty Proxy</a></span>
 <input type="url" value="{{.BaseURL}}" name="mortyurl" {{if .HasMortyKey }}readonly="true"{{end}} />
 This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page, visit <a href="{{.BaseURL}}" rel="noreferrer">original site</a>.
 </form>
</div>
<style>
body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #AAAAAA; background: #FFF; padding: 4px; color: #444; height: 42px; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
#mortyheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
#mortyheader input[type=url] { width: 50%; padding: 4px; font-size: 16px; }
</style>
`)
	if err != nil {
		panic(err)
	}
}
278
279func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
280
281 if appRequestHandler(ctx) {
282 return
283 }
284
285 requestHash := popRequestParam(ctx, []byte("mortyhash"))
286
287 requestURI := popRequestParam(ctx, []byte("mortyurl"))
288
289 if requestURI == nil {
290 p.serveMainPage(ctx, 200, nil)
291 return
292 }
293
294 if p.Key != nil {
295 if !verifyRequestURI(requestURI, requestHash, p.Key) {
296 // HTTP status code 403 : Forbidden
297 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
298 return
299 }
300 }
301
302 requestURIQuery := ctx.QueryArgs().QueryString()
303 if len(requestURIQuery) > 0 {
304 requestURI = append(requestURI, '?')
305 requestURI = append(requestURI, requestURIQuery...)
306 }
307
308 parsedURI, err := url.Parse(string(requestURI))
309
310 if err != nil {
311 // HTTP status code 500 : Internal Server Error
312 p.serveMainPage(ctx, 500, err)
313 return
314 }
315
316 if parsedURI.Scheme == "" {
317 parsedURI.Scheme = "https"
318 requestURI = append([]byte("https://"), requestURI...)
319 }
320
321 // Serve an intermediate page for protocols other than HTTP(S)
322 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
323 p.serveExitMortyPage(ctx, parsedURI)
324 return
325 }
326
327 req := fasthttp.AcquireRequest()
328 defer fasthttp.ReleaseRequest(req)
329 req.SetConnectionClose()
330
331 requestURIStr := string(requestURI)
332
333 if DEBUG {
334 log.Println("getting", requestURIStr)
335 }
336
337 req.SetRequestURI(requestURIStr)
338 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
339
340 resp := fasthttp.AcquireResponse()
341 defer fasthttp.ReleaseResponse(resp)
342
343 req.Header.SetMethodBytes(ctx.Method())
344 if ctx.IsPost() || ctx.IsPut() {
345 req.SetBody(ctx.PostBody())
346 }
347
348 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
349
350 if err != nil {
351 if err == fasthttp.ErrTimeout {
352 // HTTP status code 504 : Gateway Time-Out
353 p.serveMainPage(ctx, 504, err)
354 } else {
355 // HTTP status code 500 : Internal Server Error
356 p.serveMainPage(ctx, 500, err)
357 }
358 return
359 }
360
361 if resp.StatusCode() != 200 {
362 switch resp.StatusCode() {
363 case 301, 302, 303, 307, 308:
364 loc := resp.Header.Peek("Location")
365 if loc != nil {
366 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
367 url, err := rc.ProxifyURI(loc)
368 if err == nil {
369 ctx.SetStatusCode(resp.StatusCode())
370 ctx.Response.Header.Add("Location", url)
371 if DEBUG {
372 log.Println("redirect to", string(loc))
373 }
374 return
375 }
376 }
377 }
378 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
379 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
380 return
381 }
382
383 contentTypeBytes := resp.Header.Peek("Content-Type")
384
385 if contentTypeBytes == nil {
386 // HTTP status code 503 : Service Unavailable
387 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
388 return
389 }
390
391 contentTypeString := string(contentTypeBytes)
392
393 // decode Content-Type header
394 contentType, error := contenttype.ParseContentType(contentTypeString)
395 if error != nil {
396 // HTTP status code 503 : Service Unavailable
397 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
398 return
399 }
400
401 // content-disposition
402 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
403
404 // check content type
405 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
406 // it is not a usual content type
407 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
408 // force attachment for allowed content type
409 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
410 } else {
411 // deny access to forbidden content type
412 // HTTP status code 403 : Forbidden
413 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
414 return
415 }
416 }
417
418 // HACK : replace */xhtml by text/html
419 if contentType.SubType == "xhtml" {
420 contentType.TopLevelType = "text"
421 contentType.SubType = "html"
422 contentType.Suffix = ""
423 }
424
425 // conversion to UTF-8
426 var responseBody []byte
427
428 if contentType.TopLevelType == "text" {
429 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
430 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
431 responseBody, err = e.NewDecoder().Bytes(resp.Body())
432 if err != nil {
433 // HTTP status code 503 : Service Unavailable
434 p.serveMainPage(ctx, 503, err)
435 return
436 }
437 } else {
438 responseBody = resp.Body()
439 }
440 // update the charset or specify it
441 contentType.Parameters["charset"] = "UTF-8"
442 } else {
443 responseBody = resp.Body()
444 }
445
446 //
447 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
448
449 // set the content type
450 ctx.SetContentType(contentType.String())
451
452 // output according to MIME type
453 switch {
454 case contentType.SubType == "css" && contentType.Suffix == "":
455 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
456 case contentType.SubType == "html" && contentType.Suffix == "":
457 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
458 default:
459 if contentDispositionBytes != nil {
460 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
461 }
462 ctx.Write(responseBody)
463 }
464}
465
// contentDispositionForceAttachment returns a Content-Disposition header value
// whose disposition type is always "attachment".
//
// Parameters of an existing, parsable header are kept. When no "filename"
// parameter is present, the last element of url's path is used, unless the
// path has no usable last element (empty or root path). If the header cannot
// be formatted, a bare "attachment" is returned so the download is still
// forced.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	params := make(map[string]string)

	if contentDispositionBytes != nil {
		// An unparsable header is ignored and replaced from scratch.
		if _, parsed, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil && parsed != nil {
			params = parsed
		}
	}

	if _, fileNameDefined := params["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		// Base yields "." for an empty path and the separator for a root path;
		// neither is a file name worth advertising.
		if name != "." && name != string(filepath.Separator) {
			params["filename"] = name
		}
	}

	disposition := mime.FormatMediaType("attachment", params)
	if disposition == "" {
		// FormatMediaType reports failure with an empty string.
		disposition = "attachment"
	}
	return []byte(disposition)
}
488
489func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
490 // serve robots.txt
491 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
492 ctx.SetContentType("text/plain")
493 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
494 return true
495 }
496
497 // server favicon.ico
498 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
499 ctx.SetContentType("image/png")
500 ctx.Write(FAVICON_BYTES)
501 return true
502 }
503
504 return false
505}
506
507func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
508 param := ctx.QueryArgs().PeekBytes(paramName)
509
510 if param == nil {
511 param = ctx.PostArgs().PeekBytes(paramName)
512 ctx.PostArgs().DelBytes(paramName)
513 }
514 ctx.QueryArgs().DelBytes(paramName)
515
516 return param
517}
518
519func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
520 // TODO
521
522 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
523
524 if urlSlices == nil {
525 out.Write(css)
526 return
527 }
528
529 startIndex := 0
530
531 for _, s := range urlSlices {
532 urlStart := s[4]
533 urlEnd := s[5]
534
535 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
536 out.Write(css[startIndex:urlStart])
537 out.Write([]byte(uri))
538 startIndex = urlEnd
539 } else if DEBUG {
540 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
541 }
542 }
543 if startIndex < len(css) {
544 out.Write(css[startIndex:len(css)])
545 }
546}
547
548func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
549 r := bytes.NewReader(htmlDoc)
550 decoder := html.NewTokenizer(r)
551 decoder.AllowCDATA(true)
552
553 unsafeElements := make([][]byte, 0, 8)
554 state := STATE_DEFAULT
555 for {
556 token := decoder.Next()
557 if token == html.ErrorToken {
558 err := decoder.Err()
559 if err != io.EOF {
560 log.Println("failed to parse HTML")
561 }
562 break
563 }
564
565 if len(unsafeElements) == 0 {
566
567 switch token {
568 case html.StartTagToken, html.SelfClosingTagToken:
569 tag, hasAttrs := decoder.TagName()
570 safe := !inArray(tag, UNSAFE_ELEMENTS)
571 if !safe {
572 if token != html.SelfClosingTagToken {
573 var unsafeTag []byte = make([]byte, len(tag))
574 copy(unsafeTag, tag)
575 unsafeElements = append(unsafeElements, unsafeTag)
576 }
577 break
578 }
579 if bytes.Equal(tag, []byte("base")) {
580 for {
581 attrName, attrValue, moreAttr := decoder.TagAttr()
582 if bytes.Equal(attrName, []byte("href")) {
583 parsedURI, err := url.Parse(string(attrValue))
584 if err == nil {
585 rc.BaseURL = parsedURI
586 }
587 }
588 if !moreAttr {
589 break
590 }
591 }
592 break
593 }
594 if bytes.Equal(tag, []byte("noscript")) {
595 state = STATE_IN_NOSCRIPT
596 break
597 }
598 var attrs [][][]byte
599 if hasAttrs {
600 for {
601 attrName, attrValue, moreAttr := decoder.TagAttr()
602 attrs = append(attrs, [][]byte{
603 attrName,
604 attrValue,
605 []byte(html.EscapeString(string(attrValue))),
606 })
607 if !moreAttr {
608 break
609 }
610 }
611 }
612 if bytes.Equal(tag, []byte("link")) {
613 sanitizeLinkTag(rc, out, attrs)
614 break
615 }
616
617 if bytes.Equal(tag, []byte("meta")) {
618 sanitizeMetaTag(rc, out, attrs)
619 break
620 }
621
622 fmt.Fprintf(out, "<%s", tag)
623
624 if hasAttrs {
625 sanitizeAttrs(rc, out, attrs)
626 }
627
628 if token == html.SelfClosingTagToken {
629 fmt.Fprintf(out, " />")
630 } else {
631 fmt.Fprintf(out, ">")
632 if bytes.Equal(tag, []byte("style")) {
633 state = STATE_IN_STYLE
634 }
635 }
636
637 if bytes.Equal(tag, []byte("head")) {
638 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
639 }
640
641 if bytes.Equal(tag, []byte("form")) {
642 var formURL *url.URL
643 for _, attr := range attrs {
644 if bytes.Equal(attr[0], []byte("action")) {
645 formURL, _ = url.Parse(string(attr[1]))
646 formURL = mergeURIs(rc.BaseURL, formURL)
647 break
648 }
649 }
650 if formURL == nil {
651 formURL = rc.BaseURL
652 }
653 urlStr := formURL.String()
654 var key string
655 if rc.Key != nil {
656 key = hash(urlStr, rc.Key)
657 }
658 err := HTML_FORM_EXTENSION.Execute(out, HTMLFormExtParam{urlStr, key})
659 if err != nil {
660 if DEBUG {
661 fmt.Println("failed to inject body extension", err)
662 }
663 }
664 }
665
666 case html.EndTagToken:
667 tag, _ := decoder.TagName()
668 writeEndTag := true
669 switch string(tag) {
670 case "body":
671 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
672 if len(rc.Key) > 0 {
673 p.HasMortyKey = true
674 }
675 err := HTML_BODY_EXTENSION.Execute(out, p)
676 if err != nil {
677 if DEBUG {
678 fmt.Println("failed to inject body extension", err)
679 }
680 }
681 case "style":
682 state = STATE_DEFAULT
683 case "noscript":
684 state = STATE_DEFAULT
685 writeEndTag = false
686 }
687 // skip noscript tags - only the tag, not the content, because javascript is sanitized
688 if writeEndTag {
689 fmt.Fprintf(out, "</%s>", tag)
690 }
691
692 case html.TextToken:
693 switch state {
694 case STATE_DEFAULT:
695 fmt.Fprintf(out, "%s", decoder.Raw())
696 case STATE_IN_STYLE:
697 sanitizeCSS(rc, out, decoder.Raw())
698 case STATE_IN_NOSCRIPT:
699 sanitizeHTML(rc, out, decoder.Raw())
700 }
701
702 case html.CommentToken:
703 // ignore comment. TODO : parse IE conditional comment
704
705 case html.DoctypeToken:
706 out.Write(decoder.Raw())
707 }
708 } else {
709 switch token {
710 case html.StartTagToken, html.SelfClosingTagToken:
711 tag, _ := decoder.TagName()
712 if inArray(tag, UNSAFE_ELEMENTS) {
713 unsafeElements = append(unsafeElements, tag)
714 }
715
716 case html.EndTagToken:
717 tag, _ := decoder.TagName()
718 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
719 unsafeElements = unsafeElements[:len(unsafeElements)-1]
720 }
721 }
722 }
723 }
724}
725
726func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
727 exclude := false
728 for _, attr := range attrs {
729 attrName := attr[0]
730 attrValue := attr[1]
731 if bytes.Equal(attrName, []byte("rel")) {
732 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
733 exclude = true
734 break
735 }
736 }
737 if bytes.Equal(attrName, []byte("as")) {
738 if bytes.Equal(attrValue, []byte("script")) {
739 exclude = true
740 break
741 }
742 }
743 }
744
745 if !exclude {
746 out.Write([]byte("<link"))
747 for _, attr := range attrs {
748 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
749 }
750 out.Write([]byte(">"))
751 }
752}
753
754func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
755 var http_equiv []byte
756 var content []byte
757
758 for _, attr := range attrs {
759 attrName := attr[0]
760 attrValue := attr[1]
761 if bytes.Equal(attrName, []byte("http-equiv")) {
762 http_equiv = bytes.ToLower(attrValue)
763 // exclude some <meta http-equiv="..." ..>
764 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
765 return
766 }
767 }
768 if bytes.Equal(attrName, []byte("content")) {
769 content = attrValue
770 }
771 if bytes.Equal(attrName, []byte("charset")) {
772 // exclude <meta charset="...">
773 return
774 }
775 }
776
777 out.Write([]byte("<meta"))
778 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
779 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
780 contentUrl := content[urlIndex+4:]
781 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
782 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
783 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
784 contentUrl = contentUrl[1 : len(contentUrl)-1]
785 }
786 }
787 // output proxify result
788 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
789 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
790 }
791 } else {
792 if len(http_equiv) > 0 {
793 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
794 }
795 sanitizeAttrs(rc, out, attrs)
796 }
797 out.Write([]byte(">"))
798}
799
800func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
801 for _, attr := range attrs {
802 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
803 }
804}
805
806func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
807 if inArray(attrName, SAFE_ATTRIBUTES) {
808 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
809 return
810 }
811 switch string(attrName) {
812 case "src", "href", "action":
813 if uri, err := rc.ProxifyURI(attrValue); err == nil {
814 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
815 } else if DEBUG {
816 log.Println("cannot proxify uri:", string(attrValue))
817 }
818 case "style":
819 cssAttr := bytes.NewBuffer(nil)
820 sanitizeCSS(rc, cssAttr, attrValue)
821 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
822 }
823}
824
// mergeURIs resolves the reference u2 against the base u1 and returns the
// result. A nil u2 yields u1 itself.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
831
// sanitizeURI strips bytes <= 32 (control characters and space) from both
// ends of uri and detects its scheme.
//
// It returns the cleaned URI and the scheme, lower-cased and including the
// trailing ':'; the scheme is "" when a '/', '?', '\' or '#' is met before any
// ':' (most probably a relative URI) or when there is no ':' at all. Bytes
// <= 32 inside the scheme are removed as well.
//
// The input slice is modified in place: the normalized scheme is written
// back into uri so that no new buffer is needed for the URI itself.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	var (
		lowered    = make([]byte, 0, 10) // kept bytes seen so far, lower-cased
		start      = 0                   // index of the first kept byte
		colonIndex = -1                  // index of the ':' ending the scheme
	)

scan:
	for i := 0; i < len(uri); i++ {
		c := uri[i]
		if c <= 32 {
			// skipped: neither part of the scheme nor of the result
			continue
		}
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		if len(lowered) == 0 {
			start = i
		}
		lowered = append(lowered, c)

		switch c {
		case ':':
			colonIndex = i
			break scan
		case '/', '?', '\\', '#':
			// a path, query or fragment started before any ':'
			break scan
		}
	}

	if colonIndex == -1 {
		return uri[start:], ""
	}

	// Right-align the compacted scheme so that it ends on the ':' and the
	// rest of the URI follows it directly.
	schemeStart := colonIndex - len(lowered) + 1
	copy(uri[schemeStart:], lowered)
	return uri[schemeStart:], string(lowered)
}
883
884func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
885 // sanitize URI
886 uri, scheme := sanitizeURI(uri)
887
888 // remove javascript protocol
889 if scheme == "javascript:" {
890 return "", nil
891 }
892
893 // TODO check malicious data: - e.g. data:script
894 if scheme == "data:" {
895 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
896 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
897 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
898 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
899 bytes.HasPrefix(uri, []byte("data:image/webp")) {
900 // should be safe
901 return string(uri), nil
902 } else {
903 // unsafe data
904 return "", nil
905 }
906 }
907
908 // parse the uri
909 u, err := url.Parse(string(uri))
910 if err != nil {
911 return "", err
912 }
913
914 // get the fragment (with the prefix "#")
915 fragment := ""
916 if len(u.Fragment) > 0 {
917 fragment = "#" + u.Fragment
918 }
919
920 // reset the fragment: it is not included in the mortyurl
921 u.Fragment = ""
922
923 // merge the URI with the document URI
924 u = mergeURIs(rc.BaseURL, u)
925
926 // simple internal link ?
927 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
928 if u.Scheme == rc.BaseURL.Scheme &&
929 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
930 u.Host == rc.BaseURL.Host &&
931 u.Path == rc.BaseURL.Path &&
932 u.RawQuery == rc.BaseURL.RawQuery {
933 // the fragment is the only difference between the document URI and the uri parameter
934 return fragment, nil
935 }
936
937 // return full URI and fragment (if not empty)
938 morty_uri := u.String()
939
940 if rc.Key == nil {
941 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
942 }
943 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
944}
945
// inArray reports whether a contains an element with the same bytes as b.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
954
// hash returns the HMAC-SHA256 of msg under key as a lower-case hex string.
func hash(msg string, key []byte) string {
	mac := hmac.New(sha256.New, key)
	mac.Write([]byte(msg))
	digest := mac.Sum(nil)
	return hex.EncodeToString(digest)
}
960
961func verifyRequestURI(uri, hashMsg, key []byte) bool {
962 h := make([]byte, hex.DecodedLen(len(hashMsg)))
963 _, err := hex.Decode(h, hashMsg)
964 if err != nil {
965 if DEBUG {
966 log.Println("hmac error:", err)
967 }
968 return false
969 }
970 mac := hmac.New(sha256.New, key)
971 mac.Write(uri)
972 return hmac.Equal(h, mac.Sum(nil))
973}
974
975func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
976 ctx.SetContentType("text/html")
977 ctx.SetStatusCode(403)
978 ctx.Write([]byte(MORTY_HTML_PAGE_START))
979 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
980 ctx.Write([]byte("<p>Following</p><p><a href=\""))
981 ctx.Write([]byte(html.EscapeString(uri.String())))
982 ctx.Write([]byte("\" rel=\"noreferrer\">"))
983 ctx.Write([]byte(html.EscapeString(uri.String())))
984 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
985 ctx.Write([]byte(MORTY_HTML_PAGE_END))
986}
987
988func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
989 ctx.SetContentType("text/html; charset=UTF-8")
990 ctx.SetStatusCode(statusCode)
991 ctx.Write([]byte(MORTY_HTML_PAGE_START))
992 if err != nil {
993 if DEBUG {
994 log.Println("error:", err)
995 }
996 ctx.Write([]byte("<h2>Error: "))
997 ctx.Write([]byte(html.EscapeString(err.Error())))
998 ctx.Write([]byte("</h2>"))
999 }
1000 if p.Key == nil {
1001 ctx.Write([]byte(`
1002 <form action="post">
1003 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
1004 <input type="submit" value="go" />
1005 </form>`))
1006 } else {
1007 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
1008 }
1009 ctx.Write([]byte(MORTY_HTML_PAGE_END))
1010}
1011
1012func main() {
1013 default_listen_addr := os.Getenv("MORTY_ADDRESS")
1014 if default_listen_addr == "" {
1015 default_listen_addr = "127.0.0.1:3000"
1016 }
1017 default_key := os.Getenv("MORTY_KEY")
1018 listen := flag.String("listen", default_listen_addr, "Listen address")
1019 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
1020 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
1021 version := flag.Bool("version", false, "Show version")
1022 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
1023 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
1024 flag.Parse()
1025
1026 if *version {
1027 fmt.Println(VERSION)
1028 return
1029 }
1030
1031 if *ipv6 {
1032 CLIENT.DialDualStack = true
1033 }
1034
1035 if *socks5 != "" {
1036 // this disables CLIENT.DialDualStack
1037 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1038 }
1039
1040 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
1041
1042 if *key != "" {
1043 var err error
1044 p.Key, err = base64.StdEncoding.DecodeString(*key)
1045 if err != nil {
1046 log.Fatal("Error parsing -key", err.Error())
1047 os.Exit(1)
1048 }
1049 }
1050
1051 log.Println("listening on", *listen)
1052
1053 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1054 log.Fatal("Error in ListenAndServe:", err)
1055 }
1056}
Note: See TracBrowser for help on using the repository browser.