source: code/trunk/morty.go@ 120

Last change on this file since 120 was 120, checked in by asciimoo, 5 years ago

[enh] allow opening urls without protocol scheme

File size: 28.3 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
14 "mime"
15 "net/url"
16 "os"
17 "path/filepath"
18 "regexp"
19 "strings"
20 "time"
21 "unicode/utf8"
22
23 "github.com/valyala/fasthttp"
24 "github.com/valyala/fasthttp/fasthttpproxy"
25 "golang.org/x/net/html"
26 "golang.org/x/net/html/charset"
27 "golang.org/x/text/encoding"
28
29 "github.com/asciimoo/morty/contenttype"
30)
31
// Parser states tracked by sanitizeHTML while it walks the token stream; the
// current state decides how text tokens are emitted.
const (
	STATE_DEFAULT int = iota
	STATE_IN_STYLE
	STATE_IN_NOSCRIPT
)

// VERSION is the string printed when the -version flag is given.
const VERSION = "v0.2.0"

// DEBUG turns on verbose logging. It is enabled unless the DEBUG environment
// variable is exactly "false".
var DEBUG = os.Getenv("DEBUG") != "false"
41
42var CLIENT *fasthttp.Client = &fasthttp.Client{
43 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
44 ReadBufferSize: 16 * 1024, // 16K
45}
46
// CSS_URL_REGEXP locates url(...) references in CSS. Submatch 1 and 3 are the
// optional opening and closing quote, submatch 2 is the URL itself.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
48
49var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
50 // html
51 contenttype.NewFilterEquals("text", "html", ""),
52 contenttype.NewFilterEquals("application", "xhtml", "xml"),
53 // css
54 contenttype.NewFilterEquals("text", "css", ""),
55 // images
56 contenttype.NewFilterEquals("image", "gif", ""),
57 contenttype.NewFilterEquals("image", "png", ""),
58 contenttype.NewFilterEquals("image", "jpeg", ""),
59 contenttype.NewFilterEquals("image", "pjpeg", ""),
60 contenttype.NewFilterEquals("image", "webp", ""),
61 contenttype.NewFilterEquals("image", "tiff", ""),
62 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
63 contenttype.NewFilterEquals("image", "bmp", ""),
64 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
65 contenttype.NewFilterEquals("image", "x-icon", ""),
66 // fonts
67 contenttype.NewFilterEquals("application", "font-otf", ""),
68 contenttype.NewFilterEquals("application", "font-ttf", ""),
69 contenttype.NewFilterEquals("application", "font-woff", ""),
70 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
71})
72
73var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
74 // texts
75 contenttype.NewFilterEquals("text", "csv", ""),
76 contenttype.NewFilterEquals("text", "tab-separated-values", ""),
77 contenttype.NewFilterEquals("text", "plain", ""),
78 // API
79 contenttype.NewFilterEquals("application", "json", ""),
80 // Documents
81 contenttype.NewFilterEquals("application", "x-latex", ""),
82 contenttype.NewFilterEquals("application", "pdf", ""),
83 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
84 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
85 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
86 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
87 // Compressed archives
88 contenttype.NewFilterEquals("application", "zip", ""),
89 contenttype.NewFilterEquals("application", "gzip", ""),
90 contenttype.NewFilterEquals("application", "x-compressed", ""),
91 contenttype.NewFilterEquals("application", "x-gtar", ""),
92 contenttype.NewFilterEquals("application", "x-compress", ""),
93 // Generic binary
94 contenttype.NewFilterEquals("application", "octet-stream", ""),
95})
96
// ALLOWED_CONTENTTYPE_PARAMETERS lists the Content-Type parameters that are
// kept when RequestHandler filters the outgoing content type.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
100
// UNSAFE_ELEMENTS names the HTML elements that sanitizeHTML drops together
// with everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
110
// SAFE_ATTRIBUTES names the attributes that sanitizeAttr copies through with
// only HTML escaping of the value.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
147
// LINK_REL_SAFE_VALUES lists the rel values for which sanitizeLinkTag keeps a
// <link> element; any other rel value causes the element to be dropped.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
169
// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-cased) http-equiv values for
// which sanitizeMetaTag keeps a <meta> element.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
178
type (
	// Proxy holds the server-wide settings used by RequestHandler.
	Proxy struct {
		// Key is the HMAC key used to verify "mortyhash"; nil disables
		// verification.
		Key []byte
		// RequestTimeout bounds each upstream request.
		RequestTimeout time.Duration
	}

	// RequestConfig carries the per-document state used while rewriting
	// URLs.
	RequestConfig struct {
		// Key is the HMAC key used to sign proxified URLs; nil means
		// unsigned URLs.
		Key []byte
		// BaseURL is the URL that relative references are resolved
		// against.
		BaseURL *url.URL
	}
)
188
var (
	// HTML_FORM_EXTENSION is a format string for the hidden inputs injected
	// into every rewritten <form>: first the target URL, then its hash.
	HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`

	// HTML_BODY_EXTENSION is a format string for the Morty header injected
	// before </body>; both verbs receive the URL of the proxied page.
	HTML_BODY_EXTENSION = `
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
 <form method="get">
 <label for="mortytoggle">hide</label>
 <span><a href="/">Morty Proxy</a></span>
 <input type="url" value="%s" name="mortyurl" readonly="true" />
 This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page, visit <a href="%s" rel="noreferrer">original site</a>.
 </form>
</div>
<style>
body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #AAAAAA; background: #FFF; padding: 4px; color: #444; height: 42px; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
#mortyheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
#mortyheader input[type=url] { width: 50%%; padding: 4px; font-size: 16px; }
</style>
`

	// HTML_HEAD_CONTENT_TYPE is written right after every opening <head>
	// tag of a rewritten document.
	HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
)
218
// MORTY_HTML_PAGE_START is the opening part of every page Morty renders itself
// (main page, error pages, exit page). It is written verbatim, not used as a
// format string.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`

// MORTY_HTML_PAGE_END closes the markup opened by MORTY_HTML_PAGE_START and
// adds the footer.
var MORTY_HTML_PAGE_END string = `
 </div>
 <div class="footer">
 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://github.com/asciimoo/morty">view on github</a>
 </p>
 </div>
</body>
</html>`
251
// FAVICON_BYTES is the PNG served for /favicon.ico; it is filled in by init.
var FAVICON_BYTES []byte

// init decodes the embedded favicon.
//
// The literal carries no '=' padding, so it must be decoded with the raw
// (unpadded) alphabet: the padded decoder rejects the final quantum and
// returns the image without its last two bytes.
func init() {
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	var err error
	FAVICON_BYTES, err = base64.RawStdEncoding.DecodeString(faviconBase64)
	if err != nil {
		// Not fatal: the proxy works without a favicon.
		log.Println("cannot decode embedded favicon:", err)
	}
}
259
260func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
261
262 if appRequestHandler(ctx) {
263 return
264 }
265
266 requestHash := popRequestParam(ctx, []byte("mortyhash"))
267
268 requestURI := popRequestParam(ctx, []byte("mortyurl"))
269
270 if requestURI == nil {
271 p.serveMainPage(ctx, 200, nil)
272 return
273 }
274
275 if p.Key != nil {
276 if !verifyRequestURI(requestURI, requestHash, p.Key) {
277 // HTTP status code 403 : Forbidden
278 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
279 return
280 }
281 }
282
283 requestURIQuery := ctx.QueryArgs().QueryString()
284 if len(requestURIQuery) > 0 {
285 requestURI = append(requestURI, '?')
286 requestURI = append(requestURI, requestURIQuery...)
287 }
288
289 parsedURI, err := url.Parse(string(requestURI))
290
291 if err != nil {
292 // HTTP status code 500 : Internal Server Error
293 p.serveMainPage(ctx, 500, err)
294 return
295 }
296
297 if parsedURI.Scheme == "" {
298 parsedURI.Scheme = "https"
299 requestURI = append([]byte("https://"), requestURI...)
300 }
301
302 // Serve an intermediate page for protocols other than HTTP(S)
303 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
304 p.serveExitMortyPage(ctx, parsedURI)
305 return
306 }
307
308 req := fasthttp.AcquireRequest()
309 defer fasthttp.ReleaseRequest(req)
310 req.SetConnectionClose()
311
312 requestURIStr := string(requestURI)
313
314 if DEBUG {
315 log.Println("getting", requestURIStr)
316 }
317
318 req.SetRequestURI(requestURIStr)
319 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
320
321 resp := fasthttp.AcquireResponse()
322 defer fasthttp.ReleaseResponse(resp)
323
324 req.Header.SetMethodBytes(ctx.Method())
325 if ctx.IsPost() || ctx.IsPut() {
326 req.SetBody(ctx.PostBody())
327 }
328
329 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
330
331 if err != nil {
332 if err == fasthttp.ErrTimeout {
333 // HTTP status code 504 : Gateway Time-Out
334 p.serveMainPage(ctx, 504, err)
335 } else {
336 // HTTP status code 500 : Internal Server Error
337 p.serveMainPage(ctx, 500, err)
338 }
339 return
340 }
341
342 if resp.StatusCode() != 200 {
343 switch resp.StatusCode() {
344 case 301, 302, 303, 307, 308:
345 loc := resp.Header.Peek("Location")
346 if loc != nil {
347 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
348 url, err := rc.ProxifyURI(loc)
349 if err == nil {
350 ctx.SetStatusCode(resp.StatusCode())
351 ctx.Response.Header.Add("Location", url)
352 if DEBUG {
353 log.Println("redirect to", string(loc))
354 }
355 return
356 }
357 }
358 }
359 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
360 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
361 return
362 }
363
364 contentTypeBytes := resp.Header.Peek("Content-Type")
365
366 if contentTypeBytes == nil {
367 // HTTP status code 503 : Service Unavailable
368 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
369 return
370 }
371
372 contentTypeString := string(contentTypeBytes)
373
374 // decode Content-Type header
375 contentType, error := contenttype.ParseContentType(contentTypeString)
376 if error != nil {
377 // HTTP status code 503 : Service Unavailable
378 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
379 return
380 }
381
382 // content-disposition
383 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
384
385 // check content type
386 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
387 // it is not a usual content type
388 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
389 // force attachment for allowed content type
390 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
391 } else {
392 // deny access to forbidden content type
393 // HTTP status code 403 : Forbidden
394 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
395 return
396 }
397 }
398
399 // HACK : replace */xhtml by text/html
400 if contentType.SubType == "xhtml" {
401 contentType.TopLevelType = "text"
402 contentType.SubType = "html"
403 contentType.Suffix = ""
404 }
405
406 // conversion to UTF-8
407 var responseBody []byte
408
409 if contentType.TopLevelType == "text" {
410 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
411 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
412 responseBody, err = e.NewDecoder().Bytes(resp.Body())
413 if err != nil {
414 // HTTP status code 503 : Service Unavailable
415 p.serveMainPage(ctx, 503, err)
416 return
417 }
418 } else {
419 responseBody = resp.Body()
420 }
421 // update the charset or specify it
422 contentType.Parameters["charset"] = "UTF-8"
423 } else {
424 responseBody = resp.Body()
425 }
426
427 //
428 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
429
430 // set the content type
431 ctx.SetContentType(contentType.String())
432
433 // output according to MIME type
434 switch {
435 case contentType.SubType == "css" && contentType.Suffix == "":
436 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
437 case contentType.SubType == "html" && contentType.Suffix == "":
438 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
439 default:
440 if contentDispositionBytes != nil {
441 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
442 }
443 ctx.Write(responseBody)
444 }
445}
446
// contentDispositionForceAttachment builds a Content-Disposition header value
// whose disposition type is always "attachment".
//
// Parameters of the incoming header (contentDispositionBytes, may be nil) are
// kept when it parses; otherwise they are discarded. When no "filename"
// parameter is present, the last element of url.Path is used, unless the path
// has no usable last element (empty path or root), in which case no filename
// is emitted.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var contentDispositionParams map[string]string

	if contentDispositionBytes != nil {
		var err error
		_, contentDispositionParams, err = mime.ParseMediaType(string(contentDispositionBytes))
		if err != nil {
			contentDispositionParams = nil
		}
	}
	if contentDispositionParams == nil {
		contentDispositionParams = make(map[string]string)
	}

	// mime.ParseMediaType lower-cases parameter names, so the lookup key and
	// the key written below must both be "filename".
	if _, fileNameDefined := contentDispositionParams["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		if name != "." && name != "/" && name != string(filepath.Separator) {
			contentDispositionParams["filename"] = name
		}
	}

	return []byte(mime.FormatMediaType("attachment", contentDispositionParams))
}
469
470func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
471 // serve robots.txt
472 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
473 ctx.SetContentType("text/plain")
474 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
475 return true
476 }
477
478 // server favicon.ico
479 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
480 ctx.SetContentType("image/png")
481 ctx.Write(FAVICON_BYTES)
482 return true
483 }
484
485 return false
486}
487
488func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
489 param := ctx.QueryArgs().PeekBytes(paramName)
490
491 if param == nil {
492 param = ctx.PostArgs().PeekBytes(paramName)
493 if param != nil {
494 ctx.PostArgs().DelBytes(paramName)
495 }
496 } else {
497 ctx.QueryArgs().DelBytes(paramName)
498 }
499
500 return param
501}
502
503func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
504 // TODO
505
506 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
507
508 if urlSlices == nil {
509 out.Write(css)
510 return
511 }
512
513 startIndex := 0
514
515 for _, s := range urlSlices {
516 urlStart := s[4]
517 urlEnd := s[5]
518
519 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
520 out.Write(css[startIndex:urlStart])
521 out.Write([]byte(uri))
522 startIndex = urlEnd
523 } else if DEBUG {
524 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
525 }
526 }
527 if startIndex < len(css) {
528 out.Write(css[startIndex:len(css)])
529 }
530}
531
532func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
533 r := bytes.NewReader(htmlDoc)
534 decoder := html.NewTokenizer(r)
535 decoder.AllowCDATA(true)
536
537 unsafeElements := make([][]byte, 0, 8)
538 state := STATE_DEFAULT
539 for {
540 token := decoder.Next()
541 if token == html.ErrorToken {
542 err := decoder.Err()
543 if err != io.EOF {
544 log.Println("failed to parse HTML")
545 }
546 break
547 }
548
549 if len(unsafeElements) == 0 {
550
551 switch token {
552 case html.StartTagToken, html.SelfClosingTagToken:
553 tag, hasAttrs := decoder.TagName()
554 safe := !inArray(tag, UNSAFE_ELEMENTS)
555 if !safe {
556 if token != html.SelfClosingTagToken {
557 var unsafeTag []byte = make([]byte, len(tag))
558 copy(unsafeTag, tag)
559 unsafeElements = append(unsafeElements, unsafeTag)
560 }
561 break
562 }
563 if bytes.Equal(tag, []byte("base")) {
564 for {
565 attrName, attrValue, moreAttr := decoder.TagAttr()
566 if bytes.Equal(attrName, []byte("href")) {
567 parsedURI, err := url.Parse(string(attrValue))
568 if err == nil {
569 rc.BaseURL = parsedURI
570 }
571 }
572 if !moreAttr {
573 break
574 }
575 }
576 break
577 }
578 if bytes.Equal(tag, []byte("noscript")) {
579 state = STATE_IN_NOSCRIPT
580 break
581 }
582 var attrs [][][]byte
583 if hasAttrs {
584 for {
585 attrName, attrValue, moreAttr := decoder.TagAttr()
586 attrs = append(attrs, [][]byte{
587 attrName,
588 attrValue,
589 []byte(html.EscapeString(string(attrValue))),
590 })
591 if !moreAttr {
592 break
593 }
594 }
595 }
596 if bytes.Equal(tag, []byte("link")) {
597 sanitizeLinkTag(rc, out, attrs)
598 break
599 }
600
601 if bytes.Equal(tag, []byte("meta")) {
602 sanitizeMetaTag(rc, out, attrs)
603 break
604 }
605
606 fmt.Fprintf(out, "<%s", tag)
607
608 if hasAttrs {
609 sanitizeAttrs(rc, out, attrs)
610 }
611
612 if token == html.SelfClosingTagToken {
613 fmt.Fprintf(out, " />")
614 } else {
615 fmt.Fprintf(out, ">")
616 if bytes.Equal(tag, []byte("style")) {
617 state = STATE_IN_STYLE
618 }
619 }
620
621 if bytes.Equal(tag, []byte("head")) {
622 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
623 }
624
625 if bytes.Equal(tag, []byte("form")) {
626 var formURL *url.URL
627 for _, attr := range attrs {
628 if bytes.Equal(attr[0], []byte("action")) {
629 formURL, _ = url.Parse(string(attr[1]))
630 formURL = mergeURIs(rc.BaseURL, formURL)
631 break
632 }
633 }
634 if formURL == nil {
635 formURL = rc.BaseURL
636 }
637 urlStr := formURL.String()
638 var key string
639 if rc.Key != nil {
640 key = hash(urlStr, rc.Key)
641 }
642 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
643
644 }
645
646 case html.EndTagToken:
647 tag, _ := decoder.TagName()
648 writeEndTag := true
649 switch string(tag) {
650 case "body":
651 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String(), rc.BaseURL.String())
652 case "style":
653 state = STATE_DEFAULT
654 case "noscript":
655 state = STATE_DEFAULT
656 writeEndTag = false
657 }
658 // skip noscript tags - only the tag, not the content, because javascript is sanitized
659 if writeEndTag {
660 fmt.Fprintf(out, "</%s>", tag)
661 }
662
663 case html.TextToken:
664 switch state {
665 case STATE_DEFAULT:
666 fmt.Fprintf(out, "%s", decoder.Raw())
667 case STATE_IN_STYLE:
668 sanitizeCSS(rc, out, decoder.Raw())
669 case STATE_IN_NOSCRIPT:
670 sanitizeHTML(rc, out, decoder.Raw())
671 }
672
673 case html.CommentToken:
674 // ignore comment. TODO : parse IE conditional comment
675
676 case html.DoctypeToken:
677 out.Write(decoder.Raw())
678 }
679 } else {
680 switch token {
681 case html.StartTagToken, html.SelfClosingTagToken:
682 tag, _ := decoder.TagName()
683 if inArray(tag, UNSAFE_ELEMENTS) {
684 unsafeElements = append(unsafeElements, tag)
685 }
686
687 case html.EndTagToken:
688 tag, _ := decoder.TagName()
689 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
690 unsafeElements = unsafeElements[:len(unsafeElements)-1]
691 }
692 }
693 }
694 }
695}
696
697func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
698 exclude := false
699 for _, attr := range attrs {
700 attrName := attr[0]
701 attrValue := attr[1]
702 if bytes.Equal(attrName, []byte("rel")) {
703 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
704 exclude = true
705 break
706 }
707 }
708 if bytes.Equal(attrName, []byte("as")) {
709 if bytes.Equal(attrValue, []byte("script")) {
710 exclude = true
711 break
712 }
713 }
714 }
715
716 if !exclude {
717 out.Write([]byte("<link"))
718 for _, attr := range attrs {
719 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
720 }
721 out.Write([]byte(">"))
722 }
723}
724
725func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
726 var http_equiv []byte
727 var content []byte
728
729 for _, attr := range attrs {
730 attrName := attr[0]
731 attrValue := attr[1]
732 if bytes.Equal(attrName, []byte("http-equiv")) {
733 http_equiv = bytes.ToLower(attrValue)
734 // exclude some <meta http-equiv="..." ..>
735 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
736 return
737 }
738 }
739 if bytes.Equal(attrName, []byte("content")) {
740 content = attrValue
741 }
742 if bytes.Equal(attrName, []byte("charset")) {
743 // exclude <meta charset="...">
744 return
745 }
746 }
747
748 out.Write([]byte("<meta"))
749 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
750 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
751 contentUrl := content[urlIndex+4:]
752 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
753 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
754 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
755 contentUrl = contentUrl[1 : len(contentUrl)-1]
756 }
757 }
758 // output proxify result
759 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
760 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
761 }
762 } else {
763 if len(http_equiv) > 0 {
764 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
765 }
766 sanitizeAttrs(rc, out, attrs)
767 }
768 out.Write([]byte(">"))
769}
770
771func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
772 for _, attr := range attrs {
773 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
774 }
775}
776
777func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
778 if inArray(attrName, SAFE_ATTRIBUTES) {
779 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
780 return
781 }
782 switch string(attrName) {
783 case "src", "href", "action":
784 if uri, err := rc.ProxifyURI(attrValue); err == nil {
785 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
786 } else if DEBUG {
787 log.Println("cannot proxify uri:", string(attrValue))
788 }
789 case "style":
790 cssAttr := bytes.NewBuffer(nil)
791 sanitizeCSS(rc, cssAttr, attrValue)
792 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
793 }
794}
795
// mergeURIs resolves the reference u2 against the base u1. A nil u2 yields u1
// itself.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
802
// sanitizeURI strips bytes <= 32 (control characters and space) from the
// beginning and the end of uri and lower-cases the scheme.
//
// It returns the cleaned URI and the scheme including its trailing ":" (empty
// when the URI has no scheme). Bytes <= 32 found inside the scheme are removed
// as well. The cleaned scheme is written back into uri, so the input slice is
// modified and the result aliases it.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	start := 0                   // index of the first byte > 32
	started := false             // whether start has been set
	colonAt := -1                // index of the ':' ending the scheme
	lead := make([]byte, 0, 10)  // lower-cased bytes > 32 seen so far

scan:
	for pos, ch := range uri {
		// ignore special characters and space
		if ch <= 32 {
			continue
		}
		if 'A' <= ch && ch <= 'Z' {
			ch += 'a' - 'A'
		}
		lead = append(lead, ch)

		if !started {
			start, started = pos, true
		}

		switch ch {
		case ':':
			// end of the scheme
			colonAt = pos
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI: there is no scheme
			break scan
		}
	}

	if colonAt == -1 {
		// scheme NOT found
		return uri[start:], ""
	}

	// Scheme found: place its cleaned form so that it ends on the ':' and
	// return the URI from there.
	from := colonAt - len(lead) + 1
	copy(uri[from:], lead)
	return uri[from:], string(lead)
}
854
855func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
856 // sanitize URI
857 uri, scheme := sanitizeURI(uri)
858
859 // remove javascript protocol
860 if scheme == "javascript:" {
861 return "", nil
862 }
863
864 // TODO check malicious data: - e.g. data:script
865 if scheme == "data:" {
866 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
867 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
868 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
869 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
870 bytes.HasPrefix(uri, []byte("data:image/webp")) {
871 // should be safe
872 return string(uri), nil
873 } else {
874 // unsafe data
875 return "", nil
876 }
877 }
878
879 // parse the uri
880 u, err := url.Parse(string(uri))
881 if err != nil {
882 return "", err
883 }
884
885 // get the fragment (with the prefix "#")
886 fragment := ""
887 if len(u.Fragment) > 0 {
888 fragment = "#" + u.Fragment
889 }
890
891 // reset the fragment: it is not included in the mortyurl
892 u.Fragment = ""
893
894 // merge the URI with the document URI
895 u = mergeURIs(rc.BaseURL, u)
896
897 // simple internal link ?
898 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
899 if u.Scheme == rc.BaseURL.Scheme &&
900 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
901 u.Host == rc.BaseURL.Host &&
902 u.Path == rc.BaseURL.Path &&
903 u.RawQuery == rc.BaseURL.RawQuery {
904 // the fragment is the only difference between the document URI and the uri parameter
905 return fragment, nil
906 }
907
908 // return full URI and fragment (if not empty)
909 morty_uri := u.String()
910
911 if rc.Key == nil {
912 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
913 }
914 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
915}
916
// inArray reports whether a contains an element equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
925
// hash returns the hex-encoded HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}
931
932func verifyRequestURI(uri, hashMsg, key []byte) bool {
933 h := make([]byte, hex.DecodedLen(len(hashMsg)))
934 _, err := hex.Decode(h, hashMsg)
935 if err != nil {
936 if DEBUG {
937 log.Println("hmac error:", err)
938 }
939 return false
940 }
941 mac := hmac.New(sha256.New, key)
942 mac.Write(uri)
943 return hmac.Equal(h, mac.Sum(nil))
944}
945
946func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
947 ctx.SetContentType("text/html")
948 ctx.SetStatusCode(403)
949 ctx.Write([]byte(MORTY_HTML_PAGE_START))
950 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
951 ctx.Write([]byte("<p>Following</p><p><a href=\""))
952 ctx.Write([]byte(html.EscapeString(uri.String())))
953 ctx.Write([]byte("\" rel=\"noreferrer\">"))
954 ctx.Write([]byte(html.EscapeString(uri.String())))
955 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
956 ctx.Write([]byte(MORTY_HTML_PAGE_END))
957}
958
959func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
960 ctx.SetContentType("text/html; charset=UTF-8")
961 ctx.SetStatusCode(statusCode)
962 ctx.Write([]byte(MORTY_HTML_PAGE_START))
963 if err != nil {
964 if DEBUG {
965 log.Println("error:", err)
966 }
967 ctx.Write([]byte("<h2>Error: "))
968 ctx.Write([]byte(html.EscapeString(err.Error())))
969 ctx.Write([]byte("</h2>"))
970 }
971 if p.Key == nil {
972 ctx.Write([]byte(`
973 <form action="post">
974 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
975 <input type="submit" value="go" />
976 </form>`))
977 } else {
978 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
979 }
980 ctx.Write([]byte(MORTY_HTML_PAGE_END))
981}
982
983func main() {
984 default_listen_addr := os.Getenv("MORTY_ADDRESS")
985 if default_listen_addr == "" {
986 default_listen_addr = "127.0.0.1:3000"
987 }
988 default_key := os.Getenv("MORTY_KEY")
989 listen := flag.String("listen", default_listen_addr, "Listen address")
990 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
991 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
992 version := flag.Bool("version", false, "Show version")
993 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
994 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
995 flag.Parse()
996
997 if *version {
998 fmt.Println(VERSION)
999 return
1000 }
1001
1002 if *ipv6 {
1003 CLIENT.DialDualStack = true
1004 }
1005
1006 if *socks5 != "" {
1007 // this disables CLIENT.DialDualStack
1008 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1009 }
1010
1011 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
1012
1013 if *key != "" {
1014 var err error
1015 p.Key, err = base64.StdEncoding.DecodeString(*key)
1016 if err != nil {
1017 log.Fatal("Error parsing -key", err.Error())
1018 os.Exit(1)
1019 }
1020 }
1021
1022 log.Println("listening on", *listen)
1023
1024 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1025 log.Fatal("Error in ListenAndServe:", err)
1026 }
1027}
Note: See TracBrowser for help on using the repository browser.