source: code/trunk/morty.go@ 124

Last change on this file since 124 was 124, checked in by asciimoo, 5 years ago

[enh] inject header even if the page does not contain body element

File size: 29.4 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "html/template"
13 "io"
14 "log"
15 "mime"
16 "net/url"
17 "os"
18 "path/filepath"
19 "regexp"
20 "strings"
21 "time"
22 "unicode/utf8"
23
24 "github.com/valyala/fasthttp"
25 "github.com/valyala/fasthttp/fasthttpproxy"
26 "golang.org/x/net/html"
27 "golang.org/x/net/html/charset"
28 "golang.org/x/text/encoding"
29
30 "github.com/asciimoo/morty/contenttype"
31)
32
// Parser states tracked by sanitizeHTML; they select how the next text token
// is handled (copied as-is, sanitized as CSS, or re-sanitized as HTML).
const (
	STATE_DEFAULT int = iota
	STATE_IN_STYLE
	STATE_IN_NOSCRIPT
)

// VERSION is the string printed when the -version flag is given.
const VERSION = "v0.2.0"
40
41var DEBUG = os.Getenv("DEBUG") != "false"
42
43var CLIENT *fasthttp.Client = &fasthttp.Client{
44 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
45 ReadBufferSize: 16 * 1024, // 16K
46}
47
48var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
49
50var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
51 // html
52 contenttype.NewFilterEquals("text", "html", ""),
53 contenttype.NewFilterEquals("application", "xhtml", "xml"),
54 // css
55 contenttype.NewFilterEquals("text", "css", ""),
56 // images
57 contenttype.NewFilterEquals("image", "gif", ""),
58 contenttype.NewFilterEquals("image", "png", ""),
59 contenttype.NewFilterEquals("image", "jpeg", ""),
60 contenttype.NewFilterEquals("image", "pjpeg", ""),
61 contenttype.NewFilterEquals("image", "webp", ""),
62 contenttype.NewFilterEquals("image", "tiff", ""),
63 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
64 contenttype.NewFilterEquals("image", "bmp", ""),
65 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
66 contenttype.NewFilterEquals("image", "x-icon", ""),
67 // fonts
68 contenttype.NewFilterEquals("application", "font-otf", ""),
69 contenttype.NewFilterEquals("application", "font-ttf", ""),
70 contenttype.NewFilterEquals("application", "font-woff", ""),
71 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
72})
73
74var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
75 // texts
76 contenttype.NewFilterEquals("text", "csv", ""),
77 contenttype.NewFilterEquals("text", "tab-separated-values", ""),
78 contenttype.NewFilterEquals("text", "plain", ""),
79 // API
80 contenttype.NewFilterEquals("application", "json", ""),
81 // Documents
82 contenttype.NewFilterEquals("application", "x-latex", ""),
83 contenttype.NewFilterEquals("application", "pdf", ""),
84 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
85 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
86 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
87 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
88 // Compressed archives
89 contenttype.NewFilterEquals("application", "zip", ""),
90 contenttype.NewFilterEquals("application", "gzip", ""),
91 contenttype.NewFilterEquals("application", "x-compressed", ""),
92 contenttype.NewFilterEquals("application", "x-gtar", ""),
93 contenttype.NewFilterEquals("application", "x-compress", ""),
94 // Generic binary
95 contenttype.NewFilterEquals("application", "octet-stream", ""),
96})
97
98var ALLOWED_CONTENTTYPE_PARAMETERS map[string]bool = map[string]bool{
99 "charset": true,
100}
101
// byteSlices converts every string into its own []byte, keeping the order.
func byteSlices(values ...string) [][]byte {
	out := make([][]byte, len(values))
	for i, v := range values {
		out[i] = []byte(v)
	}
	return out
}

// UNSAFE_ELEMENTS lists the tag names sanitizeHTML refuses to emit.
// "iframe" is deliberately not part of the list.
var UNSAFE_ELEMENTS = byteSlices(
	"applet",
	"canvas",
	"embed",
	"math",
	"script",
	"svg",
)

// SAFE_ATTRIBUTES lists the attribute names sanitizeAttr copies to the
// output with their HTML-escaped value.
var SAFE_ATTRIBUTES = byteSlices(
	"abbr",
	"accesskey",
	"align",
	"alt",
	"as",
	"autocomplete",
	"charset",
	"checked",
	"class",
	"content",
	"contenteditable",
	"contextmenu",
	"dir",
	"for",
	"height",
	"hidden",
	"hreflang",
	"id",
	"lang",
	"media",
	"method",
	"name",
	"nowrap",
	"placeholder",
	"property",
	"rel",
	"spellcheck",
	"tabindex",
	"target",
	"title",
	"translate",
	"type",
	"value",
	"width",
)

// LINK_REL_SAFE_VALUES lists the rel values sanitizeLinkTag accepts on a
// <link> element; any other value causes the element to be dropped.
var LINK_REL_SAFE_VALUES = byteSlices(
	"alternate",
	"archives",
	"author",
	"copyright",
	"first",
	"help",
	"icon",
	"index",
	"last",
	"license",
	"manifest",
	"next",
	"pingback",
	"prev",
	"publisher",
	"search",
	"shortcut icon",
	"stylesheet",
	"up",
)

// LINK_HTTP_EQUIV_SAFE_VALUES lists the http-equiv values sanitizeMetaTag
// accepts on a <meta> element.
//
// X-UA-Compatible is added automatically, so it can be skipped here.
// "refresh" gets its URL rewritten; "location" is not supported yet
// (TODO: URL rewrite).
var LINK_HTTP_EQUIV_SAFE_VALUES = byteSlices(
	"date",
	"last-modified",
	"refresh",
	"content-language",
)
179
type (
	// Proxy holds the settings shared by every request.
	Proxy struct {
		// Key is the HMAC key used to verify "mortyhash"; nil disables the check.
		Key []byte
		// RequestTimeout is the timeout applied to each upstream request.
		RequestTimeout time.Duration
	}

	// RequestConfig carries the per-document state used while rewriting URLs.
	RequestConfig struct {
		// Key is the HMAC key used to sign rewritten URLs (nil: no signature).
		Key []byte
		// BaseURL is the URL that relative references are resolved against.
		BaseURL *url.URL
		// BodyInjected is set by sanitizeHTML once the header was injected
		// in front of a closing body tag.
		BodyInjected bool
	}

	// HTMLBodyExtParam is the data passed to HTML_BODY_EXTENSION.
	HTMLBodyExtParam struct {
		BaseURL     string
		HasMortyKey bool
	}

	// HTMLFormExtParam is the data passed to HTML_FORM_EXTENSION.
	HTMLFormExtParam struct {
		BaseURL   string
		MortyHash string
	}
)
200
// HTML_FORM_EXTENSION and HTML_BODY_EXTENSION are parsed in init.
// The first is written right after every opening <form> tag, the second in
// front of the closing </body> tag (or at the end of the document).
var HTML_FORM_EXTENSION *template.Template
var HTML_BODY_EXTENSION *template.Template

// HTML_HEAD_CONTENT_TYPE is written right after the opening <head> tag of
// every sanitized document.
var HTML_HEAD_CONTENT_TYPE string = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`

// MORTY_HTML_PAGE_START opens the pages generated by Morty itself (main page,
// error pages, exit page).
// The link rule used to read "a { text-decoration: none; #2980b9; }", which is
// not a valid declaration; the missing "color:" property name was added.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`

// MORTY_HTML_PAGE_END closes the pages opened with MORTY_HTML_PAGE_START.
var MORTY_HTML_PAGE_END string = `
 </div>
 <div class="footer">
 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://github.com/asciimoo/morty">view on github</a>
 </p>
 </div>
</body>
</html>`

// FAVICON_BYTES is the favicon image served at /favicon.ico; it is filled in
// by init.
var FAVICON_BYTES []byte
242
243func init() {
244 FaviconBase64 := "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"
245
246 FAVICON_BYTES, _ = base64.StdEncoding.DecodeString(FaviconBase64)
247 var err error
248 HTML_FORM_EXTENSION, err = template.New("html_form_extension").Parse(
249 `<input type="hidden" name="mortyurl" value="{{.BaseURL}}" />{{if .MortyHash}}<input type="hidden" name="mortyhash" value="{{.MortyHash}}" />{{end}}`)
250 if err != nil {
251 panic(err)
252 }
253 HTML_BODY_EXTENSION, err = template.New("html_body_extension").Parse(`
254<input type="checkbox" id="mortytoggle" autocomplete="off" />
255<div id="mortyheader">
256 <form method="get">
257 <label for="mortytoggle">hide</label>
258 <span><a href="/">Morty Proxy</a></span>
259 <input type="url" value="{{.BaseURL}}" name="mortyurl" {{if .HasMortyKey }}readonly="true"{{end}} />
260 This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page, visit <a href="{{.BaseURL}}" rel="noreferrer">original site</a>.
261 </form>
262</div>
263<style>
264body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
265#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #AAAAAA; background: #FFF; padding: 4px; color: #444; height: 42px; }
266#mortyheader p { padding: 0 0 0.7em 0; display: block; }
267#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
268#mortyheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
269#mortyheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
270input[type=checkbox]#mortytoggle { display: none; }
271input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
272#mortyheader input[type=url] { width: 50%; padding: 4px; font-size: 16px; }
273</style>
274`)
275 if err != nil {
276 panic(err)
277 }
278}
279
280func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
281
282 if appRequestHandler(ctx) {
283 return
284 }
285
286 requestHash := popRequestParam(ctx, []byte("mortyhash"))
287
288 requestURI := popRequestParam(ctx, []byte("mortyurl"))
289
290 if requestURI == nil {
291 p.serveMainPage(ctx, 200, nil)
292 return
293 }
294
295 if p.Key != nil {
296 if !verifyRequestURI(requestURI, requestHash, p.Key) {
297 // HTTP status code 403 : Forbidden
298 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
299 return
300 }
301 }
302
303 requestURIQuery := ctx.QueryArgs().QueryString()
304 if len(requestURIQuery) > 0 {
305 requestURI = append(requestURI, '?')
306 requestURI = append(requestURI, requestURIQuery...)
307 }
308
309 parsedURI, err := url.Parse(string(requestURI))
310
311 if err != nil {
312 // HTTP status code 500 : Internal Server Error
313 p.serveMainPage(ctx, 500, err)
314 return
315 }
316
317 if parsedURI.Scheme == "" {
318 parsedURI.Scheme = "https"
319 requestURI = append([]byte("https://"), requestURI...)
320 }
321
322 // Serve an intermediate page for protocols other than HTTP(S)
323 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
324 p.serveExitMortyPage(ctx, parsedURI)
325 return
326 }
327
328 req := fasthttp.AcquireRequest()
329 defer fasthttp.ReleaseRequest(req)
330 req.SetConnectionClose()
331
332 requestURIStr := string(requestURI)
333
334 if DEBUG {
335 log.Println("getting", requestURIStr)
336 }
337
338 req.SetRequestURI(requestURIStr)
339 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
340
341 resp := fasthttp.AcquireResponse()
342 defer fasthttp.ReleaseResponse(resp)
343
344 req.Header.SetMethodBytes(ctx.Method())
345 if ctx.IsPost() || ctx.IsPut() {
346 req.SetBody(ctx.PostBody())
347 }
348
349 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
350
351 if err != nil {
352 if err == fasthttp.ErrTimeout {
353 // HTTP status code 504 : Gateway Time-Out
354 p.serveMainPage(ctx, 504, err)
355 } else {
356 // HTTP status code 500 : Internal Server Error
357 p.serveMainPage(ctx, 500, err)
358 }
359 return
360 }
361
362 if resp.StatusCode() != 200 {
363 switch resp.StatusCode() {
364 case 301, 302, 303, 307, 308:
365 loc := resp.Header.Peek("Location")
366 if loc != nil {
367 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
368 url, err := rc.ProxifyURI(loc)
369 if err == nil {
370 ctx.SetStatusCode(resp.StatusCode())
371 ctx.Response.Header.Add("Location", url)
372 if DEBUG {
373 log.Println("redirect to", string(loc))
374 }
375 return
376 }
377 }
378 }
379 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
380 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
381 return
382 }
383
384 contentTypeBytes := resp.Header.Peek("Content-Type")
385
386 if contentTypeBytes == nil {
387 // HTTP status code 503 : Service Unavailable
388 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
389 return
390 }
391
392 contentTypeString := string(contentTypeBytes)
393
394 // decode Content-Type header
395 contentType, error := contenttype.ParseContentType(contentTypeString)
396 if error != nil {
397 // HTTP status code 503 : Service Unavailable
398 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
399 return
400 }
401
402 // content-disposition
403 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
404
405 // check content type
406 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
407 // it is not a usual content type
408 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
409 // force attachment for allowed content type
410 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
411 } else {
412 // deny access to forbidden content type
413 // HTTP status code 403 : Forbidden
414 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
415 return
416 }
417 }
418
419 // HACK : replace */xhtml by text/html
420 if contentType.SubType == "xhtml" {
421 contentType.TopLevelType = "text"
422 contentType.SubType = "html"
423 contentType.Suffix = ""
424 }
425
426 // conversion to UTF-8
427 var responseBody []byte
428
429 if contentType.TopLevelType == "text" {
430 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
431 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
432 responseBody, err = e.NewDecoder().Bytes(resp.Body())
433 if err != nil {
434 // HTTP status code 503 : Service Unavailable
435 p.serveMainPage(ctx, 503, err)
436 return
437 }
438 } else {
439 responseBody = resp.Body()
440 }
441 // update the charset or specify it
442 contentType.Parameters["charset"] = "UTF-8"
443 } else {
444 responseBody = resp.Body()
445 }
446
447 //
448 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
449
450 // set the content type
451 ctx.SetContentType(contentType.String())
452
453 // output according to MIME type
454 switch {
455 case contentType.SubType == "css" && contentType.Suffix == "":
456 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
457 case contentType.SubType == "html" && contentType.Suffix == "":
458 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
459 sanitizeHTML(rc, ctx, responseBody)
460 if !rc.BodyInjected {
461 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
462 if len(rc.Key) > 0 {
463 p.HasMortyKey = true
464 }
465 err := HTML_BODY_EXTENSION.Execute(ctx, p)
466 if err != nil {
467 if DEBUG {
468 fmt.Println("failed to inject body extension", err)
469 }
470 }
471 }
472 default:
473 if contentDispositionBytes != nil {
474 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
475 }
476 ctx.Write(responseBody)
477 }
478}
479
// contentDispositionForceAttachment returns a Content-Disposition header value
// whose disposition type is always "attachment".
//
// The parameters of contentDispositionBytes (which may be nil) are kept when
// the value can be parsed. When no "filename" parameter is present, the last
// element of url.Path is used, unless the path has no usable last element.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var params map[string]string

	if contentDispositionBytes != nil {
		// The disposition type is overridden and an unparsable header is
		// treated as if it was absent, hence both results are ignored.
		_, params, _ = mime.ParseMediaType(string(contentDispositionBytes))
	}
	if params == nil {
		params = make(map[string]string)
	}

	if _, fileNameDefined := params["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		// filepath.Base returns "." for an empty path and the separator for a
		// path made of separators only: neither is a file name.
		name := filepath.Base(url.Path)
		if name != "." && name != "/" && name != string(filepath.Separator) {
			params["filename"] = name
		}
	}

	value := mime.FormatMediaType("attachment", params)
	if value == "" {
		// FormatMediaType gives up on parameters it cannot represent;
		// the attachment disposition must be kept in any case.
		value = "attachment"
	}
	return []byte(value)
}
502
503func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
504 // serve robots.txt
505 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
506 ctx.SetContentType("text/plain")
507 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
508 return true
509 }
510
511 // server favicon.ico
512 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
513 ctx.SetContentType("image/png")
514 ctx.Write(FAVICON_BYTES)
515 return true
516 }
517
518 return false
519}
520
521func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
522 param := ctx.QueryArgs().PeekBytes(paramName)
523
524 if param == nil {
525 param = ctx.PostArgs().PeekBytes(paramName)
526 ctx.PostArgs().DelBytes(paramName)
527 }
528 ctx.QueryArgs().DelBytes(paramName)
529
530 return param
531}
532
533func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
534 // TODO
535
536 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
537
538 if urlSlices == nil {
539 out.Write(css)
540 return
541 }
542
543 startIndex := 0
544
545 for _, s := range urlSlices {
546 urlStart := s[4]
547 urlEnd := s[5]
548
549 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
550 out.Write(css[startIndex:urlStart])
551 out.Write([]byte(uri))
552 startIndex = urlEnd
553 } else if DEBUG {
554 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
555 }
556 }
557 if startIndex < len(css) {
558 out.Write(css[startIndex:len(css)])
559 }
560}
561
562func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
563 r := bytes.NewReader(htmlDoc)
564 decoder := html.NewTokenizer(r)
565 decoder.AllowCDATA(true)
566
567 unsafeElements := make([][]byte, 0, 8)
568 state := STATE_DEFAULT
569 for {
570 token := decoder.Next()
571 if token == html.ErrorToken {
572 err := decoder.Err()
573 if err != io.EOF {
574 log.Println("failed to parse HTML")
575 }
576 break
577 }
578
579 if len(unsafeElements) == 0 {
580
581 switch token {
582 case html.StartTagToken, html.SelfClosingTagToken:
583 tag, hasAttrs := decoder.TagName()
584 safe := !inArray(tag, UNSAFE_ELEMENTS)
585 if !safe {
586 if token != html.SelfClosingTagToken {
587 var unsafeTag []byte = make([]byte, len(tag))
588 copy(unsafeTag, tag)
589 unsafeElements = append(unsafeElements, unsafeTag)
590 }
591 break
592 }
593 if bytes.Equal(tag, []byte("base")) {
594 for {
595 attrName, attrValue, moreAttr := decoder.TagAttr()
596 if bytes.Equal(attrName, []byte("href")) {
597 parsedURI, err := url.Parse(string(attrValue))
598 if err == nil {
599 rc.BaseURL = parsedURI
600 }
601 }
602 if !moreAttr {
603 break
604 }
605 }
606 break
607 }
608 if bytes.Equal(tag, []byte("noscript")) {
609 state = STATE_IN_NOSCRIPT
610 break
611 }
612 var attrs [][][]byte
613 if hasAttrs {
614 for {
615 attrName, attrValue, moreAttr := decoder.TagAttr()
616 attrs = append(attrs, [][]byte{
617 attrName,
618 attrValue,
619 []byte(html.EscapeString(string(attrValue))),
620 })
621 if !moreAttr {
622 break
623 }
624 }
625 }
626 if bytes.Equal(tag, []byte("link")) {
627 sanitizeLinkTag(rc, out, attrs)
628 break
629 }
630
631 if bytes.Equal(tag, []byte("meta")) {
632 sanitizeMetaTag(rc, out, attrs)
633 break
634 }
635
636 fmt.Fprintf(out, "<%s", tag)
637
638 if hasAttrs {
639 sanitizeAttrs(rc, out, attrs)
640 }
641
642 if token == html.SelfClosingTagToken {
643 fmt.Fprintf(out, " />")
644 } else {
645 fmt.Fprintf(out, ">")
646 if bytes.Equal(tag, []byte("style")) {
647 state = STATE_IN_STYLE
648 }
649 }
650
651 if bytes.Equal(tag, []byte("head")) {
652 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
653 }
654
655 if bytes.Equal(tag, []byte("form")) {
656 var formURL *url.URL
657 for _, attr := range attrs {
658 if bytes.Equal(attr[0], []byte("action")) {
659 formURL, _ = url.Parse(string(attr[1]))
660 formURL = mergeURIs(rc.BaseURL, formURL)
661 break
662 }
663 }
664 if formURL == nil {
665 formURL = rc.BaseURL
666 }
667 urlStr := formURL.String()
668 var key string
669 if rc.Key != nil {
670 key = hash(urlStr, rc.Key)
671 }
672 err := HTML_FORM_EXTENSION.Execute(out, HTMLFormExtParam{urlStr, key})
673 if err != nil {
674 if DEBUG {
675 fmt.Println("failed to inject body extension", err)
676 }
677 }
678 }
679
680 case html.EndTagToken:
681 tag, _ := decoder.TagName()
682 writeEndTag := true
683 switch string(tag) {
684 case "body":
685 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
686 if len(rc.Key) > 0 {
687 p.HasMortyKey = true
688 }
689 err := HTML_BODY_EXTENSION.Execute(out, p)
690 if err != nil {
691 if DEBUG {
692 fmt.Println("failed to inject body extension", err)
693 }
694 }
695 rc.BodyInjected = true
696 case "style":
697 state = STATE_DEFAULT
698 case "noscript":
699 state = STATE_DEFAULT
700 writeEndTag = false
701 }
702 // skip noscript tags - only the tag, not the content, because javascript is sanitized
703 if writeEndTag {
704 fmt.Fprintf(out, "</%s>", tag)
705 }
706
707 case html.TextToken:
708 switch state {
709 case STATE_DEFAULT:
710 fmt.Fprintf(out, "%s", decoder.Raw())
711 case STATE_IN_STYLE:
712 sanitizeCSS(rc, out, decoder.Raw())
713 case STATE_IN_NOSCRIPT:
714 sanitizeHTML(rc, out, decoder.Raw())
715 }
716
717 case html.CommentToken:
718 // ignore comment. TODO : parse IE conditional comment
719
720 case html.DoctypeToken:
721 out.Write(decoder.Raw())
722 }
723 } else {
724 switch token {
725 case html.StartTagToken, html.SelfClosingTagToken:
726 tag, _ := decoder.TagName()
727 if inArray(tag, UNSAFE_ELEMENTS) {
728 unsafeElements = append(unsafeElements, tag)
729 }
730
731 case html.EndTagToken:
732 tag, _ := decoder.TagName()
733 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
734 unsafeElements = unsafeElements[:len(unsafeElements)-1]
735 }
736 }
737 }
738 }
739}
740
741func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
742 exclude := false
743 for _, attr := range attrs {
744 attrName := attr[0]
745 attrValue := attr[1]
746 if bytes.Equal(attrName, []byte("rel")) {
747 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
748 exclude = true
749 break
750 }
751 }
752 if bytes.Equal(attrName, []byte("as")) {
753 if bytes.Equal(attrValue, []byte("script")) {
754 exclude = true
755 break
756 }
757 }
758 }
759
760 if !exclude {
761 out.Write([]byte("<link"))
762 for _, attr := range attrs {
763 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
764 }
765 out.Write([]byte(">"))
766 }
767}
768
769func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
770 var http_equiv []byte
771 var content []byte
772
773 for _, attr := range attrs {
774 attrName := attr[0]
775 attrValue := attr[1]
776 if bytes.Equal(attrName, []byte("http-equiv")) {
777 http_equiv = bytes.ToLower(attrValue)
778 // exclude some <meta http-equiv="..." ..>
779 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
780 return
781 }
782 }
783 if bytes.Equal(attrName, []byte("content")) {
784 content = attrValue
785 }
786 if bytes.Equal(attrName, []byte("charset")) {
787 // exclude <meta charset="...">
788 return
789 }
790 }
791
792 out.Write([]byte("<meta"))
793 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
794 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
795 contentUrl := content[urlIndex+4:]
796 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
797 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
798 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
799 contentUrl = contentUrl[1 : len(contentUrl)-1]
800 }
801 }
802 // output proxify result
803 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
804 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
805 }
806 } else {
807 if len(http_equiv) > 0 {
808 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
809 }
810 sanitizeAttrs(rc, out, attrs)
811 }
812 out.Write([]byte(">"))
813}
814
815func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
816 for _, attr := range attrs {
817 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
818 }
819}
820
821func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
822 if inArray(attrName, SAFE_ATTRIBUTES) {
823 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
824 return
825 }
826 switch string(attrName) {
827 case "src", "href", "action":
828 if uri, err := rc.ProxifyURI(attrValue); err == nil {
829 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
830 } else if DEBUG {
831 log.Println("cannot proxify uri:", string(attrValue))
832 }
833 case "style":
834 cssAttr := bytes.NewBuffer(nil)
835 sanitizeCSS(rc, cssAttr, attrValue)
836 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
837 }
838}
839
// mergeURIs resolves u2 against u1. It returns u1 itself when u2 is nil.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
846
// sanitizeURI cleans the beginning and the end of uri and extracts its scheme.
//
// Bytes lower or equal to 32 (space and control characters) are removed at the
// end of uri, at its beginning, and inside of the scheme; the scheme is lower
// cased. The second result is the scheme including the trailing ":", or ""
// when the first ':' does not come before any of '/', '?', '\\' and '#'.
//
// The clean-up is done in place: the content of uri is modified and the
// returned slice shares its memory (only the scheme is allocated).
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	var (
		head       = make([]byte, 0, 10) // cleaned bytes, up to the first delimiter included
		firstIndex = 0                   // index of the first byte greater than 32
		colonIndex = -1                  // index of the ':' ending the scheme
	)

scan:
	for i := 0; i < len(uri); i++ {
		c := uri[i]
		if c <= 32 {
			// space or special character: skipped
			continue
		}
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		if len(head) == 0 {
			firstIndex = i
		}
		head = append(head, c)

		switch c {
		case ':':
			// the scheme ends here
			colonIndex = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colonIndex == -1 {
		// scheme NOT found
		return uri[firstIndex:], ""
	}

	// Scheme found: write the cleaned scheme right before the rest of the
	// URI, so that the result is contiguous.
	start := colonIndex - len(head) + 1
	copy(uri[start:], head)
	return uri[start:], string(head)
}
898
899func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
900 // sanitize URI
901 uri, scheme := sanitizeURI(uri)
902
903 // remove javascript protocol
904 if scheme == "javascript:" {
905 return "", nil
906 }
907
908 // TODO check malicious data: - e.g. data:script
909 if scheme == "data:" {
910 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
911 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
912 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
913 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
914 bytes.HasPrefix(uri, []byte("data:image/webp")) {
915 // should be safe
916 return string(uri), nil
917 } else {
918 // unsafe data
919 return "", nil
920 }
921 }
922
923 // parse the uri
924 u, err := url.Parse(string(uri))
925 if err != nil {
926 return "", err
927 }
928
929 // get the fragment (with the prefix "#")
930 fragment := ""
931 if len(u.Fragment) > 0 {
932 fragment = "#" + u.Fragment
933 }
934
935 // reset the fragment: it is not included in the mortyurl
936 u.Fragment = ""
937
938 // merge the URI with the document URI
939 u = mergeURIs(rc.BaseURL, u)
940
941 // simple internal link ?
942 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
943 if u.Scheme == rc.BaseURL.Scheme &&
944 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
945 u.Host == rc.BaseURL.Host &&
946 u.Path == rc.BaseURL.Path &&
947 u.RawQuery == rc.BaseURL.RawQuery {
948 // the fragment is the only difference between the document URI and the uri parameter
949 return fragment, nil
950 }
951
952 // return full URI and fragment (if not empty)
953 morty_uri := u.String()
954
955 if rc.Key == nil {
956 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
957 }
958 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
959}
960
// inArray reports whether a contains an item with exactly the bytes of b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
969
// hash returns the HMAC-SHA256 of msg computed with key, hex encoded.
func hash(msg string, key []byte) string {
	h := hmac.New(sha256.New, key)
	h.Write([]byte(msg))
	digest := h.Sum(nil)
	return hex.EncodeToString(digest)
}
975
976func verifyRequestURI(uri, hashMsg, key []byte) bool {
977 h := make([]byte, hex.DecodedLen(len(hashMsg)))
978 _, err := hex.Decode(h, hashMsg)
979 if err != nil {
980 if DEBUG {
981 log.Println("hmac error:", err)
982 }
983 return false
984 }
985 mac := hmac.New(sha256.New, key)
986 mac.Write(uri)
987 return hmac.Equal(h, mac.Sum(nil))
988}
989
990func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
991 ctx.SetContentType("text/html")
992 ctx.SetStatusCode(403)
993 ctx.Write([]byte(MORTY_HTML_PAGE_START))
994 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
995 ctx.Write([]byte("<p>Following</p><p><a href=\""))
996 ctx.Write([]byte(html.EscapeString(uri.String())))
997 ctx.Write([]byte("\" rel=\"noreferrer\">"))
998 ctx.Write([]byte(html.EscapeString(uri.String())))
999 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
1000 ctx.Write([]byte(MORTY_HTML_PAGE_END))
1001}
1002
1003func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
1004 ctx.SetContentType("text/html; charset=UTF-8")
1005 ctx.SetStatusCode(statusCode)
1006 ctx.Write([]byte(MORTY_HTML_PAGE_START))
1007 if err != nil {
1008 if DEBUG {
1009 log.Println("error:", err)
1010 }
1011 ctx.Write([]byte("<h2>Error: "))
1012 ctx.Write([]byte(html.EscapeString(err.Error())))
1013 ctx.Write([]byte("</h2>"))
1014 }
1015 if p.Key == nil {
1016 ctx.Write([]byte(`
1017 <form action="post">
1018 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
1019 <input type="submit" value="go" />
1020 </form>`))
1021 } else {
1022 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
1023 }
1024 ctx.Write([]byte(MORTY_HTML_PAGE_END))
1025}
1026
1027func main() {
1028 default_listen_addr := os.Getenv("MORTY_ADDRESS")
1029 if default_listen_addr == "" {
1030 default_listen_addr = "127.0.0.1:3000"
1031 }
1032 default_key := os.Getenv("MORTY_KEY")
1033 listen := flag.String("listen", default_listen_addr, "Listen address")
1034 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
1035 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
1036 version := flag.Bool("version", false, "Show version")
1037 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
1038 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
1039 flag.Parse()
1040
1041 if *version {
1042 fmt.Println(VERSION)
1043 return
1044 }
1045
1046 if *ipv6 {
1047 CLIENT.DialDualStack = true
1048 }
1049
1050 if *socks5 != "" {
1051 // this disables CLIENT.DialDualStack
1052 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1053 }
1054
1055 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
1056
1057 if *key != "" {
1058 var err error
1059 p.Key, err = base64.StdEncoding.DecodeString(*key)
1060 if err != nil {
1061 log.Fatal("Error parsing -key", err.Error())
1062 os.Exit(1)
1063 }
1064 }
1065
1066 log.Println("listening on", *listen)
1067
1068 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1069 log.Fatal("Error in ListenAndServe:", err)
1070 }
1071}
Note: See TracBrowser for help on using the repository browser.