source: code/trunk/morty.go@ 125

Last change on this file since 125 was 125, checked in by asciimoo, 5 years ago

[fix] concatenate url params to existing ones

File size: 29.5 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "html/template"
13 "io"
14 "log"
15 "mime"
16 "net/url"
17 "os"
18 "path/filepath"
19 "regexp"
20 "strings"
21 "time"
22 "unicode/utf8"
23
24 "github.com/valyala/fasthttp"
25 "github.com/valyala/fasthttp/fasthttpproxy"
26 "golang.org/x/net/html"
27 "golang.org/x/net/html/charset"
28 "golang.org/x/text/encoding"
29
30 "github.com/asciimoo/morty/contenttype"
31)
32
// States tracked by sanitizeHTML to decide how the next text token is handled.
const (
	// STATE_DEFAULT: text tokens are copied to the output unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text tokens are passed through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text tokens are passed through sanitizeHTML again.
	STATE_IN_NOSCRIPT
)

// VERSION is the string printed by the -version flag.
const VERSION = "v0.2.0"

// DEBUG turns on verbose logging. It is enabled unless the DEBUG environment
// variable is set to exactly "false".
var DEBUG = os.Getenv("DEBUG") != "false"
42
43var CLIENT *fasthttp.Client = &fasthttp.Client{
44 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
45 ReadBufferSize: 16 * 1024, // 16K
46}
47
// CSS_URL_REGEXP matches CSS "url(...)" references. Submatch 1 and 3 are the
// optional opening and closing quotes, submatch 2 is the URL itself; sanitizeCSS
// rewrites submatch 2 only.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
49
50var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
51 // html
52 contenttype.NewFilterEquals("text", "html", ""),
53 contenttype.NewFilterEquals("application", "xhtml", "xml"),
54 // css
55 contenttype.NewFilterEquals("text", "css", ""),
56 // images
57 contenttype.NewFilterEquals("image", "gif", ""),
58 contenttype.NewFilterEquals("image", "png", ""),
59 contenttype.NewFilterEquals("image", "jpeg", ""),
60 contenttype.NewFilterEquals("image", "pjpeg", ""),
61 contenttype.NewFilterEquals("image", "webp", ""),
62 contenttype.NewFilterEquals("image", "tiff", ""),
63 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
64 contenttype.NewFilterEquals("image", "bmp", ""),
65 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
66 contenttype.NewFilterEquals("image", "x-icon", ""),
67 // fonts
68 contenttype.NewFilterEquals("application", "font-otf", ""),
69 contenttype.NewFilterEquals("application", "font-ttf", ""),
70 contenttype.NewFilterEquals("application", "font-woff", ""),
71 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
72})
73
74var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
75 // texts
76 contenttype.NewFilterEquals("text", "csv", ""),
77 contenttype.NewFilterEquals("text", "tab-separated-values", ""),
78 contenttype.NewFilterEquals("text", "plain", ""),
79 // API
80 contenttype.NewFilterEquals("application", "json", ""),
81 // Documents
82 contenttype.NewFilterEquals("application", "x-latex", ""),
83 contenttype.NewFilterEquals("application", "pdf", ""),
84 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
85 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
86 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
87 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
88 // Compressed archives
89 contenttype.NewFilterEquals("application", "zip", ""),
90 contenttype.NewFilterEquals("application", "gzip", ""),
91 contenttype.NewFilterEquals("application", "x-compressed", ""),
92 contenttype.NewFilterEquals("application", "x-gtar", ""),
93 contenttype.NewFilterEquals("application", "x-compress", ""),
94 // Generic binary
95 contenttype.NewFilterEquals("application", "octet-stream", ""),
96})
97
// ALLOWED_CONTENTTYPE_PARAMETERS lists the Content-Type parameters that are
// kept when RequestHandler filters the upstream content type.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
101
// UNSAFE_ELEMENTS lists the (lower-case) element names that sanitizeHTML drops
// together with everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
111
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with their (HTML-escaped) value, without any rewriting.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
148
// LINK_REL_SAFE_VALUES lists the <link rel="..."> values that sanitizeLinkTag
// lets through; a <link> with any other rel value is dropped.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
170
// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-case) <meta http-equiv="...">
// values that sanitizeMetaTag lets through.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
179
// Proxy holds the process-wide settings of the proxy.
type Proxy struct {
	// Key is the HMAC key used to validate "mortyhash"; nil disables validation.
	Key []byte
	// RequestTimeout is the timeout applied to each upstream request.
	RequestTimeout time.Duration
}

// RequestConfig carries the per-document state used while rewriting URIs.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs; nil means unsigned URLs.
	Key []byte
	// BaseURL is the URL that relative references are resolved against.
	BaseURL *url.URL
	// BodyInjected is set once the body extension has been written before </body>.
	BodyInjected bool
}

// HTMLBodyExtParam is the data passed to the HTML_BODY_EXTENSION template.
type HTMLBodyExtParam struct {
	BaseURL     string
	HasMortyKey bool
}

// HTMLFormExtParam is the data passed to the HTML_FORM_EXTENSION template.
type HTMLFormExtParam struct {
	BaseURL   string
	MortyHash string
}
200
// Templates parsed once in init().
var (
	// HTML_FORM_EXTENSION renders the hidden inputs injected into every <form>.
	HTML_FORM_EXTENSION *template.Template
	// HTML_BODY_EXTENSION renders the proxy header injected into the page body.
	HTML_BODY_EXTENSION *template.Template
)

// HTML_HEAD_CONTENT_TYPE is written right after every <head> start tag.
var HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
207
// MORTY_HTML_PAGE_START is the opening part (head, styles, page title) of the
// pages generated by the proxy itself (main page, error page, exit page).
// The anchor rule now names the "color" property; the original declaration
// "#2980b9;" had no property name and was ignored by browsers.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`
230
// MORTY_HTML_PAGE_END closes the container opened by MORTY_HTML_PAGE_START and
// appends the footer of the pages generated by the proxy itself.
var MORTY_HTML_PAGE_END = `
 </div>
 <div class="footer">
 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://github.com/asciimoo/morty">view on github</a>
 </p>
 </div>
</body>
</html>`
240
241var FAVICON_BYTES []byte
242
243func init() {
244 FaviconBase64 := "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"
245
246 FAVICON_BYTES, _ = base64.StdEncoding.DecodeString(FaviconBase64)
247 var err error
248 HTML_FORM_EXTENSION, err = template.New("html_form_extension").Parse(
249 `<input type="hidden" name="mortyurl" value="{{.BaseURL}}" />{{if .MortyHash}}<input type="hidden" name="mortyhash" value="{{.MortyHash}}" />{{end}}`)
250 if err != nil {
251 panic(err)
252 }
253 HTML_BODY_EXTENSION, err = template.New("html_body_extension").Parse(`
254<input type="checkbox" id="mortytoggle" autocomplete="off" />
255<div id="mortyheader">
256 <form method="get">
257 <label for="mortytoggle">hide</label>
258 <span><a href="/">Morty Proxy</a></span>
259 <input type="url" value="{{.BaseURL}}" name="mortyurl" {{if .HasMortyKey }}readonly="true"{{end}} />
260 This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page, visit <a href="{{.BaseURL}}" rel="noreferrer">original site</a>.
261 </form>
262</div>
263<style>
264body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
265#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #AAAAAA; background: #FFF; padding: 4px; color: #444; height: 42px; }
266#mortyheader p { padding: 0 0 0.7em 0; display: block; }
267#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
268#mortyheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
269#mortyheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
270input[type=checkbox]#mortytoggle { display: none; }
271input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
272#mortyheader input[type=url] { width: 50%; padding: 4px; font-size: 16px; }
273</style>
274`)
275 if err != nil {
276 panic(err)
277 }
278}
279
280func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
281
282 if appRequestHandler(ctx) {
283 return
284 }
285
286 requestHash := popRequestParam(ctx, []byte("mortyhash"))
287
288 requestURI := popRequestParam(ctx, []byte("mortyurl"))
289
290 if requestURI == nil {
291 p.serveMainPage(ctx, 200, nil)
292 return
293 }
294
295 if p.Key != nil {
296 if !verifyRequestURI(requestURI, requestHash, p.Key) {
297 // HTTP status code 403 : Forbidden
298 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
299 return
300 }
301 }
302
303 requestURIQuery := ctx.QueryArgs().QueryString()
304 if len(requestURIQuery) > 0 {
305 if bytes.ContainsRune(requestURI, '?') {
306 requestURI = append(requestURI, '&')
307 } else {
308 requestURI = append(requestURI, '?')
309 }
310 requestURI = append(requestURI, requestURIQuery...)
311 }
312
313 parsedURI, err := url.Parse(string(requestURI))
314
315 if err != nil {
316 // HTTP status code 500 : Internal Server Error
317 p.serveMainPage(ctx, 500, err)
318 return
319 }
320
321 if parsedURI.Scheme == "" {
322 parsedURI.Scheme = "https"
323 requestURI = append([]byte("https://"), requestURI...)
324 }
325
326 // Serve an intermediate page for protocols other than HTTP(S)
327 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
328 p.serveExitMortyPage(ctx, parsedURI)
329 return
330 }
331
332 req := fasthttp.AcquireRequest()
333 defer fasthttp.ReleaseRequest(req)
334 req.SetConnectionClose()
335
336 requestURIStr := string(requestURI)
337
338 if DEBUG {
339 log.Println("getting", requestURIStr)
340 }
341
342 req.SetRequestURI(requestURIStr)
343 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
344
345 resp := fasthttp.AcquireResponse()
346 defer fasthttp.ReleaseResponse(resp)
347
348 req.Header.SetMethodBytes(ctx.Method())
349 if ctx.IsPost() || ctx.IsPut() {
350 req.SetBody(ctx.PostBody())
351 }
352
353 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
354
355 if err != nil {
356 if err == fasthttp.ErrTimeout {
357 // HTTP status code 504 : Gateway Time-Out
358 p.serveMainPage(ctx, 504, err)
359 } else {
360 // HTTP status code 500 : Internal Server Error
361 p.serveMainPage(ctx, 500, err)
362 }
363 return
364 }
365
366 if resp.StatusCode() != 200 {
367 switch resp.StatusCode() {
368 case 301, 302, 303, 307, 308:
369 loc := resp.Header.Peek("Location")
370 if loc != nil {
371 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
372 url, err := rc.ProxifyURI(loc)
373 if err == nil {
374 ctx.SetStatusCode(resp.StatusCode())
375 ctx.Response.Header.Add("Location", url)
376 if DEBUG {
377 log.Println("redirect to", string(loc))
378 }
379 return
380 }
381 }
382 }
383 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
384 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
385 return
386 }
387
388 contentTypeBytes := resp.Header.Peek("Content-Type")
389
390 if contentTypeBytes == nil {
391 // HTTP status code 503 : Service Unavailable
392 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
393 return
394 }
395
396 contentTypeString := string(contentTypeBytes)
397
398 // decode Content-Type header
399 contentType, error := contenttype.ParseContentType(contentTypeString)
400 if error != nil {
401 // HTTP status code 503 : Service Unavailable
402 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
403 return
404 }
405
406 // content-disposition
407 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
408
409 // check content type
410 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
411 // it is not a usual content type
412 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
413 // force attachment for allowed content type
414 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
415 } else {
416 // deny access to forbidden content type
417 // HTTP status code 403 : Forbidden
418 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
419 return
420 }
421 }
422
423 // HACK : replace */xhtml by text/html
424 if contentType.SubType == "xhtml" {
425 contentType.TopLevelType = "text"
426 contentType.SubType = "html"
427 contentType.Suffix = ""
428 }
429
430 // conversion to UTF-8
431 var responseBody []byte
432
433 if contentType.TopLevelType == "text" {
434 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
435 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
436 responseBody, err = e.NewDecoder().Bytes(resp.Body())
437 if err != nil {
438 // HTTP status code 503 : Service Unavailable
439 p.serveMainPage(ctx, 503, err)
440 return
441 }
442 } else {
443 responseBody = resp.Body()
444 }
445 // update the charset or specify it
446 contentType.Parameters["charset"] = "UTF-8"
447 } else {
448 responseBody = resp.Body()
449 }
450
451 //
452 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
453
454 // set the content type
455 ctx.SetContentType(contentType.String())
456
457 // output according to MIME type
458 switch {
459 case contentType.SubType == "css" && contentType.Suffix == "":
460 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
461 case contentType.SubType == "html" && contentType.Suffix == "":
462 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
463 sanitizeHTML(rc, ctx, responseBody)
464 if !rc.BodyInjected {
465 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
466 if len(rc.Key) > 0 {
467 p.HasMortyKey = true
468 }
469 err := HTML_BODY_EXTENSION.Execute(ctx, p)
470 if err != nil {
471 if DEBUG {
472 fmt.Println("failed to inject body extension", err)
473 }
474 }
475 }
476 default:
477 if contentDispositionBytes != nil {
478 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
479 }
480 ctx.Write(responseBody)
481 }
482}
483
// contentDispositionForceAttachment returns a Content-Disposition header value
// whose disposition type is always "attachment".
//
// The parameters of contentDispositionBytes (the original header value, may be
// nil or unparsable) are kept. When they do not define a "filename", the last
// element of url's path is used, unless url is nil or its path has no usable
// last element.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var contentDispositionParams map[string]string

	if contentDispositionBytes != nil {
		var err error
		_, contentDispositionParams, err = mime.ParseMediaType(string(contentDispositionBytes))
		if err != nil {
			// unparsable header: drop it and start from scratch
			contentDispositionParams = nil
		}
	}
	if contentDispositionParams == nil {
		contentDispositionParams = make(map[string]string)
	}

	if _, fileNameDefined := contentDispositionParams["filename"]; !fileNameDefined && url != nil {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		// filepath.Base yields "." for an empty path and a lone separator for a
		// path made of separators only: neither is a meaningful file name.
		if name != "." && name != string(filepath.Separator) {
			contentDispositionParams["filename"] = name
		}
	}

	return []byte(mime.FormatMediaType("attachment", contentDispositionParams))
}
506
507func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
508 // serve robots.txt
509 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
510 ctx.SetContentType("text/plain")
511 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
512 return true
513 }
514
515 // server favicon.ico
516 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
517 ctx.SetContentType("image/png")
518 ctx.Write(FAVICON_BYTES)
519 return true
520 }
521
522 return false
523}
524
525func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
526 param := ctx.QueryArgs().PeekBytes(paramName)
527
528 if param == nil {
529 param = ctx.PostArgs().PeekBytes(paramName)
530 ctx.PostArgs().DelBytes(paramName)
531 }
532 ctx.QueryArgs().DelBytes(paramName)
533
534 return param
535}
536
537func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
538 // TODO
539
540 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
541
542 if urlSlices == nil {
543 out.Write(css)
544 return
545 }
546
547 startIndex := 0
548
549 for _, s := range urlSlices {
550 urlStart := s[4]
551 urlEnd := s[5]
552
553 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
554 out.Write(css[startIndex:urlStart])
555 out.Write([]byte(uri))
556 startIndex = urlEnd
557 } else if DEBUG {
558 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
559 }
560 }
561 if startIndex < len(css) {
562 out.Write(css[startIndex:len(css)])
563 }
564}
565
566func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
567 r := bytes.NewReader(htmlDoc)
568 decoder := html.NewTokenizer(r)
569 decoder.AllowCDATA(true)
570
571 unsafeElements := make([][]byte, 0, 8)
572 state := STATE_DEFAULT
573 for {
574 token := decoder.Next()
575 if token == html.ErrorToken {
576 err := decoder.Err()
577 if err != io.EOF {
578 log.Println("failed to parse HTML")
579 }
580 break
581 }
582
583 if len(unsafeElements) == 0 {
584
585 switch token {
586 case html.StartTagToken, html.SelfClosingTagToken:
587 tag, hasAttrs := decoder.TagName()
588 safe := !inArray(tag, UNSAFE_ELEMENTS)
589 if !safe {
590 if token != html.SelfClosingTagToken {
591 var unsafeTag []byte = make([]byte, len(tag))
592 copy(unsafeTag, tag)
593 unsafeElements = append(unsafeElements, unsafeTag)
594 }
595 break
596 }
597 if bytes.Equal(tag, []byte("base")) {
598 for {
599 attrName, attrValue, moreAttr := decoder.TagAttr()
600 if bytes.Equal(attrName, []byte("href")) {
601 parsedURI, err := url.Parse(string(attrValue))
602 if err == nil {
603 rc.BaseURL = parsedURI
604 }
605 }
606 if !moreAttr {
607 break
608 }
609 }
610 break
611 }
612 if bytes.Equal(tag, []byte("noscript")) {
613 state = STATE_IN_NOSCRIPT
614 break
615 }
616 var attrs [][][]byte
617 if hasAttrs {
618 for {
619 attrName, attrValue, moreAttr := decoder.TagAttr()
620 attrs = append(attrs, [][]byte{
621 attrName,
622 attrValue,
623 []byte(html.EscapeString(string(attrValue))),
624 })
625 if !moreAttr {
626 break
627 }
628 }
629 }
630 if bytes.Equal(tag, []byte("link")) {
631 sanitizeLinkTag(rc, out, attrs)
632 break
633 }
634
635 if bytes.Equal(tag, []byte("meta")) {
636 sanitizeMetaTag(rc, out, attrs)
637 break
638 }
639
640 fmt.Fprintf(out, "<%s", tag)
641
642 if hasAttrs {
643 sanitizeAttrs(rc, out, attrs)
644 }
645
646 if token == html.SelfClosingTagToken {
647 fmt.Fprintf(out, " />")
648 } else {
649 fmt.Fprintf(out, ">")
650 if bytes.Equal(tag, []byte("style")) {
651 state = STATE_IN_STYLE
652 }
653 }
654
655 if bytes.Equal(tag, []byte("head")) {
656 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
657 }
658
659 if bytes.Equal(tag, []byte("form")) {
660 var formURL *url.URL
661 for _, attr := range attrs {
662 if bytes.Equal(attr[0], []byte("action")) {
663 formURL, _ = url.Parse(string(attr[1]))
664 formURL = mergeURIs(rc.BaseURL, formURL)
665 break
666 }
667 }
668 if formURL == nil {
669 formURL = rc.BaseURL
670 }
671 urlStr := formURL.String()
672 var key string
673 if rc.Key != nil {
674 key = hash(urlStr, rc.Key)
675 }
676 err := HTML_FORM_EXTENSION.Execute(out, HTMLFormExtParam{urlStr, key})
677 if err != nil {
678 if DEBUG {
679 fmt.Println("failed to inject body extension", err)
680 }
681 }
682 }
683
684 case html.EndTagToken:
685 tag, _ := decoder.TagName()
686 writeEndTag := true
687 switch string(tag) {
688 case "body":
689 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
690 if len(rc.Key) > 0 {
691 p.HasMortyKey = true
692 }
693 err := HTML_BODY_EXTENSION.Execute(out, p)
694 if err != nil {
695 if DEBUG {
696 fmt.Println("failed to inject body extension", err)
697 }
698 }
699 rc.BodyInjected = true
700 case "style":
701 state = STATE_DEFAULT
702 case "noscript":
703 state = STATE_DEFAULT
704 writeEndTag = false
705 }
706 // skip noscript tags - only the tag, not the content, because javascript is sanitized
707 if writeEndTag {
708 fmt.Fprintf(out, "</%s>", tag)
709 }
710
711 case html.TextToken:
712 switch state {
713 case STATE_DEFAULT:
714 fmt.Fprintf(out, "%s", decoder.Raw())
715 case STATE_IN_STYLE:
716 sanitizeCSS(rc, out, decoder.Raw())
717 case STATE_IN_NOSCRIPT:
718 sanitizeHTML(rc, out, decoder.Raw())
719 }
720
721 case html.CommentToken:
722 // ignore comment. TODO : parse IE conditional comment
723
724 case html.DoctypeToken:
725 out.Write(decoder.Raw())
726 }
727 } else {
728 switch token {
729 case html.StartTagToken, html.SelfClosingTagToken:
730 tag, _ := decoder.TagName()
731 if inArray(tag, UNSAFE_ELEMENTS) {
732 unsafeElements = append(unsafeElements, tag)
733 }
734
735 case html.EndTagToken:
736 tag, _ := decoder.TagName()
737 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
738 unsafeElements = unsafeElements[:len(unsafeElements)-1]
739 }
740 }
741 }
742 }
743}
744
745func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
746 exclude := false
747 for _, attr := range attrs {
748 attrName := attr[0]
749 attrValue := attr[1]
750 if bytes.Equal(attrName, []byte("rel")) {
751 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
752 exclude = true
753 break
754 }
755 }
756 if bytes.Equal(attrName, []byte("as")) {
757 if bytes.Equal(attrValue, []byte("script")) {
758 exclude = true
759 break
760 }
761 }
762 }
763
764 if !exclude {
765 out.Write([]byte("<link"))
766 for _, attr := range attrs {
767 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
768 }
769 out.Write([]byte(">"))
770 }
771}
772
773func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
774 var http_equiv []byte
775 var content []byte
776
777 for _, attr := range attrs {
778 attrName := attr[0]
779 attrValue := attr[1]
780 if bytes.Equal(attrName, []byte("http-equiv")) {
781 http_equiv = bytes.ToLower(attrValue)
782 // exclude some <meta http-equiv="..." ..>
783 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
784 return
785 }
786 }
787 if bytes.Equal(attrName, []byte("content")) {
788 content = attrValue
789 }
790 if bytes.Equal(attrName, []byte("charset")) {
791 // exclude <meta charset="...">
792 return
793 }
794 }
795
796 out.Write([]byte("<meta"))
797 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
798 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
799 contentUrl := content[urlIndex+4:]
800 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
801 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
802 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
803 contentUrl = contentUrl[1 : len(contentUrl)-1]
804 }
805 }
806 // output proxify result
807 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
808 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
809 }
810 } else {
811 if len(http_equiv) > 0 {
812 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
813 }
814 sanitizeAttrs(rc, out, attrs)
815 }
816 out.Write([]byte(">"))
817}
818
819func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
820 for _, attr := range attrs {
821 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
822 }
823}
824
825func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
826 if inArray(attrName, SAFE_ATTRIBUTES) {
827 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
828 return
829 }
830 switch string(attrName) {
831 case "src", "href", "action":
832 if uri, err := rc.ProxifyURI(attrValue); err == nil {
833 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
834 } else if DEBUG {
835 log.Println("cannot proxify uri:", string(attrValue))
836 }
837 case "style":
838 cssAttr := bytes.NewBuffer(nil)
839 sanitizeCSS(rc, cssAttr, attrValue)
840 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
841 }
842}
843
// mergeURIs resolves u2 against u1 and returns the result.
// When u2 is nil, u1 is returned unchanged.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
850
// sanitizeURI cleans up a URI before it is parsed.
//
// Trailing bytes <= 32 (control characters and space) are trimmed. The URI is
// then scanned up to the first ':', '/', '?', '\' or '#':
//   - if a ':' is reached first, the scanned part is the scheme: it is
//     lower-cased, stripped of every byte <= 32 and written back in place just
//     before the rest of the URI. The result starts at the scheme, and the
//     scheme (with its trailing ':') is returned as second value;
//   - otherwise the URI is returned from its first byte > 32, with an empty
//     scheme.
//
// The input slice is modified in place; apart from the scheme, nothing is
// copied.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	var (
		scheme     = bytes.NewBuffer(make([]byte, 0, 10)) // lower-cased bytes seen so far
		start      = 0                                    // index of the first byte > 32
		started    = false
		colonIndex = -1 // index of the ':' ending the scheme, -1 if none
	)

scan:
	for i, c := range uri {
		if c <= 32 {
			// special characters and spaces are skipped wherever they are
			continue
		}
		if c < utf8.RuneSelf && 'A' <= c && c <= 'Z' {
			c = c + 'a' - 'A'
		}
		scheme.WriteByte(c)

		if !started {
			start, started = i, true
		}

		switch c {
		case ':':
			// end of the scheme
			colonIndex = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colonIndex == -1 {
		// scheme NOT found
		return uri[start:], ""
	}

	// scheme found: write its cleaned form so that it ends on the ':' position
	schemeStart := colonIndex - scheme.Len() + 1
	copy(uri[schemeStart:], scheme.Bytes())
	return uri[schemeStart:], scheme.String()
}
902
903func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
904 // sanitize URI
905 uri, scheme := sanitizeURI(uri)
906
907 // remove javascript protocol
908 if scheme == "javascript:" {
909 return "", nil
910 }
911
912 // TODO check malicious data: - e.g. data:script
913 if scheme == "data:" {
914 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
915 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
916 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
917 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
918 bytes.HasPrefix(uri, []byte("data:image/webp")) {
919 // should be safe
920 return string(uri), nil
921 } else {
922 // unsafe data
923 return "", nil
924 }
925 }
926
927 // parse the uri
928 u, err := url.Parse(string(uri))
929 if err != nil {
930 return "", err
931 }
932
933 // get the fragment (with the prefix "#")
934 fragment := ""
935 if len(u.Fragment) > 0 {
936 fragment = "#" + u.Fragment
937 }
938
939 // reset the fragment: it is not included in the mortyurl
940 u.Fragment = ""
941
942 // merge the URI with the document URI
943 u = mergeURIs(rc.BaseURL, u)
944
945 // simple internal link ?
946 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
947 if u.Scheme == rc.BaseURL.Scheme &&
948 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
949 u.Host == rc.BaseURL.Host &&
950 u.Path == rc.BaseURL.Path &&
951 u.RawQuery == rc.BaseURL.RawQuery {
952 // the fragment is the only difference between the document URI and the uri parameter
953 return fragment, nil
954 }
955
956 // return full URI and fragment (if not empty)
957 morty_uri := u.String()
958
959 if rc.Key == nil {
960 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
961 }
962 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
963}
964
// inArray reports whether a contains an element equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
973
// hash returns the hex-encoded HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	h := hmac.New(sha256.New, key)
	h.Write([]byte(msg))
	digest := h.Sum(nil)
	return hex.EncodeToString(digest)
}
979
980func verifyRequestURI(uri, hashMsg, key []byte) bool {
981 h := make([]byte, hex.DecodedLen(len(hashMsg)))
982 _, err := hex.Decode(h, hashMsg)
983 if err != nil {
984 if DEBUG {
985 log.Println("hmac error:", err)
986 }
987 return false
988 }
989 mac := hmac.New(sha256.New, key)
990 mac.Write(uri)
991 return hmac.Equal(h, mac.Sum(nil))
992}
993
994func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
995 ctx.SetContentType("text/html")
996 ctx.SetStatusCode(403)
997 ctx.Write([]byte(MORTY_HTML_PAGE_START))
998 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
999 ctx.Write([]byte("<p>Following</p><p><a href=\""))
1000 ctx.Write([]byte(html.EscapeString(uri.String())))
1001 ctx.Write([]byte("\" rel=\"noreferrer\">"))
1002 ctx.Write([]byte(html.EscapeString(uri.String())))
1003 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
1004 ctx.Write([]byte(MORTY_HTML_PAGE_END))
1005}
1006
1007func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
1008 ctx.SetContentType("text/html; charset=UTF-8")
1009 ctx.SetStatusCode(statusCode)
1010 ctx.Write([]byte(MORTY_HTML_PAGE_START))
1011 if err != nil {
1012 if DEBUG {
1013 log.Println("error:", err)
1014 }
1015 ctx.Write([]byte("<h2>Error: "))
1016 ctx.Write([]byte(html.EscapeString(err.Error())))
1017 ctx.Write([]byte("</h2>"))
1018 }
1019 if p.Key == nil {
1020 ctx.Write([]byte(`
1021 <form action="post">
1022 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
1023 <input type="submit" value="go" />
1024 </form>`))
1025 } else {
1026 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
1027 }
1028 ctx.Write([]byte(MORTY_HTML_PAGE_END))
1029}
1030
1031func main() {
1032 default_listen_addr := os.Getenv("MORTY_ADDRESS")
1033 if default_listen_addr == "" {
1034 default_listen_addr = "127.0.0.1:3000"
1035 }
1036 default_key := os.Getenv("MORTY_KEY")
1037 listen := flag.String("listen", default_listen_addr, "Listen address")
1038 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
1039 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
1040 version := flag.Bool("version", false, "Show version")
1041 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
1042 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
1043 flag.Parse()
1044
1045 if *version {
1046 fmt.Println(VERSION)
1047 return
1048 }
1049
1050 if *ipv6 {
1051 CLIENT.DialDualStack = true
1052 }
1053
1054 if *socks5 != "" {
1055 // this disables CLIENT.DialDualStack
1056 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1057 }
1058
1059 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
1060
1061 if *key != "" {
1062 var err error
1063 p.Key, err = base64.StdEncoding.DecodeString(*key)
1064 if err != nil {
1065 log.Fatal("Error parsing -key", err.Error())
1066 os.Exit(1)
1067 }
1068 }
1069
1070 log.Println("listening on", *listen)
1071
1072 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1073 log.Fatal("Error in ListenAndServe:", err)
1074 }
1075}
Note: See TracBrowser for help on using the repository browser.