source: code/trunk/morty.go@ 131

Last change on this file since 131 was 131, checked in by alex, 5 years ago

Merge remote-tracking branch 'origin/master' into redirect

File size: 30.4 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "html/template"
13 "io"
14 "log"
15 "mime"
16 "net/url"
17 "os"
18 "path/filepath"
19 "regexp"
20 "strings"
21 "time"
22 "unicode/utf8"
23
24 "github.com/valyala/fasthttp"
25 "github.com/valyala/fasthttp/fasthttpproxy"
26 "golang.org/x/net/html"
27 "golang.org/x/net/html/charset"
28 "golang.org/x/text/encoding"
29
30 "github.com/asciimoo/morty/config"
31 "github.com/asciimoo/morty/contenttype"
32)
33
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text tokens are copied to the output unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text tokens are CSS and go through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text tokens are HTML and go through sanitizeHTML again.
	STATE_IN_NOSCRIPT
)
39
const (
	// VERSION is the string printed by the -version flag.
	VERSION = "v0.2.0"

	// MAX_REDIRECT_COUNT is the number of redirects ProcessUri follows
	// before giving up.
	MAX_REDIRECT_COUNT = 5
)
43
44var CLIENT *fasthttp.Client = &fasthttp.Client{
45 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
46 ReadBufferSize: 16 * 1024, // 16K
47}
48
49var cfg *config.Config = config.DefaultConfig
50
51var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
52 // html
53 contenttype.NewFilterEquals("text", "html", ""),
54 contenttype.NewFilterEquals("application", "xhtml", "xml"),
55 // css
56 contenttype.NewFilterEquals("text", "css", ""),
57 // images
58 contenttype.NewFilterEquals("image", "gif", ""),
59 contenttype.NewFilterEquals("image", "png", ""),
60 contenttype.NewFilterEquals("image", "jpeg", ""),
61 contenttype.NewFilterEquals("image", "pjpeg", ""),
62 contenttype.NewFilterEquals("image", "webp", ""),
63 contenttype.NewFilterEquals("image", "tiff", ""),
64 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
65 contenttype.NewFilterEquals("image", "bmp", ""),
66 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
67 contenttype.NewFilterEquals("image", "x-icon", ""),
68 // fonts
69 contenttype.NewFilterEquals("application", "font-otf", ""),
70 contenttype.NewFilterEquals("application", "font-ttf", ""),
71 contenttype.NewFilterEquals("application", "font-woff", ""),
72 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
73})
74
75var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
76 // texts
77 contenttype.NewFilterEquals("text", "csv", ""),
78 contenttype.NewFilterEquals("text", "tab-separated-values", ""),
79 contenttype.NewFilterEquals("text", "plain", ""),
80 // API
81 contenttype.NewFilterEquals("application", "json", ""),
82 // Documents
83 contenttype.NewFilterEquals("application", "x-latex", ""),
84 contenttype.NewFilterEquals("application", "pdf", ""),
85 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
86 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
87 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
88 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
89 // Compressed archives
90 contenttype.NewFilterEquals("application", "zip", ""),
91 contenttype.NewFilterEquals("application", "gzip", ""),
92 contenttype.NewFilterEquals("application", "x-compressed", ""),
93 contenttype.NewFilterEquals("application", "x-gtar", ""),
94 contenttype.NewFilterEquals("application", "x-compress", ""),
95 // Generic binary
96 contenttype.NewFilterEquals("application", "octet-stream", ""),
97})
98
// ALLOWED_CONTENTTYPE_PARAMETERS names the Content-Type parameters that
// ProcessUri keeps when it filters the upstream header.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
102
// UNSAFE_ELEMENTS lists the elements sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS [][]byte = func(names ...string) [][]byte {
	elements := make([][]byte, len(names))
	for i, name := range names {
		elements[i] = []byte(name)
	}
	return elements
}(
	"applet",
	"canvas",
	"embed",
	//"iframe",
	"math",
	"script",
	"svg",
)
112
// SAFE_ATTRIBUTES lists the attribute names sanitizeAttr copies to the
// output with their HTML-escaped value; the match is exact.
var SAFE_ATTRIBUTES [][]byte = func(names ...string) [][]byte {
	attributes := make([][]byte, len(names))
	for i, name := range names {
		attributes[i] = []byte(name)
	}
	return attributes
}(
	"abbr",
	"accesskey",
	"align",
	"alt",
	"as",
	"autocomplete",
	"charset",
	"checked",
	"class",
	"content",
	"contenteditable",
	"contextmenu",
	"dir",
	"for",
	"height",
	"hidden",
	"hreflang",
	"id",
	"lang",
	"media",
	"method",
	"name",
	"nowrap",
	"placeholder",
	"property",
	"rel",
	"spellcheck",
	"tabindex",
	"target",
	"title",
	"translate",
	"type",
	"value",
	"width",
)
149
// LINK_REL_SAFE_VALUES lists the rel values for which sanitizeLinkTag keeps
// a <link> element; the whole attribute value must match exactly.
var LINK_REL_SAFE_VALUES [][]byte = func(values ...string) [][]byte {
	rels := make([][]byte, len(values))
	for i, value := range values {
		rels[i] = []byte(value)
	}
	return rels
}(
	"alternate",
	"archives",
	"author",
	"copyright",
	"first",
	"help",
	"icon",
	"index",
	"last",
	"license",
	"manifest",
	"next",
	"pingback",
	"prev",
	"publisher",
	"search",
	"shortcut icon",
	"stylesheet",
	"up",
)
171
// LINK_HTTP_EQUIV_SAFE_VALUES lists the lower-cased http-equiv values for
// which sanitizeMetaTag keeps a <meta> element.
var LINK_HTTP_EQUIV_SAFE_VALUES [][]byte = func(values ...string) [][]byte {
	equivs := make([][]byte, len(values))
	for i, value := range values {
		equivs[i] = []byte(value)
	}
	return equivs
}(
	// X-UA-Compatible will be added automatically, so it can be skipped
	"date",
	"last-modified",
	"refresh", // URL rewrite
	// "location", TODO URL rewrite
	"content-language",
)
180
181var CSS_URL_REGEXP *regexp.Regexp = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
182
type (
	// Proxy carries the per-server settings used by the request handlers.
	Proxy struct {
		// Key is the HMAC key for "mortyhash" validation; nil disables it.
		Key []byte
		// RequestTimeout bounds each upstream request.
		RequestTimeout time.Duration
		// FollowRedirect makes GET redirects be followed by the proxy itself.
		FollowRedirect bool
	}

	// RequestConfig is the state shared by the sanitizers while one
	// document is rewritten.
	RequestConfig struct {
		// Key is the HMAC key used to sign rewritten URLs; nil means unsigned.
		Key []byte
		// BaseURL is the URL that relative references are resolved against.
		BaseURL *url.URL
		// BodyInjected is set once the body extension has been written.
		BodyInjected bool
	}

	// HTMLBodyExtParam is the data passed to HTML_BODY_EXTENSION.
	HTMLBodyExtParam struct {
		BaseURL     string
		HasMortyKey bool
	}

	// HTMLFormExtParam is the data passed to HTML_FORM_EXTENSION.
	HTMLFormExtParam struct {
		BaseURL   string
		MortyHash string
	}
)
204
var (
	// HTML_FORM_EXTENSION renders the hidden inputs added to every <form>;
	// it is compiled in init.
	HTML_FORM_EXTENSION *template.Template

	// HTML_BODY_EXTENSION renders the proxy header added to every page;
	// it is compiled in init.
	HTML_BODY_EXTENSION *template.Template

	// HTML_HEAD_CONTENT_TYPE is written right after every <head> start tag.
	HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
)
211
// MORTY_HTML_PAGE_START is the opening part of the pages Morty generates
// itself (main page, error page, exit page). It leaves the container <div>
// open; MORTY_HTML_PAGE_END closes it.
//
// The link rule used to read "a { text-decoration: none; #2980b9; }": the
// property name was missing, which makes the declaration invalid CSS. It is
// now a proper "color" declaration.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`
234
// MORTY_HTML_PAGE_END closes the container opened by MORTY_HTML_PAGE_START
// and adds the footer shown on every page Morty generates itself.
var MORTY_HTML_PAGE_END = `
 </div>
 <div class="footer">
 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://github.com/asciimoo/morty">view on github</a>
 </p>
 </div>
</body>
</html>`
244
245var FAVICON_BYTES []byte
246
247func init() {
248 FaviconBase64 := "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"
249
250 FAVICON_BYTES, _ = base64.StdEncoding.DecodeString(FaviconBase64)
251 var err error
252 HTML_FORM_EXTENSION, err = template.New("html_form_extension").Parse(
253 `<input type="hidden" name="mortyurl" value="{{.BaseURL}}" />{{if .MortyHash}}<input type="hidden" name="mortyhash" value="{{.MortyHash}}" />{{end}}`)
254 if err != nil {
255 panic(err)
256 }
257 HTML_BODY_EXTENSION, err = template.New("html_body_extension").Parse(`
258<input type="checkbox" id="mortytoggle" autocomplete="off" />
259<div id="mortyheader">
260 <form method="get">
261 <label for="mortytoggle">hide</label>
262 <span><a href="/">Morty Proxy</a></span>
263 <input type="url" value="{{.BaseURL}}" name="mortyurl" {{if .HasMortyKey }}readonly="true"{{end}} />
264 This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page, visit <a href="{{.BaseURL}}" rel="noreferrer">original site</a>.
265 </form>
266</div>
267<style>
268body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
269#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #AAAAAA; background: #FFF; padding: 4px; color: #444; height: 42px; }
270#mortyheader * { padding: 0; margin: 0; }
271#mortyheader p { padding: 0 0 0.7em 0; display: block; }
272#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
273#mortyheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
274#mortyheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
275input[type=checkbox]#mortytoggle { display: none; }
276input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
277#mortyheader input[type=url] { width: 50%; padding: 4px; font-size: 16px; }
278</style>
279`)
280 if err != nil {
281 panic(err)
282 }
283}
284
285func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
286
287 if appRequestHandler(ctx) {
288 return
289 }
290
291 requestHash := popRequestParam(ctx, []byte("mortyhash"))
292
293 requestURI := popRequestParam(ctx, []byte("mortyurl"))
294
295 if requestURI == nil {
296 p.serveMainPage(ctx, 200, nil)
297 return
298 }
299
300 if p.Key != nil {
301 if !verifyRequestURI(requestURI, requestHash, p.Key) {
302 // HTTP status code 403 : Forbidden
303 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
304 return
305 }
306 }
307
308 requestURIQuery := ctx.QueryArgs().QueryString()
309 if len(requestURIQuery) > 0 {
310 if bytes.ContainsRune(requestURI, '?') {
311 requestURI = append(requestURI, '&')
312 } else {
313 requestURI = append(requestURI, '?')
314 }
315 requestURI = append(requestURI, requestURIQuery...)
316 }
317
318 p.ProcessUri(ctx, string(requestURI), 0)
319}
320
321func (p *Proxy) ProcessUri(ctx *fasthttp.RequestCtx, requestURIStr string, redirectCount int) {
322 parsedURI, err := url.Parse(requestURIStr)
323
324 if err != nil {
325 // HTTP status code 500 : Internal Server Error
326 p.serveMainPage(ctx, 500, err)
327 return
328 }
329
330 if parsedURI.Scheme == "" {
331 requestURIStr = "https://" + requestURIStr
332 parsedURI, err = url.Parse(requestURIStr)
333 if err != nil {
334 p.serveMainPage(ctx, 500, err)
335 return
336 }
337 }
338
339 // Serve an intermediate page for protocols other than HTTP(S)
340 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
341 p.serveExitMortyPage(ctx, parsedURI)
342 return
343 }
344
345 req := fasthttp.AcquireRequest()
346 defer fasthttp.ReleaseRequest(req)
347 req.SetConnectionClose()
348
349 if cfg.Debug {
350 log.Println(string(ctx.Method()), requestURIStr)
351 }
352
353 req.SetRequestURI(requestURIStr)
354 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
355
356 resp := fasthttp.AcquireResponse()
357 defer fasthttp.ReleaseResponse(resp)
358
359 req.Header.SetMethodBytes(ctx.Method())
360 if ctx.IsPost() || ctx.IsPut() {
361 req.SetBody(ctx.PostBody())
362 }
363
364 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
365
366 if err != nil {
367 if err == fasthttp.ErrTimeout {
368 // HTTP status code 504 : Gateway Time-Out
369 p.serveMainPage(ctx, 504, err)
370 } else {
371 // HTTP status code 500 : Internal Server Error
372 p.serveMainPage(ctx, 500, err)
373 }
374 return
375 }
376
377 if resp.StatusCode() != 200 {
378 switch resp.StatusCode() {
379 case 301, 302, 303, 307, 308:
380 loc := resp.Header.Peek("Location")
381 if loc != nil {
382 if p.FollowRedirect && ctx.IsGet() {
383 // GET method: Morty follows the redirect
384 if redirectCount < MAX_REDIRECT_COUNT {
385 if cfg.Debug {
386 log.Println("follow redirect to", string(loc))
387 }
388 p.ProcessUri(ctx, string(loc), redirectCount+1)
389 } else {
390 p.serveMainPage(ctx, 310, errors.New("Too many redirects"))
391 }
392 return
393 } else {
394 // Other HTTP methods: Morty does NOT follow the redirect
395 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
396 url, err := rc.ProxifyURI(loc)
397 if err == nil {
398 ctx.SetStatusCode(resp.StatusCode())
399 ctx.Response.Header.Add("Location", url)
400 if cfg.Debug {
401 log.Println("redirect to", string(loc))
402 }
403 return
404 }
405 }
406 }
407 }
408 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
409 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
410 return
411 }
412
413 contentTypeBytes := resp.Header.Peek("Content-Type")
414
415 if contentTypeBytes == nil {
416 // HTTP status code 503 : Service Unavailable
417 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
418 return
419 }
420
421 contentTypeString := string(contentTypeBytes)
422
423 // decode Content-Type header
424 contentType, error := contenttype.ParseContentType(contentTypeString)
425 if error != nil {
426 // HTTP status code 503 : Service Unavailable
427 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
428 return
429 }
430
431 // content-disposition
432 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
433
434 // check content type
435 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
436 // it is not a usual content type
437 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
438 // force attachment for allowed content type
439 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
440 } else {
441 // deny access to forbidden content type
442 // HTTP status code 403 : Forbidden
443 p.serveMainPage(ctx, 403, errors.New("forbidden content type "+parsedURI.String()))
444 return
445 }
446 }
447
448 // HACK : replace */xhtml by text/html
449 if contentType.SubType == "xhtml" {
450 contentType.TopLevelType = "text"
451 contentType.SubType = "html"
452 contentType.Suffix = ""
453 }
454
455 // conversion to UTF-8
456 var responseBody []byte
457
458 if contentType.TopLevelType == "text" {
459 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
460 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
461 responseBody, err = e.NewDecoder().Bytes(resp.Body())
462 if err != nil {
463 // HTTP status code 503 : Service Unavailable
464 p.serveMainPage(ctx, 503, err)
465 return
466 }
467 } else {
468 responseBody = resp.Body()
469 }
470 // update the charset or specify it
471 contentType.Parameters["charset"] = "UTF-8"
472 } else {
473 responseBody = resp.Body()
474 }
475
476 //
477 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
478
479 // set the content type
480 ctx.SetContentType(contentType.String())
481
482 // output according to MIME type
483 switch {
484 case contentType.SubType == "css" && contentType.Suffix == "":
485 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
486 case contentType.SubType == "html" && contentType.Suffix == "":
487 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
488 sanitizeHTML(rc, ctx, responseBody)
489 if !rc.BodyInjected {
490 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
491 if len(rc.Key) > 0 {
492 p.HasMortyKey = true
493 }
494 err := HTML_BODY_EXTENSION.Execute(ctx, p)
495 if err != nil {
496 if cfg.Debug {
497 fmt.Println("failed to inject body extension", err)
498 }
499 }
500 }
501 default:
502 if contentDispositionBytes != nil {
503 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
504 }
505 ctx.Write(responseBody)
506 }
507}
508
// contentDispositionForceAttachment returns a Content-Disposition header
// value whose disposition type is always "attachment".
//
// Parameters of the existing header (contentDispositionBytes, may be nil)
// are kept when the header parses; a malformed header is discarded. When no
// "filename" parameter is present, one is derived from the last element of
// url.Path, or "download" when the path has no usable last element.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	params := make(map[string]string)

	if contentDispositionBytes != nil {
		_, parsed, err := mime.ParseMediaType(string(contentDispositionBytes))
		if err == nil && parsed != nil {
			params = parsed
		}
	}

	// The parameter is looked up and stored under the same lower-case key;
	// the previous code stored it as "fileName".
	if _, fileNameDefined := params["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		// Base returns "." for an empty path and a lone separator for the
		// root path; neither is a meaningful file name
		if name == "." || name == "/" || name == string(filepath.Separator) {
			name = "download"
		}
		params["filename"] = name
	}

	return []byte(mime.FormatMediaType("attachment", params))
}
531
532func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
533 // serve robots.txt
534 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
535 ctx.SetContentType("text/plain")
536 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
537 return true
538 }
539
540 // server favicon.ico
541 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
542 ctx.SetContentType("image/png")
543 ctx.Write(FAVICON_BYTES)
544 return true
545 }
546
547 return false
548}
549
550func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
551 param := ctx.QueryArgs().PeekBytes(paramName)
552
553 if param == nil {
554 param = ctx.PostArgs().PeekBytes(paramName)
555 ctx.PostArgs().DelBytes(paramName)
556 }
557 ctx.QueryArgs().DelBytes(paramName)
558
559 return param
560}
561
562func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
563 // TODO
564
565 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
566
567 if urlSlices == nil {
568 out.Write(css)
569 return
570 }
571
572 startIndex := 0
573
574 for _, s := range urlSlices {
575 urlStart := s[4]
576 urlEnd := s[5]
577
578 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
579 out.Write(css[startIndex:urlStart])
580 out.Write([]byte(uri))
581 startIndex = urlEnd
582 } else if cfg.Debug {
583 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
584 }
585 }
586 if startIndex < len(css) {
587 out.Write(css[startIndex:len(css)])
588 }
589}
590
591func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
592 r := bytes.NewReader(htmlDoc)
593 decoder := html.NewTokenizer(r)
594 decoder.AllowCDATA(true)
595
596 unsafeElements := make([][]byte, 0, 8)
597 state := STATE_DEFAULT
598 for {
599 token := decoder.Next()
600 if token == html.ErrorToken {
601 err := decoder.Err()
602 if err != io.EOF {
603 log.Println("failed to parse HTML")
604 }
605 break
606 }
607
608 if len(unsafeElements) == 0 {
609
610 switch token {
611 case html.StartTagToken, html.SelfClosingTagToken:
612 tag, hasAttrs := decoder.TagName()
613 safe := !inArray(tag, UNSAFE_ELEMENTS)
614 if !safe {
615 if token != html.SelfClosingTagToken {
616 var unsafeTag []byte = make([]byte, len(tag))
617 copy(unsafeTag, tag)
618 unsafeElements = append(unsafeElements, unsafeTag)
619 }
620 break
621 }
622 if bytes.Equal(tag, []byte("base")) {
623 for {
624 attrName, attrValue, moreAttr := decoder.TagAttr()
625 if bytes.Equal(attrName, []byte("href")) {
626 parsedURI, err := url.Parse(string(attrValue))
627 if err == nil {
628 rc.BaseURL = parsedURI
629 }
630 }
631 if !moreAttr {
632 break
633 }
634 }
635 break
636 }
637 if bytes.Equal(tag, []byte("noscript")) {
638 state = STATE_IN_NOSCRIPT
639 break
640 }
641 var attrs [][][]byte
642 if hasAttrs {
643 for {
644 attrName, attrValue, moreAttr := decoder.TagAttr()
645 attrs = append(attrs, [][]byte{
646 attrName,
647 attrValue,
648 []byte(html.EscapeString(string(attrValue))),
649 })
650 if !moreAttr {
651 break
652 }
653 }
654 }
655 if bytes.Equal(tag, []byte("link")) {
656 sanitizeLinkTag(rc, out, attrs)
657 break
658 }
659
660 if bytes.Equal(tag, []byte("meta")) {
661 sanitizeMetaTag(rc, out, attrs)
662 break
663 }
664
665 fmt.Fprintf(out, "<%s", tag)
666
667 if hasAttrs {
668 sanitizeAttrs(rc, out, attrs)
669 }
670
671 if token == html.SelfClosingTagToken {
672 fmt.Fprintf(out, " />")
673 } else {
674 fmt.Fprintf(out, ">")
675 if bytes.Equal(tag, []byte("style")) {
676 state = STATE_IN_STYLE
677 }
678 }
679
680 if bytes.Equal(tag, []byte("head")) {
681 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
682 }
683
684 if bytes.Equal(tag, []byte("form")) {
685 var formURL *url.URL
686 for _, attr := range attrs {
687 if bytes.Equal(attr[0], []byte("action")) {
688 formURL, _ = url.Parse(string(attr[1]))
689 formURL = mergeURIs(rc.BaseURL, formURL)
690 break
691 }
692 }
693 if formURL == nil {
694 formURL = rc.BaseURL
695 }
696 urlStr := formURL.String()
697 var key string
698 if rc.Key != nil {
699 key = hash(urlStr, rc.Key)
700 }
701 err := HTML_FORM_EXTENSION.Execute(out, HTMLFormExtParam{urlStr, key})
702 if err != nil {
703 if cfg.Debug {
704 fmt.Println("failed to inject body extension", err)
705 }
706 }
707 }
708
709 case html.EndTagToken:
710 tag, _ := decoder.TagName()
711 writeEndTag := true
712 switch string(tag) {
713 case "body":
714 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
715 if len(rc.Key) > 0 {
716 p.HasMortyKey = true
717 }
718 err := HTML_BODY_EXTENSION.Execute(out, p)
719 if err != nil {
720 if cfg.Debug {
721 fmt.Println("failed to inject body extension", err)
722 }
723 }
724 rc.BodyInjected = true
725 case "style":
726 state = STATE_DEFAULT
727 case "noscript":
728 state = STATE_DEFAULT
729 writeEndTag = false
730 }
731 // skip noscript tags - only the tag, not the content, because javascript is sanitized
732 if writeEndTag {
733 fmt.Fprintf(out, "</%s>", tag)
734 }
735
736 case html.TextToken:
737 switch state {
738 case STATE_DEFAULT:
739 fmt.Fprintf(out, "%s", decoder.Raw())
740 case STATE_IN_STYLE:
741 sanitizeCSS(rc, out, decoder.Raw())
742 case STATE_IN_NOSCRIPT:
743 sanitizeHTML(rc, out, decoder.Raw())
744 }
745
746 case html.CommentToken:
747 // ignore comment. TODO : parse IE conditional comment
748
749 case html.DoctypeToken:
750 out.Write(decoder.Raw())
751 }
752 } else {
753 switch token {
754 case html.StartTagToken, html.SelfClosingTagToken:
755 tag, _ := decoder.TagName()
756 if inArray(tag, UNSAFE_ELEMENTS) {
757 unsafeElements = append(unsafeElements, tag)
758 }
759
760 case html.EndTagToken:
761 tag, _ := decoder.TagName()
762 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
763 unsafeElements = unsafeElements[:len(unsafeElements)-1]
764 }
765 }
766 }
767 }
768}
769
770func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
771 exclude := false
772 for _, attr := range attrs {
773 attrName := attr[0]
774 attrValue := attr[1]
775 if bytes.Equal(attrName, []byte("rel")) {
776 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
777 exclude = true
778 break
779 }
780 }
781 if bytes.Equal(attrName, []byte("as")) {
782 if bytes.Equal(attrValue, []byte("script")) {
783 exclude = true
784 break
785 }
786 }
787 }
788
789 if !exclude {
790 out.Write([]byte("<link"))
791 for _, attr := range attrs {
792 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
793 }
794 out.Write([]byte(">"))
795 }
796}
797
798func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
799 var http_equiv []byte
800 var content []byte
801
802 for _, attr := range attrs {
803 attrName := attr[0]
804 attrValue := attr[1]
805 if bytes.Equal(attrName, []byte("http-equiv")) {
806 http_equiv = bytes.ToLower(attrValue)
807 // exclude some <meta http-equiv="..." ..>
808 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
809 return
810 }
811 }
812 if bytes.Equal(attrName, []byte("content")) {
813 content = attrValue
814 }
815 if bytes.Equal(attrName, []byte("charset")) {
816 // exclude <meta charset="...">
817 return
818 }
819 }
820
821 out.Write([]byte("<meta"))
822 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
823 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
824 contentUrl := content[urlIndex+4:]
825 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
826 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
827 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
828 contentUrl = contentUrl[1 : len(contentUrl)-1]
829 }
830 }
831 // output proxify result
832 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
833 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
834 }
835 } else {
836 if len(http_equiv) > 0 {
837 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
838 }
839 sanitizeAttrs(rc, out, attrs)
840 }
841 out.Write([]byte(">"))
842}
843
844func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
845 for _, attr := range attrs {
846 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
847 }
848}
849
850func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
851 if inArray(attrName, SAFE_ATTRIBUTES) {
852 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
853 return
854 }
855 switch string(attrName) {
856 case "src", "href", "action":
857 if uri, err := rc.ProxifyURI(attrValue); err == nil {
858 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
859 } else if cfg.Debug {
860 log.Println("cannot proxify uri:", string(attrValue))
861 }
862 case "style":
863 cssAttr := bytes.NewBuffer(nil)
864 sanitizeCSS(rc, cssAttr, attrValue)
865 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
866 }
867}
868
// mergeURIs resolves u2 against u1 and returns the result. When u2 is nil,
// u1 itself is returned.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
875
// sanitizeURI cleans the start and end of uri and extracts its scheme.
//
// Bytes <= 32 (control characters and space) are trimmed from the end and
// skipped at the beginning. When the first significant bytes form a scheme,
// i.e. a ':' is met before any of '/', '?', '\\' or '#', the scheme is
// lower-cased and bytes <= 32 inside it are removed; the returned scheme
// includes the trailing ':'. Otherwise the returned scheme is empty.
//
// To avoid allocating (except for the scheme), the cleaned scheme is written
// back into uri and the result is a sub-slice of it: the caller's bytes are
// modified.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	var (
		scheme     = make([]byte, 0, 10) // significant bytes seen so far, lower-cased
		firstIndex = -1                  // offset of the first significant byte
		colonIndex = -1                  // offset of the ':' ending the scheme
	)

scan:
	for i := 0; i < len(uri); i++ {
		c := uri[i]
		// ignore special characters and space
		if c <= 32 {
			continue
		}
		if firstIndex < 0 {
			firstIndex = i
		}
		if c < utf8.RuneSelf && c >= 'A' && c <= 'Z' {
			c += 'a' - 'A'
		}
		scheme = append(scheme, c)

		switch c {
		case ':':
			// end of the scheme
			colonIndex = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colonIndex < 0 {
		// scheme NOT found
		if firstIndex < 0 {
			firstIndex = 0
		}
		return uri[firstIndex:], ""
	}

	// scheme found: place the cleaned scheme right before the rest of the
	// URI, so that it ends at the position of the ':'
	from := colonIndex - len(scheme) + 1
	copy(uri[from:], scheme)
	return uri[from:], string(scheme)
}
927
928func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
929 // sanitize URI
930 uri, scheme := sanitizeURI(uri)
931
932 // remove javascript protocol
933 if scheme == "javascript:" {
934 return "", nil
935 }
936
937 // TODO check malicious data: - e.g. data:script
938 if scheme == "data:" {
939 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
940 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
941 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
942 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
943 bytes.HasPrefix(uri, []byte("data:image/webp")) {
944 // should be safe
945 return string(uri), nil
946 } else {
947 // unsafe data
948 return "", nil
949 }
950 }
951
952 // parse the uri
953 u, err := url.Parse(string(uri))
954 if err != nil {
955 return "", err
956 }
957
958 // get the fragment (with the prefix "#")
959 fragment := ""
960 if len(u.Fragment) > 0 {
961 fragment = "#" + u.Fragment
962 }
963
964 // reset the fragment: it is not included in the mortyurl
965 u.Fragment = ""
966
967 // merge the URI with the document URI
968 u = mergeURIs(rc.BaseURL, u)
969
970 // simple internal link ?
971 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
972 if u.Scheme == rc.BaseURL.Scheme &&
973 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
974 u.Host == rc.BaseURL.Host &&
975 u.Path == rc.BaseURL.Path &&
976 u.RawQuery == rc.BaseURL.RawQuery {
977 // the fragment is the only difference between the document URI and the uri parameter
978 return fragment, nil
979 }
980
981 // return full URI and fragment (if not empty)
982 morty_uri := u.String()
983
984 if rc.Key == nil {
985 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
986 }
987 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
988}
989
// inArray reports whether a contains an element equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
998
// hash returns the hex-encoded HMAC-SHA256 of msg under key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))

	digest := signer.Sum(nil)
	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
1004
1005func verifyRequestURI(uri, hashMsg, key []byte) bool {
1006 h := make([]byte, hex.DecodedLen(len(hashMsg)))
1007 _, err := hex.Decode(h, hashMsg)
1008 if err != nil {
1009 if cfg.Debug {
1010 log.Println("hmac error:", err)
1011 }
1012 return false
1013 }
1014 mac := hmac.New(sha256.New, key)
1015 mac.Write(uri)
1016 return hmac.Equal(h, mac.Sum(nil))
1017}
1018
1019func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
1020 ctx.SetContentType("text/html")
1021 ctx.SetStatusCode(403)
1022 ctx.Write([]byte(MORTY_HTML_PAGE_START))
1023 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
1024 ctx.Write([]byte("<p>Following</p><p><a href=\""))
1025 ctx.Write([]byte(html.EscapeString(uri.String())))
1026 ctx.Write([]byte("\" rel=\"noreferrer\">"))
1027 ctx.Write([]byte(html.EscapeString(uri.String())))
1028 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
1029 ctx.Write([]byte(MORTY_HTML_PAGE_END))
1030}
1031
1032func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
1033 ctx.SetContentType("text/html; charset=UTF-8")
1034 ctx.SetStatusCode(statusCode)
1035 ctx.Write([]byte(MORTY_HTML_PAGE_START))
1036 if err != nil {
1037 if cfg.Debug {
1038 log.Println("error:", err)
1039 }
1040 ctx.Write([]byte("<h2>Error: "))
1041 ctx.Write([]byte(html.EscapeString(err.Error())))
1042 ctx.Write([]byte("</h2>"))
1043 }
1044 if p.Key == nil {
1045 ctx.Write([]byte(`
1046 <form action="post">
1047 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
1048 <input type="submit" value="go" />
1049 </form>`))
1050 } else {
1051 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
1052 }
1053 ctx.Write([]byte(MORTY_HTML_PAGE_END))
1054}
1055
1056func main() {
1057 cfg.ListenAddress = *flag.String("listen", cfg.ListenAddress, "Listen address")
1058 cfg.Key = *flag.String("key", cfg.Key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
1059 cfg.IPV6 = *flag.Bool("ipv6", cfg.IPV6, "Allow IPv6 HTTP requests")
1060 cfg.Debug = *flag.Bool("debug", cfg.Debug, "Debug mode")
1061 cfg.RequestTimeout = *flag.Uint("timeout", cfg.RequestTimeout, "Request timeout")
1062 cfg.FollowRedirect = *flag.Bool("followredirect", cfg.FollowRedirect, "Follow HTTP GET redirect")
1063 version := flag.Bool("version", false, "Show version")
1064 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
1065 flag.Parse()
1066
1067 if *version {
1068 fmt.Println(VERSION)
1069 return
1070 }
1071
1072 if *socks5 != "" {
1073 // this disables CLIENT.DialDualStack
1074 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1075 }
1076 if cfg.IPV6 {
1077 CLIENT.Dial = fasthttp.DialDualStack
1078 }
1079
1080 p := &Proxy{RequestTimeout: time.Duration(cfg.RequestTimeout) * time.Second,
1081 FollowRedirect: cfg.FollowRedirect}
1082
1083 if cfg.Key != "" {
1084 var err error
1085 p.Key, err = base64.StdEncoding.DecodeString(cfg.Key)
1086 if err != nil {
1087 log.Fatal("Error parsing -key", err.Error())
1088 os.Exit(1)
1089 }
1090 }
1091
1092 log.Println("listening on", cfg.ListenAddress)
1093
1094 if err := fasthttp.ListenAndServe(cfg.ListenAddress, p.RequestHandler); err != nil {
1095 log.Fatal("Error in ListenAndServe:", err)
1096 }
1097}
Note: See TracBrowser for help on using the repository browser.