source: code/trunk/morty.go@ 130

Last change on this file since 130 was 130, checked in by asciimoo, 5 years ago

[fix] cleanup rebase

File size: 29.7 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "html/template"
13 "io"
14 "log"
15 "mime"
16 "net/url"
17 "os"
18 "path/filepath"
19 "regexp"
20 "strings"
21 "time"
22 "unicode/utf8"
23
24 "github.com/valyala/fasthttp"
25 "github.com/valyala/fasthttp/fasthttpproxy"
26 "golang.org/x/net/html"
27 "golang.org/x/net/html/charset"
28 "golang.org/x/text/encoding"
29
30 "github.com/asciimoo/morty/config"
31 "github.com/asciimoo/morty/contenttype"
32)
33
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text is copied to the output as-is.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is the content of a <style> element and goes through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is the content of a <noscript> element and is sanitized as HTML.
	STATE_IN_NOSCRIPT
)

// VERSION is the string printed by the -version command line flag.
const VERSION = "v0.2.0"
41
42var CLIENT *fasthttp.Client = &fasthttp.Client{
43 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
44 ReadBufferSize: 16 * 1024, // 16K
45}
46
47var cfg *config.Config = config.DefaultConfig
48
49var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
50 // html
51 contenttype.NewFilterEquals("text", "html", ""),
52 contenttype.NewFilterEquals("application", "xhtml", "xml"),
53 // css
54 contenttype.NewFilterEquals("text", "css", ""),
55 // images
56 contenttype.NewFilterEquals("image", "gif", ""),
57 contenttype.NewFilterEquals("image", "png", ""),
58 contenttype.NewFilterEquals("image", "jpeg", ""),
59 contenttype.NewFilterEquals("image", "pjpeg", ""),
60 contenttype.NewFilterEquals("image", "webp", ""),
61 contenttype.NewFilterEquals("image", "tiff", ""),
62 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
63 contenttype.NewFilterEquals("image", "bmp", ""),
64 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
65 contenttype.NewFilterEquals("image", "x-icon", ""),
66 // fonts
67 contenttype.NewFilterEquals("application", "font-otf", ""),
68 contenttype.NewFilterEquals("application", "font-ttf", ""),
69 contenttype.NewFilterEquals("application", "font-woff", ""),
70 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
71})
72
73var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
74 // texts
75 contenttype.NewFilterEquals("text", "csv", ""),
76 contenttype.NewFilterEquals("text", "tab-separated-values", ""),
77 contenttype.NewFilterEquals("text", "plain", ""),
78 // API
79 contenttype.NewFilterEquals("application", "json", ""),
80 // Documents
81 contenttype.NewFilterEquals("application", "x-latex", ""),
82 contenttype.NewFilterEquals("application", "pdf", ""),
83 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
84 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
85 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
86 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
87 // Compressed archives
88 contenttype.NewFilterEquals("application", "zip", ""),
89 contenttype.NewFilterEquals("application", "gzip", ""),
90 contenttype.NewFilterEquals("application", "x-compressed", ""),
91 contenttype.NewFilterEquals("application", "x-gtar", ""),
92 contenttype.NewFilterEquals("application", "x-compress", ""),
93 // Generic binary
94 contenttype.NewFilterEquals("application", "octet-stream", ""),
95})
96
97var ALLOWED_CONTENTTYPE_PARAMETERS map[string]bool = map[string]bool{
98 "charset": true,
99}
100
var (
	// UNSAFE_ELEMENTS lists the tags that sanitizeHTML drops together with
	// everything nested inside them.
	UNSAFE_ELEMENTS = [][]byte{
		[]byte("applet"),
		[]byte("canvas"),
		[]byte("embed"),
		//[]byte("iframe"),
		[]byte("math"),
		[]byte("script"),
		[]byte("svg"),
	}

	// SAFE_ATTRIBUTES lists the attributes that sanitizeAttr copies to the
	// output with their HTML-escaped value and no further rewriting.
	SAFE_ATTRIBUTES = [][]byte{
		[]byte("abbr"),
		[]byte("accesskey"),
		[]byte("align"),
		[]byte("alt"),
		[]byte("as"),
		[]byte("autocomplete"),
		[]byte("charset"),
		[]byte("checked"),
		[]byte("class"),
		[]byte("content"),
		[]byte("contenteditable"),
		[]byte("contextmenu"),
		[]byte("dir"),
		[]byte("for"),
		[]byte("height"),
		[]byte("hidden"),
		[]byte("hreflang"),
		[]byte("id"),
		[]byte("lang"),
		[]byte("media"),
		[]byte("method"),
		[]byte("name"),
		[]byte("nowrap"),
		[]byte("placeholder"),
		[]byte("property"),
		[]byte("rel"),
		[]byte("spellcheck"),
		[]byte("tabindex"),
		[]byte("target"),
		[]byte("title"),
		[]byte("translate"),
		[]byte("type"),
		[]byte("value"),
		[]byte("width"),
	}

	// LINK_REL_SAFE_VALUES lists the rel values for which sanitizeLinkTag
	// keeps a <link> element; the comparison is an exact byte match.
	LINK_REL_SAFE_VALUES = [][]byte{
		[]byte("alternate"),
		[]byte("archives"),
		[]byte("author"),
		[]byte("copyright"),
		[]byte("first"),
		[]byte("help"),
		[]byte("icon"),
		[]byte("index"),
		[]byte("last"),
		[]byte("license"),
		[]byte("manifest"),
		[]byte("next"),
		[]byte("pingback"),
		[]byte("prev"),
		[]byte("publisher"),
		[]byte("search"),
		[]byte("shortcut icon"),
		[]byte("stylesheet"),
		[]byte("up"),
	}

	// LINK_HTTP_EQUIV_SAFE_VALUES lists the lower-cased http-equiv values for
	// which sanitizeMetaTag keeps a <meta> element.
	LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
		// X-UA-Compatible will be added automatically, so it can be skipped
		[]byte("date"),
		[]byte("last-modified"),
		[]byte("refresh"), // URL rewrite
		// []byte("location"), TODO URL rewrite
		[]byte("content-language"),
	}
)
178
// CSS_URL_REGEXP locates url(...) references inside CSS. Capture group 2 is
// the URL itself; sanitizeCSS rewrites that span and leaves the rest alone.
var CSS_URL_REGEXP = regexp.MustCompile(
	"url\\(" + // literal "url("
		"(['\"]?)" + // group 1: optional opening quote
		"[ \\t\\f]*" + // optional blanks before the URL
		"([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)" + // group 2: the URL
		"(['\"]?)" + // group 3: optional closing quote
		"\\)?") // optional closing parenthesis
180
type (
	// Proxy is the HTTP handler state shared by all requests.
	Proxy struct {
		// Key is the HMAC key checked against the "mortyhash" parameter;
		// RequestHandler skips the check when it is nil.
		Key []byte
		// RequestTimeout is the timeout passed to the upstream client.
		RequestTimeout time.Duration
	}

	// RequestConfig carries the per-document state used while rewriting URLs.
	RequestConfig struct {
		// Key is the HMAC key used to sign proxified URLs (nil: unsigned).
		Key []byte
		// BaseURL is the URL relative references are resolved against.
		BaseURL *url.URL
		// BodyInjected is set by sanitizeHTML once the header has been
		// written before a closing </body> tag.
		BodyInjected bool
	}

	// HTMLBodyExtParam is the data passed to HTML_BODY_EXTENSION.
	HTMLBodyExtParam struct {
		BaseURL     string
		HasMortyKey bool
	}

	// HTMLFormExtParam is the data passed to HTML_FORM_EXTENSION.
	HTMLFormExtParam struct {
		BaseURL   string
		MortyHash string
	}
)
201
// HTML_FORM_EXTENSION renders the hidden inputs injected into every <form>;
// it is parsed in init.
var HTML_FORM_EXTENSION *template.Template

// HTML_BODY_EXTENSION renders the Morty header bar injected into sanitized
// pages; it is parsed in init.
var HTML_BODY_EXTENSION *template.Template

// HTML_HEAD_CONTENT_TYPE is written right after every <head> start tag by
// sanitizeHTML.
var HTML_HEAD_CONTENT_TYPE string = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`

// MORTY_HTML_PAGE_START is the opening part of the pages Morty generates
// itself (main page, error page, exit page).
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
 <div class="container">
 <h1>MortyProxy</h1>
`

// MORTY_HTML_PAGE_END closes the markup opened by MORTY_HTML_PAGE_START.
var MORTY_HTML_PAGE_END string = `
 </div>
 <div class="footer">
 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
 <a href="https://github.com/asciimoo/morty">view on github</a>
 </p>
 </div>
</body>
</html>`

// FAVICON_BYTES is the PNG served for /favicon.ico; it is decoded in init.
var FAVICON_BYTES []byte
243
244func init() {
245 FaviconBase64 := "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"
246
247 FAVICON_BYTES, _ = base64.StdEncoding.DecodeString(FaviconBase64)
248 var err error
249 HTML_FORM_EXTENSION, err = template.New("html_form_extension").Parse(
250 `<input type="hidden" name="mortyurl" value="{{.BaseURL}}" />{{if .MortyHash}}<input type="hidden" name="mortyhash" value="{{.MortyHash}}" />{{end}}`)
251 if err != nil {
252 panic(err)
253 }
254 HTML_BODY_EXTENSION, err = template.New("html_body_extension").Parse(`
255<input type="checkbox" id="mortytoggle" autocomplete="off" />
256<div id="mortyheader">
257 <form method="get">
258 <label for="mortytoggle">hide</label>
259 <span><a href="/">Morty Proxy</a></span>
260 <input type="url" value="{{.BaseURL}}" name="mortyurl" {{if .HasMortyKey }}readonly="true"{{end}} />
261 This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page, visit <a href="{{.BaseURL}}" rel="noreferrer">original site</a>.
262 </form>
263</div>
264<style>
265body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
266#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #AAAAAA; background: #FFF; padding: 4px; color: #444; height: 42px; }
267#mortyheader * { padding: 0; margin: 0; }
268#mortyheader p { padding: 0 0 0.7em 0; display: block; }
269#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
270#mortyheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
271#mortyheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
272input[type=checkbox]#mortytoggle { display: none; }
273input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
274#mortyheader input[type=url] { width: 50%; padding: 4px; font-size: 16px; }
275</style>
276`)
277 if err != nil {
278 panic(err)
279 }
280}
281
282func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
283
284 if appRequestHandler(ctx) {
285 return
286 }
287
288 requestHash := popRequestParam(ctx, []byte("mortyhash"))
289
290 requestURI := popRequestParam(ctx, []byte("mortyurl"))
291
292 if requestURI == nil {
293 p.serveMainPage(ctx, 200, nil)
294 return
295 }
296
297 if p.Key != nil {
298 if !verifyRequestURI(requestURI, requestHash, p.Key) {
299 // HTTP status code 403 : Forbidden
300 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
301 return
302 }
303 }
304
305 requestURIQuery := ctx.QueryArgs().QueryString()
306 if len(requestURIQuery) > 0 {
307 if bytes.ContainsRune(requestURI, '?') {
308 requestURI = append(requestURI, '&')
309 } else {
310 requestURI = append(requestURI, '?')
311 }
312 requestURI = append(requestURI, requestURIQuery...)
313 }
314
315 parsedURI, err := url.Parse(string(requestURI))
316
317 if err != nil {
318 // HTTP status code 500 : Internal Server Error
319 p.serveMainPage(ctx, 500, err)
320 return
321 }
322
323 if parsedURI.Scheme == "" {
324 requestURI = append([]byte("https://"), requestURI...)
325 parsedURI, err = url.Parse(string(requestURI))
326 if err != nil {
327 p.serveMainPage(ctx, 500, err)
328 return
329 }
330 }
331
332 // Serve an intermediate page for protocols other than HTTP(S)
333 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
334 p.serveExitMortyPage(ctx, parsedURI)
335 return
336 }
337
338 req := fasthttp.AcquireRequest()
339 defer fasthttp.ReleaseRequest(req)
340 req.SetConnectionClose()
341
342 requestURIStr := string(requestURI)
343
344 if cfg.Debug {
345 log.Println(string(ctx.Method()), requestURIStr)
346 }
347
348 req.SetRequestURI(requestURIStr)
349 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
350
351 resp := fasthttp.AcquireResponse()
352 defer fasthttp.ReleaseResponse(resp)
353
354 req.Header.SetMethodBytes(ctx.Method())
355 if ctx.IsPost() || ctx.IsPut() {
356 req.SetBody(ctx.PostBody())
357 }
358
359 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
360
361 if err != nil {
362 if err == fasthttp.ErrTimeout {
363 // HTTP status code 504 : Gateway Time-Out
364 p.serveMainPage(ctx, 504, err)
365 } else {
366 // HTTP status code 500 : Internal Server Error
367 p.serveMainPage(ctx, 500, err)
368 }
369 return
370 }
371
372 if resp.StatusCode() != 200 {
373 switch resp.StatusCode() {
374 case 301, 302, 303, 307, 308:
375 loc := resp.Header.Peek("Location")
376 if loc != nil {
377 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
378 url, err := rc.ProxifyURI(loc)
379 if err == nil {
380 ctx.SetStatusCode(resp.StatusCode())
381 ctx.Response.Header.Add("Location", url)
382 if cfg.Debug {
383 log.Println("redirect to", string(loc))
384 }
385 return
386 }
387 }
388 }
389 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
390 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
391 return
392 }
393
394 contentTypeBytes := resp.Header.Peek("Content-Type")
395
396 if contentTypeBytes == nil {
397 // HTTP status code 503 : Service Unavailable
398 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
399 return
400 }
401
402 contentTypeString := string(contentTypeBytes)
403
404 // decode Content-Type header
405 contentType, error := contenttype.ParseContentType(contentTypeString)
406 if error != nil {
407 // HTTP status code 503 : Service Unavailable
408 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
409 return
410 }
411
412 // content-disposition
413 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
414
415 // check content type
416 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
417 // it is not a usual content type
418 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
419 // force attachment for allowed content type
420 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
421 } else {
422 // deny access to forbidden content type
423 // HTTP status code 403 : Forbidden
424 p.serveMainPage(ctx, 403, errors.New("forbidden content type "+parsedURI.String()))
425 return
426 }
427 }
428
429 // HACK : replace */xhtml by text/html
430 if contentType.SubType == "xhtml" {
431 contentType.TopLevelType = "text"
432 contentType.SubType = "html"
433 contentType.Suffix = ""
434 }
435
436 // conversion to UTF-8
437 var responseBody []byte
438
439 if contentType.TopLevelType == "text" {
440 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
441 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
442 responseBody, err = e.NewDecoder().Bytes(resp.Body())
443 if err != nil {
444 // HTTP status code 503 : Service Unavailable
445 p.serveMainPage(ctx, 503, err)
446 return
447 }
448 } else {
449 responseBody = resp.Body()
450 }
451 // update the charset or specify it
452 contentType.Parameters["charset"] = "UTF-8"
453 } else {
454 responseBody = resp.Body()
455 }
456
457 //
458 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
459
460 // set the content type
461 ctx.SetContentType(contentType.String())
462
463 // output according to MIME type
464 switch {
465 case contentType.SubType == "css" && contentType.Suffix == "":
466 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
467 case contentType.SubType == "html" && contentType.Suffix == "":
468 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
469 sanitizeHTML(rc, ctx, responseBody)
470 if !rc.BodyInjected {
471 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
472 if len(rc.Key) > 0 {
473 p.HasMortyKey = true
474 }
475 err := HTML_BODY_EXTENSION.Execute(ctx, p)
476 if err != nil {
477 if cfg.Debug {
478 fmt.Println("failed to inject body extension", err)
479 }
480 }
481 }
482 default:
483 if contentDispositionBytes != nil {
484 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
485 }
486 ctx.Write(responseBody)
487 }
488}
489
// contentDispositionForceAttachment returns a Content-Disposition header value
// whose disposition type is always "attachment".
//
// Parameters of the given header (which may be nil or unparsable) are kept.
// When no filename is present, the last element of the URL path is used,
// unless the path has no usable last element (empty or root path), in which
// case no filename is emitted at all.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var contentDispositionParams map[string]string

	if contentDispositionBytes != nil {
		// An unparsable header is treated like a missing one.
		if _, params, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil {
			contentDispositionParams = params
		}
	}
	if contentDispositionParams == nil {
		contentDispositionParams = make(map[string]string)
	}

	if _, fileNameDefined := contentDispositionParams["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		// filepath.Base yields "." for an empty path and a lone separator
		// for a root path; neither is a meaningful file name.
		if name != "." && name != "/" && name != string(filepath.Separator) {
			contentDispositionParams["filename"] = name
		}
	}

	// FormatMediaType returns "" when it cannot encode the parameters; fall
	// back to the bare disposition so the download is still forced.
	if formatted := mime.FormatMediaType("attachment", contentDispositionParams); formatted != "" {
		return []byte(formatted)
	}
	return []byte("attachment")
}
512
513func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
514 // serve robots.txt
515 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
516 ctx.SetContentType("text/plain")
517 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
518 return true
519 }
520
521 // server favicon.ico
522 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
523 ctx.SetContentType("image/png")
524 ctx.Write(FAVICON_BYTES)
525 return true
526 }
527
528 return false
529}
530
531func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
532 param := ctx.QueryArgs().PeekBytes(paramName)
533
534 if param == nil {
535 param = ctx.PostArgs().PeekBytes(paramName)
536 ctx.PostArgs().DelBytes(paramName)
537 }
538 ctx.QueryArgs().DelBytes(paramName)
539
540 return param
541}
542
543func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
544 // TODO
545
546 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
547
548 if urlSlices == nil {
549 out.Write(css)
550 return
551 }
552
553 startIndex := 0
554
555 for _, s := range urlSlices {
556 urlStart := s[4]
557 urlEnd := s[5]
558
559 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
560 out.Write(css[startIndex:urlStart])
561 out.Write([]byte(uri))
562 startIndex = urlEnd
563 } else if cfg.Debug {
564 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
565 }
566 }
567 if startIndex < len(css) {
568 out.Write(css[startIndex:len(css)])
569 }
570}
571
572func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
573 r := bytes.NewReader(htmlDoc)
574 decoder := html.NewTokenizer(r)
575 decoder.AllowCDATA(true)
576
577 unsafeElements := make([][]byte, 0, 8)
578 state := STATE_DEFAULT
579 for {
580 token := decoder.Next()
581 if token == html.ErrorToken {
582 err := decoder.Err()
583 if err != io.EOF {
584 log.Println("failed to parse HTML")
585 }
586 break
587 }
588
589 if len(unsafeElements) == 0 {
590
591 switch token {
592 case html.StartTagToken, html.SelfClosingTagToken:
593 tag, hasAttrs := decoder.TagName()
594 safe := !inArray(tag, UNSAFE_ELEMENTS)
595 if !safe {
596 if token != html.SelfClosingTagToken {
597 var unsafeTag []byte = make([]byte, len(tag))
598 copy(unsafeTag, tag)
599 unsafeElements = append(unsafeElements, unsafeTag)
600 }
601 break
602 }
603 if bytes.Equal(tag, []byte("base")) {
604 for {
605 attrName, attrValue, moreAttr := decoder.TagAttr()
606 if bytes.Equal(attrName, []byte("href")) {
607 parsedURI, err := url.Parse(string(attrValue))
608 if err == nil {
609 rc.BaseURL = parsedURI
610 }
611 }
612 if !moreAttr {
613 break
614 }
615 }
616 break
617 }
618 if bytes.Equal(tag, []byte("noscript")) {
619 state = STATE_IN_NOSCRIPT
620 break
621 }
622 var attrs [][][]byte
623 if hasAttrs {
624 for {
625 attrName, attrValue, moreAttr := decoder.TagAttr()
626 attrs = append(attrs, [][]byte{
627 attrName,
628 attrValue,
629 []byte(html.EscapeString(string(attrValue))),
630 })
631 if !moreAttr {
632 break
633 }
634 }
635 }
636 if bytes.Equal(tag, []byte("link")) {
637 sanitizeLinkTag(rc, out, attrs)
638 break
639 }
640
641 if bytes.Equal(tag, []byte("meta")) {
642 sanitizeMetaTag(rc, out, attrs)
643 break
644 }
645
646 fmt.Fprintf(out, "<%s", tag)
647
648 if hasAttrs {
649 sanitizeAttrs(rc, out, attrs)
650 }
651
652 if token == html.SelfClosingTagToken {
653 fmt.Fprintf(out, " />")
654 } else {
655 fmt.Fprintf(out, ">")
656 if bytes.Equal(tag, []byte("style")) {
657 state = STATE_IN_STYLE
658 }
659 }
660
661 if bytes.Equal(tag, []byte("head")) {
662 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
663 }
664
665 if bytes.Equal(tag, []byte("form")) {
666 var formURL *url.URL
667 for _, attr := range attrs {
668 if bytes.Equal(attr[0], []byte("action")) {
669 formURL, _ = url.Parse(string(attr[1]))
670 formURL = mergeURIs(rc.BaseURL, formURL)
671 break
672 }
673 }
674 if formURL == nil {
675 formURL = rc.BaseURL
676 }
677 urlStr := formURL.String()
678 var key string
679 if rc.Key != nil {
680 key = hash(urlStr, rc.Key)
681 }
682 err := HTML_FORM_EXTENSION.Execute(out, HTMLFormExtParam{urlStr, key})
683 if err != nil {
684 if cfg.Debug {
685 fmt.Println("failed to inject body extension", err)
686 }
687 }
688 }
689
690 case html.EndTagToken:
691 tag, _ := decoder.TagName()
692 writeEndTag := true
693 switch string(tag) {
694 case "body":
695 p := HTMLBodyExtParam{rc.BaseURL.String(), false}
696 if len(rc.Key) > 0 {
697 p.HasMortyKey = true
698 }
699 err := HTML_BODY_EXTENSION.Execute(out, p)
700 if err != nil {
701 if cfg.Debug {
702 fmt.Println("failed to inject body extension", err)
703 }
704 }
705 rc.BodyInjected = true
706 case "style":
707 state = STATE_DEFAULT
708 case "noscript":
709 state = STATE_DEFAULT
710 writeEndTag = false
711 }
712 // skip noscript tags - only the tag, not the content, because javascript is sanitized
713 if writeEndTag {
714 fmt.Fprintf(out, "</%s>", tag)
715 }
716
717 case html.TextToken:
718 switch state {
719 case STATE_DEFAULT:
720 fmt.Fprintf(out, "%s", decoder.Raw())
721 case STATE_IN_STYLE:
722 sanitizeCSS(rc, out, decoder.Raw())
723 case STATE_IN_NOSCRIPT:
724 sanitizeHTML(rc, out, decoder.Raw())
725 }
726
727 case html.CommentToken:
728 // ignore comment. TODO : parse IE conditional comment
729
730 case html.DoctypeToken:
731 out.Write(decoder.Raw())
732 }
733 } else {
734 switch token {
735 case html.StartTagToken, html.SelfClosingTagToken:
736 tag, _ := decoder.TagName()
737 if inArray(tag, UNSAFE_ELEMENTS) {
738 unsafeElements = append(unsafeElements, tag)
739 }
740
741 case html.EndTagToken:
742 tag, _ := decoder.TagName()
743 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
744 unsafeElements = unsafeElements[:len(unsafeElements)-1]
745 }
746 }
747 }
748 }
749}
750
751func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
752 exclude := false
753 for _, attr := range attrs {
754 attrName := attr[0]
755 attrValue := attr[1]
756 if bytes.Equal(attrName, []byte("rel")) {
757 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
758 exclude = true
759 break
760 }
761 }
762 if bytes.Equal(attrName, []byte("as")) {
763 if bytes.Equal(attrValue, []byte("script")) {
764 exclude = true
765 break
766 }
767 }
768 }
769
770 if !exclude {
771 out.Write([]byte("<link"))
772 for _, attr := range attrs {
773 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
774 }
775 out.Write([]byte(">"))
776 }
777}
778
779func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
780 var http_equiv []byte
781 var content []byte
782
783 for _, attr := range attrs {
784 attrName := attr[0]
785 attrValue := attr[1]
786 if bytes.Equal(attrName, []byte("http-equiv")) {
787 http_equiv = bytes.ToLower(attrValue)
788 // exclude some <meta http-equiv="..." ..>
789 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
790 return
791 }
792 }
793 if bytes.Equal(attrName, []byte("content")) {
794 content = attrValue
795 }
796 if bytes.Equal(attrName, []byte("charset")) {
797 // exclude <meta charset="...">
798 return
799 }
800 }
801
802 out.Write([]byte("<meta"))
803 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
804 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
805 contentUrl := content[urlIndex+4:]
806 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
807 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
808 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
809 contentUrl = contentUrl[1 : len(contentUrl)-1]
810 }
811 }
812 // output proxify result
813 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
814 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
815 }
816 } else {
817 if len(http_equiv) > 0 {
818 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
819 }
820 sanitizeAttrs(rc, out, attrs)
821 }
822 out.Write([]byte(">"))
823}
824
825func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
826 for _, attr := range attrs {
827 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
828 }
829}
830
831func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
832 if inArray(attrName, SAFE_ATTRIBUTES) {
833 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
834 return
835 }
836 switch string(attrName) {
837 case "src", "href", "action":
838 if uri, err := rc.ProxifyURI(attrValue); err == nil {
839 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
840 } else if cfg.Debug {
841 log.Println("cannot proxify uri:", string(attrValue))
842 }
843 case "style":
844 cssAttr := bytes.NewBuffer(nil)
845 sanitizeCSS(rc, cssAttr, attrValue)
846 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
847 }
848}
849
// mergeURIs resolves u2 against u1 and returns the result; u1 itself is
// returned when u2 is nil.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
856
// sanitizeURI strips bytes <= 0x20 from both ends of uri and normalizes its
// scheme: the scheme is lower-cased and any byte <= 0x20 embedded in it is
// removed. It returns the cleaned URI and the scheme including its trailing
// ":" (empty when no scheme was found before the first '/', '?', '\' or '#').
//
// The cleaned scheme is written back into uri's own storage, so the caller's
// slice is modified; only the scheme copy is allocated.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	var (
		lowered = make([]byte, 0, 10) // significant bytes seen so far, lower-cased
		start   = 0                   // index of the first significant byte
		colonAt = -1                  // index of the ':' closing the scheme
	)

scan:
	for idx := 0; idx < len(uri); idx++ {
		ch := uri[idx]
		if ch <= 32 {
			// control characters and spaces are not significant
			continue
		}
		if ch < utf8.RuneSelf && ch >= 'A' && ch <= 'Z' {
			ch += 'a' - 'A'
		}
		if len(lowered) == 0 {
			start = idx
		}
		lowered = append(lowered, ch)

		switch ch {
		case ':':
			// end of the scheme
			colonAt = idx
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI: there is no scheme
			break scan
		}
	}

	if colonAt == -1 {
		return uri[start:], ""
	}

	// Overwrite the bytes right before (and including) the ':' with the
	// cleaned scheme; the cleaned scheme is never longer than the original.
	begin := colonAt - len(lowered) + 1
	copy(uri[begin:], lowered)
	return uri[begin:], string(lowered)
}
908
909func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
910 // sanitize URI
911 uri, scheme := sanitizeURI(uri)
912
913 // remove javascript protocol
914 if scheme == "javascript:" {
915 return "", nil
916 }
917
918 // TODO check malicious data: - e.g. data:script
919 if scheme == "data:" {
920 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
921 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
922 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
923 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
924 bytes.HasPrefix(uri, []byte("data:image/webp")) {
925 // should be safe
926 return string(uri), nil
927 } else {
928 // unsafe data
929 return "", nil
930 }
931 }
932
933 // parse the uri
934 u, err := url.Parse(string(uri))
935 if err != nil {
936 return "", err
937 }
938
939 // get the fragment (with the prefix "#")
940 fragment := ""
941 if len(u.Fragment) > 0 {
942 fragment = "#" + u.Fragment
943 }
944
945 // reset the fragment: it is not included in the mortyurl
946 u.Fragment = ""
947
948 // merge the URI with the document URI
949 u = mergeURIs(rc.BaseURL, u)
950
951 // simple internal link ?
952 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
953 if u.Scheme == rc.BaseURL.Scheme &&
954 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
955 u.Host == rc.BaseURL.Host &&
956 u.Path == rc.BaseURL.Path &&
957 u.RawQuery == rc.BaseURL.RawQuery {
958 // the fragment is the only difference between the document URI and the uri parameter
959 return fragment, nil
960 }
961
962 // return full URI and fragment (if not empty)
963 morty_uri := u.String()
964
965 if rc.Key == nil {
966 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
967 }
968 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
969}
970
// inArray reports whether a contains an element whose bytes equal b.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
979
// hash returns the lower-case hexadecimal HMAC-SHA256 of msg under key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
985
986func verifyRequestURI(uri, hashMsg, key []byte) bool {
987 h := make([]byte, hex.DecodedLen(len(hashMsg)))
988 _, err := hex.Decode(h, hashMsg)
989 if err != nil {
990 if cfg.Debug {
991 log.Println("hmac error:", err)
992 }
993 return false
994 }
995 mac := hmac.New(sha256.New, key)
996 mac.Write(uri)
997 return hmac.Equal(h, mac.Sum(nil))
998}
999
1000func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
1001 ctx.SetContentType("text/html")
1002 ctx.SetStatusCode(403)
1003 ctx.Write([]byte(MORTY_HTML_PAGE_START))
1004 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
1005 ctx.Write([]byte("<p>Following</p><p><a href=\""))
1006 ctx.Write([]byte(html.EscapeString(uri.String())))
1007 ctx.Write([]byte("\" rel=\"noreferrer\">"))
1008 ctx.Write([]byte(html.EscapeString(uri.String())))
1009 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
1010 ctx.Write([]byte(MORTY_HTML_PAGE_END))
1011}
1012
1013func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
1014 ctx.SetContentType("text/html; charset=UTF-8")
1015 ctx.SetStatusCode(statusCode)
1016 ctx.Write([]byte(MORTY_HTML_PAGE_START))
1017 if err != nil {
1018 if cfg.Debug {
1019 log.Println("error:", err)
1020 }
1021 ctx.Write([]byte("<h2>Error: "))
1022 ctx.Write([]byte(html.EscapeString(err.Error())))
1023 ctx.Write([]byte("</h2>"))
1024 }
1025 if p.Key == nil {
1026 ctx.Write([]byte(`
1027 <form action="post">
1028 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
1029 <input type="submit" value="go" />
1030 </form>`))
1031 } else {
1032 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
1033 }
1034 ctx.Write([]byte(MORTY_HTML_PAGE_END))
1035}
1036
1037func main() {
1038 cfg.ListenAddress = *flag.String("listen", cfg.ListenAddress, "Listen address")
1039 cfg.Key = *flag.String("key", cfg.Key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
1040 cfg.IPV6 = *flag.Bool("ipv6", cfg.IPV6, "Allow IPv6 HTTP requests")
1041 cfg.Debug = *flag.Bool("debug", cfg.Debug, "Debug mode")
1042 cfg.RequestTimeout = *flag.Uint("timeout", cfg.RequestTimeout, "Request timeout")
1043 version := flag.Bool("version", false, "Show version")
1044 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
1045 flag.Parse()
1046
1047 if *version {
1048 fmt.Println(VERSION)
1049 return
1050 }
1051
1052 if *socks5 != "" {
1053 // this disables CLIENT.DialDualStack
1054 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1055 }
1056 if cfg.IPV6 {
1057 CLIENT.Dial = fasthttp.DialDualStack
1058 }
1059
1060 p := &Proxy{RequestTimeout: time.Duration(cfg.RequestTimeout) * time.Second}
1061
1062 if cfg.Key != "" {
1063 var err error
1064 p.Key, err = base64.StdEncoding.DecodeString(cfg.Key)
1065 if err != nil {
1066 log.Fatal("Error parsing -key", err.Error())
1067 os.Exit(1)
1068 }
1069 }
1070
1071 log.Println("listening on", cfg.ListenAddress)
1072
1073 if err := fasthttp.ListenAndServe(cfg.ListenAddress, p.RequestHandler); err != nil {
1074 log.Fatal("Error in ListenAndServe:", err)
1075 }
1076}
Note: See TracBrowser for help on using the repository browser.