source: code/trunk/morty.go@ 113

Last change on this file since 113 was 113, checked in by alex, 5 years ago

Increase ReadBufferSize (#74)

fix #61

File size: 28.1 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
[67]7 "encoding/base64"
[1]8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
[68]14 "mime"
[1]15 "net/url"
[78]16 "os"
[68]17 "path/filepath"
[1]18 "regexp"
19 "strings"
[4]20 "time"
[60]21 "unicode/utf8"
[1]22
23 "github.com/valyala/fasthttp"
[109]24 "github.com/valyala/fasthttp/fasthttpproxy"
[1]25 "golang.org/x/net/html"
[45]26 "golang.org/x/net/html/charset"
27 "golang.org/x/text/encoding"
[68]28
29 "github.com/asciimoo/morty/contenttype"
[1]30)
31
// Tokenizer states used by sanitizeHTML to decide how a text token is
// written out.
const (
	// STATE_DEFAULT: text is copied through unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is the body of a <style> element and goes through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is the body of a <noscript> element and is sanitized as HTML.
	STATE_IN_NOSCRIPT
)
37
// VERSION is the release string printed by the -version flag.
const (
	VERSION = "v0.2.0"
)
[74]39
// DEBUG enables verbose logging. It is on unless the DEBUG environment
// variable is set to exactly "false".
var DEBUG bool = !(os.Getenv("DEBUG") == "false")
[96]41
[1]42var CLIENT *fasthttp.Client = &fasthttp.Client{
43 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
[113]44 ReadBufferSize: 16 * 1024, // 16K
[1]45}
46
// CSS_URL_REGEXP matches CSS url(...) references. Submatch 1 and 3 are the
// optional opening and closing quote, submatch 2 is the URL itself.
var CSS_URL_REGEXP = regexp.MustCompile(
	"url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?",
)
[1]48
[68]49var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
50 // html
51 contenttype.NewFilterEquals("text", "html", ""),
52 contenttype.NewFilterEquals("application", "xhtml", "xml"),
53 // css
54 contenttype.NewFilterEquals("text", "css", ""),
55 // images
56 contenttype.NewFilterEquals("image", "gif", ""),
57 contenttype.NewFilterEquals("image", "png", ""),
58 contenttype.NewFilterEquals("image", "jpeg", ""),
59 contenttype.NewFilterEquals("image", "pjpeg", ""),
60 contenttype.NewFilterEquals("image", "webp", ""),
61 contenttype.NewFilterEquals("image", "tiff", ""),
62 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
63 contenttype.NewFilterEquals("image", "bmp", ""),
64 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
[88]65 contenttype.NewFilterEquals("image", "x-icon", ""),
[68]66 // fonts
67 contenttype.NewFilterEquals("application", "font-otf", ""),
68 contenttype.NewFilterEquals("application", "font-ttf", ""),
69 contenttype.NewFilterEquals("application", "font-woff", ""),
70 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
71})
72
73var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
74 // texts
75 contenttype.NewFilterEquals("text", "csv", ""),
[103]76 contenttype.NewFilterEquals("text", "tab-separated-values", ""),
[68]77 contenttype.NewFilterEquals("text", "plain", ""),
78 // API
79 contenttype.NewFilterEquals("application", "json", ""),
80 // Documents
81 contenttype.NewFilterEquals("application", "x-latex", ""),
82 contenttype.NewFilterEquals("application", "pdf", ""),
83 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
84 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
85 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
86 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
87 // Compressed archives
88 contenttype.NewFilterEquals("application", "zip", ""),
89 contenttype.NewFilterEquals("application", "gzip", ""),
90 contenttype.NewFilterEquals("application", "x-compressed", ""),
91 contenttype.NewFilterEquals("application", "x-gtar", ""),
92 contenttype.NewFilterEquals("application", "x-compress", ""),
93 // Generic binary
94 contenttype.NewFilterEquals("application", "octet-stream", ""),
95})
96
// ALLOWED_CONTENTTYPE_PARAMETERS lists the Content-Type parameters that are
// kept when RequestHandler filters the upstream content type.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{
	"charset": true,
}
100
// UNSAFE_ELEMENTS lists the tags that sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
110
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with only HTML escaping applied to the value.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
147
// SELF_CLOSING_ELEMENTS lists tags that have no end tag; sanitizeHTML does
// not wait for a closing tag when one of them is also an unsafe element.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
165
// LINK_REL_SAFE_VALUES lists the rel values for which sanitizeLinkTag keeps
// a <link> element; the comparison is an exact byte match.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
187
// LINK_HTTP_EQUIV_SAFE_VALUES lists the lower-cased http-equiv values for
// which sanitizeMetaTag keeps a <meta> element.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible is added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
196
// Proxy holds the server-wide settings used by the request handler.
type Proxy struct {
	// Key is the HMAC key used to verify "mortyhash"; nil disables verification.
	Key []byte
	// RequestTimeout bounds each upstream request.
	RequestTimeout time.Duration
}
201
// RequestConfig carries the per-document settings used while rewriting URLs.
type RequestConfig struct {
	// Key is the HMAC key used to sign proxified URLs; nil produces unsigned links.
	Key []byte
	// BaseURL is the URL against which relative references are resolved.
	BaseURL *url.URL
}
206
// HTML_FORM_EXTENSION is the format string for the hidden inputs appended to
// every <form>; the two verbs are the target URL and its hash.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
[1]208
// HTML_BODY_EXTENSION is the format string for the banner written before
// </body>. Its single %s verb is the original page URL; literal percent
// signs in the CSS are doubled.
var HTML_BODY_EXTENSION = `
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
  <p>This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p>
</div>
<style>
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 15%%; left: 0; max-width: 140px; overflow: hidden; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; padding: 12px 12px 8px 8px; color: #444; }
#mortyheader * { box-sizing: content-box; margin: 0; border: none; padding: 0; overflow: hidden; z-index: 2147483647 !important; line-height: 1em; font-size: 12px !important; font-family: sans !important; font-weight: normal; text-align: left; text-decoration: none; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
</style>
`
224
// HTML_HEAD_CONTENT_TYPE is the set of <meta> tags written right after the
// opening <head> tag of every sanitized document.
var HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
[45]229
// MORTY_HTML_PAGE_START is the opening part (head, styles and title) of the
// pages Morty renders itself: the main page and the exit page.
//
// The anchor rule previously read "a { text-decoration: none; #2980b9; }",
// which is not a valid declaration; the missing "color:" property name is
// restored so the link colour actually applies.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
	<div class="container">
		<h1>MortyProxy</h1>
`
252
// MORTY_HTML_PAGE_END closes the container opened by MORTY_HTML_PAGE_START
// and appends the footer.
var MORTY_HTML_PAGE_END = `
	</div>
	<div class="footer">
		<p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
		<a href="https://github.com/asciimoo/morty">view on github</a>
		</p>
	</div>
</body>
</html>`
262
// FAVICON_BYTES holds the decoded PNG served for /favicon.ico.
var FAVICON_BYTES []byte

// init decodes the embedded favicon.
//
// The literal carries no trailing "=" padding, so it is decoded with the
// unpadded alphabet. With StdEncoding the decoder reports an error and
// returns the image without its final two bytes; that error used to be
// discarded. A decoding failure here is a programming error in the literal,
// hence the panic.
func init() {
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	decoded, err := base64.RawStdEncoding.DecodeString(faviconBase64)
	if err != nil {
		panic("invalid embedded favicon: " + err.Error())
	}
	FAVICON_BYTES = decoded
}
270
[1]271func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]272
273 if appRequestHandler(ctx) {
274 return
275 }
276
[1]277 requestHash := popRequestParam(ctx, []byte("mortyhash"))
278
279 requestURI := popRequestParam(ctx, []byte("mortyurl"))
280
281 if requestURI == nil {
[35]282 p.serveMainPage(ctx, 200, nil)
[1]283 return
284 }
285
286 if p.Key != nil {
287 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[35]288 // HTTP status code 403 : Forbidden
289 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
[1]290 return
291 }
292 }
293
[97]294 parsedURI, err := url.Parse(string(requestURI))
[1]295
[11]296 if err != nil {
[35]297 // HTTP status code 500 : Internal Server Error
298 p.serveMainPage(ctx, 500, err)
[1]299 return
300 }
301
[69]302 // Serve an intermediate page for protocols other than HTTP(S)
303 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
304 p.serveExitMortyPage(ctx, parsedURI)
305 return
306 }
307
[1]308 req := fasthttp.AcquireRequest()
309 defer fasthttp.ReleaseRequest(req)
[12]310 req.SetConnectionClose()
[1]311
[47]312 requestURIStr := string(requestURI)
[1]313
[97]314 if DEBUG {
315 log.Println("getting", requestURIStr)
316 }
[1]317
[47]318 req.SetRequestURI(requestURIStr)
[111]319 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
[1]320
321 resp := fasthttp.AcquireResponse()
322 defer fasthttp.ReleaseResponse(resp)
323
324 req.Header.SetMethodBytes(ctx.Method())
325 if ctx.IsPost() || ctx.IsPut() {
326 req.SetBody(ctx.PostBody())
327 }
328
[11]329 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
330
331 if err != nil {
[35]332 if err == fasthttp.ErrTimeout {
333 // HTTP status code 504 : Gateway Time-Out
334 p.serveMainPage(ctx, 504, err)
335 } else {
336 // HTTP status code 500 : Internal Server Error
337 p.serveMainPage(ctx, 500, err)
338 }
[1]339 return
340 }
341
342 if resp.StatusCode() != 200 {
343 switch resp.StatusCode() {
[7]344 case 301, 302, 303, 307, 308:
[1]345 loc := resp.Header.Peek("Location")
346 if loc != nil {
[97]347 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
348 url, err := rc.ProxifyURI(loc)
349 if err == nil {
350 ctx.SetStatusCode(resp.StatusCode())
351 ctx.Response.Header.Add("Location", url)
352 if DEBUG {
353 log.Println("redirect to", string(loc))
[96]354 }
[1]355 return
356 }
357 }
358 }
[47]359 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
[37]360 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
[1]361 return
362 }
363
[68]364 contentTypeBytes := resp.Header.Peek("Content-Type")
[1]365
[68]366 if contentTypeBytes == nil {
[35]367 // HTTP status code 503 : Service Unavailable
368 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[1]369 return
370 }
371
[68]372 contentTypeString := string(contentTypeBytes)
373
374 // decode Content-Type header
375 contentType, error := contenttype.ParseContentType(contentTypeString)
376 if error != nil {
377 // HTTP status code 503 : Service Unavailable
378 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[63]379 return
380 }
381
[68]382 // content-disposition
383 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
[64]384
[68]385 // check content type
386 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
387 // it is not a usual content type
388 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
389 // force attachment for allowed content type
390 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
391 } else {
392 // deny access to forbidden content type
393 // HTTP status code 403 : Forbidden
394 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
395 return
396 }
397 }
398
399 // HACK : replace */xhtml by text/html
400 if contentType.SubType == "xhtml" {
401 contentType.TopLevelType = "text"
402 contentType.SubType = "html"
403 contentType.Suffix = ""
404 }
405
406 // conversion to UTF-8
[1]407 var responseBody []byte
408
[68]409 if contentType.TopLevelType == "text" {
410 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
[45]411 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
412 responseBody, err = e.NewDecoder().Bytes(resp.Body())
413 if err != nil {
414 // HTTP status code 503 : Service Unavailable
415 p.serveMainPage(ctx, 503, err)
416 return
417 }
418 } else {
419 responseBody = resp.Body()
[1]420 }
[68]421 // update the charset or specify it
422 contentType.Parameters["charset"] = "UTF-8"
[1]423 } else {
424 responseBody = resp.Body()
425 }
426
[68]427 //
428 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
[1]429
[68]430 // set the content type
431 ctx.SetContentType(contentType.String())
432
433 // output according to MIME type
[1]434 switch {
[68]435 case contentType.SubType == "css" && contentType.Suffix == "":
[23]436 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[68]437 case contentType.SubType == "html" && contentType.Suffix == "":
[23]438 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]439 default:
[68]440 if contentDispositionBytes != nil {
441 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
[39]442 }
[1]443 ctx.Write(responseBody)
444 }
445}
446
// contentDispositionForceAttachment returns a Content-Disposition header
// value whose disposition type is always "attachment".
//
// Parameters of the incoming header are kept when it parses; otherwise they
// are dropped. When no filename is present, the last element of url.Path is
// used, unless the path is empty or is the root, in which case no filename
// is emitted (previously "." or "/" was sent as the file name). If the
// parameters cannot be formatted, a bare "attachment" is returned instead of
// an empty header value.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var params map[string]string

	if contentDispositionBytes != nil {
		var err error
		_, params, err = mime.ParseMediaType(string(contentDispositionBytes))
		if err != nil {
			params = nil
		}
	}
	if params == nil {
		params = make(map[string]string)
	}

	// mime.ParseMediaType lower-cases parameter names, so use the same
	// spelling for both the lookup and the insertion.
	if _, fileNameDefined := params["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		if name != "." && name != "/" && name != string(filepath.Separator) {
			params["filename"] = name
		}
	}

	formatted := mime.FormatMediaType("attachment", params)
	if formatted == "" {
		// FormatMediaType returns "" when a parameter violates the syntax.
		formatted = "attachment"
	}
	return []byte(formatted)
}
469
[10]470func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]471 // serve robots.txt
[10]472 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
473 ctx.SetContentType("text/plain")
474 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
475 return true
476 }
[11]477
[67]478 // server favicon.ico
479 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
480 ctx.SetContentType("image/png")
481 ctx.Write(FAVICON_BYTES)
482 return true
483 }
484
[10]485 return false
486}
487
[1]488func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
489 param := ctx.QueryArgs().PeekBytes(paramName)
490
491 if param == nil {
492 param = ctx.PostArgs().PeekBytes(paramName)
493 if param != nil {
494 ctx.PostArgs().DelBytes(paramName)
495 }
496 } else {
497 ctx.QueryArgs().DelBytes(paramName)
498 }
499
500 return param
501}
502
[9]503func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]504 // TODO
505
506 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
507
508 if urlSlices == nil {
[9]509 out.Write(css)
[1]510 return
511 }
512
513 startIndex := 0
514
515 for _, s := range urlSlices {
[15]516 urlStart := s[4]
517 urlEnd := s[5]
[1]518
[60]519 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
[9]520 out.Write(css[startIndex:urlStart])
521 out.Write([]byte(uri))
[1]522 startIndex = urlEnd
[97]523 } else if DEBUG {
[36]524 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
[1]525 }
526 }
527 if startIndex < len(css) {
[9]528 out.Write(css[startIndex:len(css)])
[1]529 }
530}
531
[9]532func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]533 r := bytes.NewReader(htmlDoc)
534 decoder := html.NewTokenizer(r)
535 decoder.AllowCDATA(true)
536
537 unsafeElements := make([][]byte, 0, 8)
538 state := STATE_DEFAULT
539 for {
540 token := decoder.Next()
541 if token == html.ErrorToken {
542 err := decoder.Err()
543 if err != io.EOF {
[97]544 log.Println("failed to parse HTML")
[1]545 }
546 break
547 }
548
549 if len(unsafeElements) == 0 {
550
551 switch token {
552 case html.StartTagToken, html.SelfClosingTagToken:
553 tag, hasAttrs := decoder.TagName()
554 safe := !inArray(tag, UNSAFE_ELEMENTS)
555 if !safe {
556 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
557 var unsafeTag []byte = make([]byte, len(tag))
558 copy(unsafeTag, tag)
559 unsafeElements = append(unsafeElements, unsafeTag)
560 }
561 break
562 }
[38]563 if bytes.Equal(tag, []byte("base")) {
564 for {
565 attrName, attrValue, moreAttr := decoder.TagAttr()
[45]566 if bytes.Equal(attrName, []byte("href")) {
567 parsedURI, err := url.Parse(string(attrValue))
568 if err == nil {
569 rc.BaseURL = parsedURI
570 }
[38]571 }
572 if !moreAttr {
573 break
574 }
575 }
576 break
577 }
[1]578 if bytes.Equal(tag, []byte("noscript")) {
579 state = STATE_IN_NOSCRIPT
580 break
581 }
582 var attrs [][][]byte
583 if hasAttrs {
584 for {
585 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]586 attrs = append(attrs, [][]byte{
587 attrName,
588 attrValue,
589 []byte(html.EscapeString(string(attrValue))),
590 })
[1]591 if !moreAttr {
592 break
593 }
594 }
[13]595 }
596 if bytes.Equal(tag, []byte("link")) {
597 sanitizeLinkTag(rc, out, attrs)
598 break
599 }
600
[45]601 if bytes.Equal(tag, []byte("meta")) {
602 sanitizeMetaTag(rc, out, attrs)
603 break
604 }
605
[13]606 fmt.Fprintf(out, "<%s", tag)
607
608 if hasAttrs {
[45]609 sanitizeAttrs(rc, out, attrs)
[1]610 }
[13]611
[1]612 if token == html.SelfClosingTagToken {
[9]613 fmt.Fprintf(out, " />")
[1]614 } else {
[9]615 fmt.Fprintf(out, ">")
[1]616 if bytes.Equal(tag, []byte("style")) {
617 state = STATE_IN_STYLE
618 }
619 }
[13]620
[45]621 if bytes.Equal(tag, []byte("head")) {
[46]622 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
[45]623 }
624
[1]625 if bytes.Equal(tag, []byte("form")) {
626 var formURL *url.URL
627 for _, attr := range attrs {
628 if bytes.Equal(attr[0], []byte("action")) {
629 formURL, _ = url.Parse(string(attr[1]))
[28]630 formURL = mergeURIs(rc.BaseURL, formURL)
[1]631 break
632 }
633 }
634 if formURL == nil {
[23]635 formURL = rc.BaseURL
[1]636 }
[2]637 urlStr := formURL.String()
638 var key string
639 if rc.Key != nil {
640 key = hash(urlStr, rc.Key)
641 }
[9]642 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]643
644 }
645
646 case html.EndTagToken:
647 tag, _ := decoder.TagName()
648 writeEndTag := true
649 switch string(tag) {
650 case "body":
[23]651 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
[1]652 case "style":
653 state = STATE_DEFAULT
654 case "noscript":
655 state = STATE_DEFAULT
656 writeEndTag = false
657 }
658 // skip noscript tags - only the tag, not the content, because javascript is sanitized
659 if writeEndTag {
[9]660 fmt.Fprintf(out, "</%s>", tag)
[1]661 }
662
663 case html.TextToken:
664 switch state {
665 case STATE_DEFAULT:
[9]666 fmt.Fprintf(out, "%s", decoder.Raw())
[1]667 case STATE_IN_STYLE:
[9]668 sanitizeCSS(rc, out, decoder.Raw())
[1]669 case STATE_IN_NOSCRIPT:
[9]670 sanitizeHTML(rc, out, decoder.Raw())
[1]671 }
672
[62]673 case html.CommentToken:
674 // ignore comment. TODO : parse IE conditional comment
675
676 case html.DoctypeToken:
[9]677 out.Write(decoder.Raw())
[1]678 }
679 } else {
680 switch token {
681 case html.StartTagToken:
682 tag, _ := decoder.TagName()
683 if inArray(tag, UNSAFE_ELEMENTS) {
684 unsafeElements = append(unsafeElements, tag)
685 }
686
687 case html.EndTagToken:
688 tag, _ := decoder.TagName()
689 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
690 unsafeElements = unsafeElements[:len(unsafeElements)-1]
691 }
692 }
693 }
694 }
695}
696
[13]697func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
698 exclude := false
699 for _, attr := range attrs {
700 attrName := attr[0]
701 attrValue := attr[1]
702 if bytes.Equal(attrName, []byte("rel")) {
[46]703 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
[13]704 exclude = true
705 break
706 }
707 }
708 if bytes.Equal(attrName, []byte("as")) {
709 if bytes.Equal(attrValue, []byte("script")) {
710 exclude = true
711 break
712 }
713 }
714 }
715
716 if !exclude {
717 out.Write([]byte("<link"))
718 for _, attr := range attrs {
[21]719 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]720 }
721 out.Write([]byte(">"))
722 }
723}
724
[45]725func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]726 var http_equiv []byte
727 var content []byte
728
729 for _, attr := range attrs {
730 attrName := attr[0]
731 attrValue := attr[1]
732 if bytes.Equal(attrName, []byte("http-equiv")) {
733 http_equiv = bytes.ToLower(attrValue)
[46]734 // exclude some <meta http-equiv="..." ..>
735 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
736 return
737 }
[1]738 }
739 if bytes.Equal(attrName, []byte("content")) {
740 content = attrValue
741 }
[45]742 if bytes.Equal(attrName, []byte("charset")) {
743 // exclude <meta charset="...">
744 return
745 }
[1]746 }
747
[45]748 out.Write([]byte("<meta"))
[14]749 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
750 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
751 contentUrl := content[urlIndex+4:]
[36]752 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
[37]753 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
[36]754 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
[37]755 contentUrl = contentUrl[1 : len(contentUrl)-1]
[36]756 }
757 }
758 // output proxify result
[60]759 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
[14]760 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]761 }
762 } else {
[46]763 if len(http_equiv) > 0 {
764 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
765 }
[9]766 sanitizeAttrs(rc, out, attrs)
[1]767 }
[45]768 out.Write([]byte(">"))
[1]769}
770
[9]771func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]772 for _, attr := range attrs {
[21]773 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]774 }
775}
776
[21]777func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]778 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]779 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]780 return
781 }
782 switch string(attrName) {
783 case "src", "href", "action":
[60]784 if uri, err := rc.ProxifyURI(attrValue); err == nil {
[9]785 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[97]786 } else if DEBUG {
[36]787 log.Println("cannot proxify uri:", string(attrValue))
[1]788 }
789 case "style":
[21]790 cssAttr := bytes.NewBuffer(nil)
791 sanitizeCSS(rc, cssAttr, attrValue)
792 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]793 }
794}
795
// mergeURIs resolves u2 against u1. A nil u2 yields u1 itself.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
802
// sanitizeURI strips bytes with a value of 32 or below from the beginning
// and end of uri and returns the cleaned URI together with its lower-cased
// scheme (including the trailing ":"), or "" when no scheme is found.
//
// The scheme is searched up to the first ':', '/', '?', '\\' or '#'; bytes
// of value 32 or below inside it are ignored. When a scheme is found, its
// normalized form is written back into uri just before the remainder, so the
// result shares uri's backing array and uri is modified in place.
func sanitizeURI(uri []byte) ([]byte, string) {
	const trailingJunk = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20"

	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, trailingJunk)

	var (
		prefix     = make([]byte, 0, 10) // normalized bytes seen so far
		firstIndex = 0                   // index of the first byte above 32
		seenFirst  = false
		colonIndex = -1 // index of the ':' ending the scheme, if any
	)

scan:
	for i, c := range uri {
		// ignore special characters and space
		if c <= 32 {
			continue
		}

		// lower-case ASCII letters
		if c < utf8.RuneSelf && c >= 'A' && c <= 'Z' {
			c += 'a' - 'A'
		}
		prefix = append(prefix, c)

		if !seenFirst {
			firstIndex = i
			seenFirst = true
		}

		switch c {
		case ':':
			// end of the scheme
			colonIndex = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI
			break scan
		}
	}

	if colonIndex == -1 {
		// scheme NOT found
		return uri[firstIndex:], ""
	}

	// Overwrite the bytes just before the remainder with the normalized
	// scheme, so the result is contiguous without a new allocation.
	start := colonIndex - len(prefix) + 1
	copy(uri[start:], prefix)
	return uri[start:], string(prefix)
}
854
855func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
856 // sanitize URI
857 uri, scheme := sanitizeURI(uri)
858
[28]859 // remove javascript protocol
[60]860 if scheme == "javascript:" {
[28]861 return "", nil
862 }
[57]863
[1]864 // TODO check malicious data: - e.g. data:script
[60]865 if scheme == "data:" {
[61]866 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
867 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
868 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
869 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
870 bytes.HasPrefix(uri, []byte("data:image/webp")) {
871 // should be safe
872 return string(uri), nil
873 } else {
874 // unsafe data
875 return "", nil
876 }
[1]877 }
878
[57]879 // parse the uri
[60]880 u, err := url.Parse(string(uri))
[1]881 if err != nil {
882 return "", err
883 }
[57]884
885 // get the fragment (with the prefix "#")
886 fragment := ""
887 if len(u.Fragment) > 0 {
888 fragment = "#" + u.Fragment
889 }
890
891 // reset the fragment: it is not included in the mortyurl
892 u.Fragment = ""
893
894 // merge the URI with the document URI
[28]895 u = mergeURIs(rc.BaseURL, u)
[1]896
[57]897 // simple internal link ?
898 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
899 if u.Scheme == rc.BaseURL.Scheme &&
900 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
901 u.Host == rc.BaseURL.Host &&
902 u.Path == rc.BaseURL.Path &&
903 u.RawQuery == rc.BaseURL.RawQuery {
904 // the fragment is the only difference between the document URI and the uri parameter
905 return fragment, nil
906 }
907
908 // return full URI and fragment (if not empty)
[60]909 morty_uri := u.String()
[1]910
911 if rc.Key == nil {
[60]912 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
[1]913 }
[60]914 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
[1]915}
916
// inArray reports whether b is byte-for-byte equal to one of the entries of a.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
925
// hash returns the hex-encoded HMAC-SHA256 of msg under key.
func hash(msg string, key []byte) string {
	digest := hmac.New(sha256.New, key)
	io.WriteString(digest, msg)
	sum := digest.Sum(nil)
	return hex.EncodeToString(sum)
}
931
932func verifyRequestURI(uri, hashMsg, key []byte) bool {
933 h := make([]byte, hex.DecodedLen(len(hashMsg)))
934 _, err := hex.Decode(h, hashMsg)
935 if err != nil {
[97]936 if DEBUG {
937 log.Println("hmac error:", err)
938 }
[1]939 return false
940 }
941 mac := hmac.New(sha256.New, key)
942 mac.Write(uri)
943 return hmac.Equal(h, mac.Sum(nil))
944}
945
[69]946func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
947 ctx.SetContentType("text/html")
948 ctx.SetStatusCode(403)
949 ctx.Write([]byte(MORTY_HTML_PAGE_START))
950 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
951 ctx.Write([]byte("<p>Following</p><p><a href=\""))
952 ctx.Write([]byte(html.EscapeString(uri.String())))
953 ctx.Write([]byte("\" rel=\"noreferrer\">"))
954 ctx.Write([]byte(html.EscapeString(uri.String())))
955 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
956 ctx.Write([]byte(MORTY_HTML_PAGE_END))
957}
958
[35]959func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
[67]960 ctx.SetContentType("text/html; charset=UTF-8")
[35]961 ctx.SetStatusCode(statusCode)
[69]962 ctx.Write([]byte(MORTY_HTML_PAGE_START))
[11]963 if err != nil {
[97]964 if DEBUG {
965 log.Println("error:", err)
966 }
[11]967 ctx.Write([]byte("<h2>Error: "))
968 ctx.Write([]byte(html.EscapeString(err.Error())))
969 ctx.Write([]byte("</h2>"))
970 }
[1]971 if p.Key == nil {
972 ctx.Write([]byte(`
[36]973 <form action="post">
974 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
975 <input type="submit" value="go" />
976 </form>`))
[11]977 } else {
978 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]979 }
[69]980 ctx.Write([]byte(MORTY_HTML_PAGE_END))
[1]981}
982
983func main() {
[78]984 default_listen_addr := os.Getenv("MORTY_ADDRESS")
985 if default_listen_addr == "" {
986 default_listen_addr = "127.0.0.1:3000"
987 }
988 default_key := os.Getenv("MORTY_KEY")
989 listen := flag.String("listen", default_listen_addr, "Listen address")
[92]990 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
[24]991 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
[74]992 version := flag.Bool("version", false, "Show version")
[4]993 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[109]994 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
[1]995 flag.Parse()
996
[74]997 if *version {
998 fmt.Println(VERSION)
999 return
1000 }
1001
[24]1002 if *ipv6 {
[109]1003 CLIENT.DialDualStack = true
[24]1004 }
1005
[109]1006 if *socks5 != "" {
1007 // this disables CLIENT.DialDualStack
1008 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1009 }
1010
[4]1011 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]1012
1013 if *key != "" {
[92]1014 var err error
1015 p.Key, err = base64.StdEncoding.DecodeString(*key)
[94]1016 if err != nil {
1017 log.Fatal("Error parsing -key", err.Error())
1018 os.Exit(1)
[92]1019 }
[1]1020 }
1021
1022 log.Println("listening on", *listen)
1023
1024 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1025 log.Fatal("Error in ListenAndServe:", err)
1026 }
1027}
Note: See TracBrowser for help on using the repository browser.