source: code/trunk/morty.go@ 112

Last change on this file since 112 was 112, checked in by alex, 5 years ago

Merge pull request #75 from asciimoo/dalf-update-ua

[mod] update User Agent

File size: 28.0 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
[67]7 "encoding/base64"
[1]8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
[68]14 "mime"
[1]15 "net/url"
[78]16 "os"
[68]17 "path/filepath"
[1]18 "regexp"
19 "strings"
[4]20 "time"
[60]21 "unicode/utf8"
[1]22
23 "github.com/valyala/fasthttp"
[109]24 "github.com/valyala/fasthttp/fasthttpproxy"
[1]25 "golang.org/x/net/html"
[45]26 "golang.org/x/net/html/charset"
27 "golang.org/x/text/encoding"
[68]28
29 "github.com/asciimoo/morty/contenttype"
[1]30)
31
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text is copied to the output as is.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is the content of a <style> element.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is the content of a <noscript> element.
	STATE_IN_NOSCRIPT
)
37
// VERSION is the version string printed by the -version flag.
const VERSION = "v0.2.0"

// DEBUG enables verbose logging. It is on unless the DEBUG environment
// variable is set to exactly "false".
var DEBUG bool = os.Getenv("DEBUG") != "false"
[96]41
[1]42var CLIENT *fasthttp.Client = &fasthttp.Client{
43 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
44}
45
// CSS_URL_REGEXP matches CSS url(...) references. Submatch 1 and 3 are the
// optional opening and closing quotes, submatch 2 is the URL itself.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
[1]47
[68]48var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
49 // html
50 contenttype.NewFilterEquals("text", "html", ""),
51 contenttype.NewFilterEquals("application", "xhtml", "xml"),
52 // css
53 contenttype.NewFilterEquals("text", "css", ""),
54 // images
55 contenttype.NewFilterEquals("image", "gif", ""),
56 contenttype.NewFilterEquals("image", "png", ""),
57 contenttype.NewFilterEquals("image", "jpeg", ""),
58 contenttype.NewFilterEquals("image", "pjpeg", ""),
59 contenttype.NewFilterEquals("image", "webp", ""),
60 contenttype.NewFilterEquals("image", "tiff", ""),
61 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
62 contenttype.NewFilterEquals("image", "bmp", ""),
63 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
[88]64 contenttype.NewFilterEquals("image", "x-icon", ""),
[68]65 // fonts
66 contenttype.NewFilterEquals("application", "font-otf", ""),
67 contenttype.NewFilterEquals("application", "font-ttf", ""),
68 contenttype.NewFilterEquals("application", "font-woff", ""),
69 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
70})
71
72var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
73 // texts
74 contenttype.NewFilterEquals("text", "csv", ""),
[103]75 contenttype.NewFilterEquals("text", "tab-separated-values", ""),
[68]76 contenttype.NewFilterEquals("text", "plain", ""),
77 // API
78 contenttype.NewFilterEquals("application", "json", ""),
79 // Documents
80 contenttype.NewFilterEquals("application", "x-latex", ""),
81 contenttype.NewFilterEquals("application", "pdf", ""),
82 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
83 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
84 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
85 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
86 // Compressed archives
87 contenttype.NewFilterEquals("application", "zip", ""),
88 contenttype.NewFilterEquals("application", "gzip", ""),
89 contenttype.NewFilterEquals("application", "x-compressed", ""),
90 contenttype.NewFilterEquals("application", "x-gtar", ""),
91 contenttype.NewFilterEquals("application", "x-compress", ""),
92 // Generic binary
93 contenttype.NewFilterEquals("application", "octet-stream", ""),
94})
95
// ALLOWED_CONTENTTYPE_PARAMETERS lists the Content-Type parameters that are
// kept in the response sent back to the client.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{
	"charset": true,
}
99
// UNSAFE_ELEMENTS lists the HTML elements that sanitizeHTML drops together
// with everything they contain.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
109
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with only their value HTML-escaped.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
146
// SELF_CLOSING_ELEMENTS lists the HTML elements that never have an end tag.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
164
// LINK_REL_SAFE_VALUES lists the <link rel="..."> values that are kept;
// a <link> with any other rel value is removed by sanitizeLinkTag.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
186
// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower case) <meta http-equiv="...">
// values that are kept; a <meta> with any other http-equiv value is removed
// by sanitizeMetaTag.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
195
// Proxy holds the settings shared by every request served by the proxy.
type Proxy struct {
	// Key is the HMAC key used to validate the "mortyhash" parameter;
	// validation is skipped when it is nil.
	Key []byte
	// RequestTimeout bounds the time spent fetching the remote resource.
	RequestTimeout time.Duration
}
200
// RequestConfig carries what is needed to rewrite the URLs of one document.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs; URLs are not signed
	// when it is nil.
	Key []byte
	// BaseURL is the URL against which relative references are resolved.
	BaseURL *url.URL
}
205
// HTML_FORM_EXTENSION is the format of the hidden inputs appended to every
// <form>: the first verb is the target URL, the second its hash.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
[1]207
// HTML_BODY_EXTENSION is the format of the banner inserted before </body>;
// its single verb is the URL of the original page.
var HTML_BODY_EXTENSION = `
<input type="checkbox" id="mortytoggle" autocomplete="off" />
<div id="mortyheader">
  <p>This is a <a href="https://github.com/asciimoo/morty">proxified and sanitized</a> view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p>
</div>
<style>
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 15%%; left: 0; max-width: 140px; overflow: hidden; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; padding: 12px 12px 8px 8px; color: #444; }
#mortyheader * { box-sizing: content-box; margin: 0; border: none; padding: 0; overflow: hidden; z-index: 2147483647 !important; line-height: 1em; font-size: 12px !important; font-family: sans !important; font-weight: normal; text-align: left; text-decoration: none; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; visibility: hidden; }
</style>
`
223
// HTML_HEAD_CONTENT_TYPE is the set of <meta> elements written right after
// the opening <head> tag of every sanitized document.
var HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
[45]228
// MORTY_HTML_PAGE_START is the beginning of every page generated by the
// proxy itself (main page, error page, exit page), up to and including the
// page title.
var MORTY_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>MortyProxy</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
	<div class="container">
		<h1>MortyProxy</h1>
`
251
// MORTY_HTML_PAGE_END closes the container opened by the page start and
// adds the footer of every page generated by the proxy itself.
var MORTY_HTML_PAGE_END = `
	</div>
	<div class="footer">
		<p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
			<a href="https://github.com/asciimoo/morty">view on github</a>
		</p>
	</div>
</body>
</html>`
261
// FAVICON_BYTES is the decoded PNG image served for /favicon.ico.
var FAVICON_BYTES []byte

// init decodes the embedded favicon once at start-up.
func init() {
	// The literal is stored WITHOUT the trailing '=' padding. The padded
	// StdEncoding rejects such input and silently drops the last two bytes
	// of the image, so the unpadded variant is used instead (any padding
	// that may be added to the literal later is trimmed first).
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	var err error
	FAVICON_BYTES, err = base64.RawStdEncoding.DecodeString(strings.TrimRight(faviconBase64, "="))
	if err != nil {
		// best effort: the proxy still works with a broken favicon
		log.Println("cannot decode the embedded favicon:", err)
	}
}
269
[1]270func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]271
272 if appRequestHandler(ctx) {
273 return
274 }
275
[1]276 requestHash := popRequestParam(ctx, []byte("mortyhash"))
277
278 requestURI := popRequestParam(ctx, []byte("mortyurl"))
279
280 if requestURI == nil {
[35]281 p.serveMainPage(ctx, 200, nil)
[1]282 return
283 }
284
285 if p.Key != nil {
286 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[35]287 // HTTP status code 403 : Forbidden
288 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
[1]289 return
290 }
291 }
292
[97]293 parsedURI, err := url.Parse(string(requestURI))
[1]294
[11]295 if err != nil {
[35]296 // HTTP status code 500 : Internal Server Error
297 p.serveMainPage(ctx, 500, err)
[1]298 return
299 }
300
[69]301 // Serve an intermediate page for protocols other than HTTP(S)
302 if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
303 p.serveExitMortyPage(ctx, parsedURI)
304 return
305 }
306
[1]307 req := fasthttp.AcquireRequest()
308 defer fasthttp.ReleaseRequest(req)
[12]309 req.SetConnectionClose()
[1]310
[47]311 requestURIStr := string(requestURI)
[1]312
[97]313 if DEBUG {
314 log.Println("getting", requestURIStr)
315 }
[1]316
[47]317 req.SetRequestURI(requestURIStr)
[111]318 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0"))
[1]319
320 resp := fasthttp.AcquireResponse()
321 defer fasthttp.ReleaseResponse(resp)
322
323 req.Header.SetMethodBytes(ctx.Method())
324 if ctx.IsPost() || ctx.IsPut() {
325 req.SetBody(ctx.PostBody())
326 }
327
[11]328 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
329
330 if err != nil {
[35]331 if err == fasthttp.ErrTimeout {
332 // HTTP status code 504 : Gateway Time-Out
333 p.serveMainPage(ctx, 504, err)
334 } else {
335 // HTTP status code 500 : Internal Server Error
336 p.serveMainPage(ctx, 500, err)
337 }
[1]338 return
339 }
340
341 if resp.StatusCode() != 200 {
342 switch resp.StatusCode() {
[7]343 case 301, 302, 303, 307, 308:
[1]344 loc := resp.Header.Peek("Location")
345 if loc != nil {
[97]346 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
347 url, err := rc.ProxifyURI(loc)
348 if err == nil {
349 ctx.SetStatusCode(resp.StatusCode())
350 ctx.Response.Header.Add("Location", url)
351 if DEBUG {
352 log.Println("redirect to", string(loc))
[96]353 }
[1]354 return
355 }
356 }
357 }
[47]358 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
[37]359 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
[1]360 return
361 }
362
[68]363 contentTypeBytes := resp.Header.Peek("Content-Type")
[1]364
[68]365 if contentTypeBytes == nil {
[35]366 // HTTP status code 503 : Service Unavailable
367 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[1]368 return
369 }
370
[68]371 contentTypeString := string(contentTypeBytes)
372
373 // decode Content-Type header
374 contentType, error := contenttype.ParseContentType(contentTypeString)
375 if error != nil {
376 // HTTP status code 503 : Service Unavailable
377 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
[63]378 return
379 }
380
[68]381 // content-disposition
382 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
[64]383
[68]384 // check content type
385 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
386 // it is not a usual content type
387 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
388 // force attachment for allowed content type
389 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
390 } else {
391 // deny access to forbidden content type
392 // HTTP status code 403 : Forbidden
393 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
394 return
395 }
396 }
397
398 // HACK : replace */xhtml by text/html
399 if contentType.SubType == "xhtml" {
400 contentType.TopLevelType = "text"
401 contentType.SubType = "html"
402 contentType.Suffix = ""
403 }
404
405 // conversion to UTF-8
[1]406 var responseBody []byte
407
[68]408 if contentType.TopLevelType == "text" {
409 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
[45]410 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
411 responseBody, err = e.NewDecoder().Bytes(resp.Body())
412 if err != nil {
413 // HTTP status code 503 : Service Unavailable
414 p.serveMainPage(ctx, 503, err)
415 return
416 }
417 } else {
418 responseBody = resp.Body()
[1]419 }
[68]420 // update the charset or specify it
421 contentType.Parameters["charset"] = "UTF-8"
[1]422 } else {
423 responseBody = resp.Body()
424 }
425
[68]426 //
427 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
[1]428
[68]429 // set the content type
430 ctx.SetContentType(contentType.String())
431
432 // output according to MIME type
[1]433 switch {
[68]434 case contentType.SubType == "css" && contentType.Suffix == "":
[23]435 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[68]436 case contentType.SubType == "html" && contentType.Suffix == "":
[23]437 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
[1]438 default:
[68]439 if contentDispositionBytes != nil {
440 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
[39]441 }
[1]442 ctx.Write(responseBody)
443 }
444}
445
// contentDispositionForceAttachment returns a Content-Disposition header
// value whose disposition type is always "attachment".
//
// The parameters of contentDispositionBytes (which may be nil or invalid)
// are kept. When they do not define a file name, the last element of the
// URL path is used; nothing is added when the path has no usable last
// element (empty path or "/").
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	contentDispositionParams := make(map[string]string)

	if contentDispositionBytes != nil {
		// an unparsable header is ignored: the result is built from scratch
		if _, parsed, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil && parsed != nil {
			contentDispositionParams = parsed
		}
	}

	if _, fileNameDefined := contentDispositionParams["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		fileName := filepath.Base(url.Path)
		// Base returns "." for an empty path and a lone separator for the
		// root path: neither is a meaningful file name
		if fileName != "." && fileName != "/" && fileName != string(filepath.Separator) {
			contentDispositionParams["filename"] = fileName
		}
	}

	return []byte(mime.FormatMediaType("attachment", contentDispositionParams))
}
468
[10]469func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]470 // serve robots.txt
[10]471 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
472 ctx.SetContentType("text/plain")
473 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
474 return true
475 }
[11]476
[67]477 // server favicon.ico
478 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
479 ctx.SetContentType("image/png")
480 ctx.Write(FAVICON_BYTES)
481 return true
482 }
483
[10]484 return false
485}
486
[1]487func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
488 param := ctx.QueryArgs().PeekBytes(paramName)
489
490 if param == nil {
491 param = ctx.PostArgs().PeekBytes(paramName)
492 if param != nil {
493 ctx.PostArgs().DelBytes(paramName)
494 }
495 } else {
496 ctx.QueryArgs().DelBytes(paramName)
497 }
498
499 return param
500}
501
[9]502func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]503 // TODO
504
505 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
506
507 if urlSlices == nil {
[9]508 out.Write(css)
[1]509 return
510 }
511
512 startIndex := 0
513
514 for _, s := range urlSlices {
[15]515 urlStart := s[4]
516 urlEnd := s[5]
[1]517
[60]518 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
[9]519 out.Write(css[startIndex:urlStart])
520 out.Write([]byte(uri))
[1]521 startIndex = urlEnd
[97]522 } else if DEBUG {
[36]523 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
[1]524 }
525 }
526 if startIndex < len(css) {
[9]527 out.Write(css[startIndex:len(css)])
[1]528 }
529}
530
[9]531func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]532 r := bytes.NewReader(htmlDoc)
533 decoder := html.NewTokenizer(r)
534 decoder.AllowCDATA(true)
535
536 unsafeElements := make([][]byte, 0, 8)
537 state := STATE_DEFAULT
538 for {
539 token := decoder.Next()
540 if token == html.ErrorToken {
541 err := decoder.Err()
542 if err != io.EOF {
[97]543 log.Println("failed to parse HTML")
[1]544 }
545 break
546 }
547
548 if len(unsafeElements) == 0 {
549
550 switch token {
551 case html.StartTagToken, html.SelfClosingTagToken:
552 tag, hasAttrs := decoder.TagName()
553 safe := !inArray(tag, UNSAFE_ELEMENTS)
554 if !safe {
555 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
556 var unsafeTag []byte = make([]byte, len(tag))
557 copy(unsafeTag, tag)
558 unsafeElements = append(unsafeElements, unsafeTag)
559 }
560 break
561 }
[38]562 if bytes.Equal(tag, []byte("base")) {
563 for {
564 attrName, attrValue, moreAttr := decoder.TagAttr()
[45]565 if bytes.Equal(attrName, []byte("href")) {
566 parsedURI, err := url.Parse(string(attrValue))
567 if err == nil {
568 rc.BaseURL = parsedURI
569 }
[38]570 }
571 if !moreAttr {
572 break
573 }
574 }
575 break
576 }
[1]577 if bytes.Equal(tag, []byte("noscript")) {
578 state = STATE_IN_NOSCRIPT
579 break
580 }
581 var attrs [][][]byte
582 if hasAttrs {
583 for {
584 attrName, attrValue, moreAttr := decoder.TagAttr()
[21]585 attrs = append(attrs, [][]byte{
586 attrName,
587 attrValue,
588 []byte(html.EscapeString(string(attrValue))),
589 })
[1]590 if !moreAttr {
591 break
592 }
593 }
[13]594 }
595 if bytes.Equal(tag, []byte("link")) {
596 sanitizeLinkTag(rc, out, attrs)
597 break
598 }
599
[45]600 if bytes.Equal(tag, []byte("meta")) {
601 sanitizeMetaTag(rc, out, attrs)
602 break
603 }
604
[13]605 fmt.Fprintf(out, "<%s", tag)
606
607 if hasAttrs {
[45]608 sanitizeAttrs(rc, out, attrs)
[1]609 }
[13]610
[1]611 if token == html.SelfClosingTagToken {
[9]612 fmt.Fprintf(out, " />")
[1]613 } else {
[9]614 fmt.Fprintf(out, ">")
[1]615 if bytes.Equal(tag, []byte("style")) {
616 state = STATE_IN_STYLE
617 }
618 }
[13]619
[45]620 if bytes.Equal(tag, []byte("head")) {
[46]621 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
[45]622 }
623
[1]624 if bytes.Equal(tag, []byte("form")) {
625 var formURL *url.URL
626 for _, attr := range attrs {
627 if bytes.Equal(attr[0], []byte("action")) {
628 formURL, _ = url.Parse(string(attr[1]))
[28]629 formURL = mergeURIs(rc.BaseURL, formURL)
[1]630 break
631 }
632 }
633 if formURL == nil {
[23]634 formURL = rc.BaseURL
[1]635 }
[2]636 urlStr := formURL.String()
637 var key string
638 if rc.Key != nil {
639 key = hash(urlStr, rc.Key)
640 }
[9]641 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]642
643 }
644
645 case html.EndTagToken:
646 tag, _ := decoder.TagName()
647 writeEndTag := true
648 switch string(tag) {
649 case "body":
[23]650 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
[1]651 case "style":
652 state = STATE_DEFAULT
653 case "noscript":
654 state = STATE_DEFAULT
655 writeEndTag = false
656 }
657 // skip noscript tags - only the tag, not the content, because javascript is sanitized
658 if writeEndTag {
[9]659 fmt.Fprintf(out, "</%s>", tag)
[1]660 }
661
662 case html.TextToken:
663 switch state {
664 case STATE_DEFAULT:
[9]665 fmt.Fprintf(out, "%s", decoder.Raw())
[1]666 case STATE_IN_STYLE:
[9]667 sanitizeCSS(rc, out, decoder.Raw())
[1]668 case STATE_IN_NOSCRIPT:
[9]669 sanitizeHTML(rc, out, decoder.Raw())
[1]670 }
671
[62]672 case html.CommentToken:
673 // ignore comment. TODO : parse IE conditional comment
674
675 case html.DoctypeToken:
[9]676 out.Write(decoder.Raw())
[1]677 }
678 } else {
679 switch token {
680 case html.StartTagToken:
681 tag, _ := decoder.TagName()
682 if inArray(tag, UNSAFE_ELEMENTS) {
683 unsafeElements = append(unsafeElements, tag)
684 }
685
686 case html.EndTagToken:
687 tag, _ := decoder.TagName()
688 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
689 unsafeElements = unsafeElements[:len(unsafeElements)-1]
690 }
691 }
692 }
693 }
694}
695
[13]696func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
697 exclude := false
698 for _, attr := range attrs {
699 attrName := attr[0]
700 attrValue := attr[1]
701 if bytes.Equal(attrName, []byte("rel")) {
[46]702 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
[13]703 exclude = true
704 break
705 }
706 }
707 if bytes.Equal(attrName, []byte("as")) {
708 if bytes.Equal(attrValue, []byte("script")) {
709 exclude = true
710 break
711 }
712 }
713 }
714
715 if !exclude {
716 out.Write([]byte("<link"))
717 for _, attr := range attrs {
[21]718 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[13]719 }
720 out.Write([]byte(">"))
721 }
722}
723
[45]724func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]725 var http_equiv []byte
726 var content []byte
727
728 for _, attr := range attrs {
729 attrName := attr[0]
730 attrValue := attr[1]
731 if bytes.Equal(attrName, []byte("http-equiv")) {
732 http_equiv = bytes.ToLower(attrValue)
[46]733 // exclude some <meta http-equiv="..." ..>
734 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
735 return
736 }
[1]737 }
738 if bytes.Equal(attrName, []byte("content")) {
739 content = attrValue
740 }
[45]741 if bytes.Equal(attrName, []byte("charset")) {
742 // exclude <meta charset="...">
743 return
744 }
[1]745 }
746
[45]747 out.Write([]byte("<meta"))
[14]748 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
749 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
750 contentUrl := content[urlIndex+4:]
[36]751 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
[37]752 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
[36]753 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
[37]754 contentUrl = contentUrl[1 : len(contentUrl)-1]
[36]755 }
756 }
757 // output proxify result
[60]758 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
[14]759 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
[1]760 }
761 } else {
[46]762 if len(http_equiv) > 0 {
763 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
764 }
[9]765 sanitizeAttrs(rc, out, attrs)
[1]766 }
[45]767 out.Write([]byte(">"))
[1]768}
769
[9]770func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]771 for _, attr := range attrs {
[21]772 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
[1]773 }
774}
775
[21]776func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
[1]777 if inArray(attrName, SAFE_ATTRIBUTES) {
[21]778 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
[1]779 return
780 }
781 switch string(attrName) {
782 case "src", "href", "action":
[60]783 if uri, err := rc.ProxifyURI(attrValue); err == nil {
[9]784 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[97]785 } else if DEBUG {
[36]786 log.Println("cannot proxify uri:", string(attrValue))
[1]787 }
788 case "style":
[21]789 cssAttr := bytes.NewBuffer(nil)
790 sanitizeCSS(rc, cssAttr, attrValue)
791 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
[1]792 }
793}
794
// mergeURIs resolves the reference u2 against the base u1. u1 itself is
// returned when u2 is nil.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
801
// sanitizeURI cleans up a URI found in a document.
//
// Bytes up to and including the space (<= 32) are removed from the end of
// the URI, from its beginning and from inside its scheme, and the scheme is
// converted to lower case. The clean-up is done in place: uri is modified
// and the returned slice shares its memory.
//
// The second result is the lower case scheme including the trailing ':',
// or "" when the URI has no scheme.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	// cleaned bytes seen so far: a candidate scheme
	candidate := make([]byte, 0, 10)
	firstIndex := 0  // index of the first byte that is kept
	colonIndex := -1 // index of the ':' ending the scheme, if any

scan:
	for i, c := range uri {
		if c <= ' ' {
			// special character or space: skipped
			continue
		}
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		if len(candidate) == 0 {
			firstIndex = i
		}
		candidate = append(candidate, c)

		switch c {
		case ':':
			// end of the scheme
			colonIndex = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI: there is no scheme
			break scan
		}
	}

	if colonIndex < 0 {
		return uri[firstIndex:], ""
	}

	// write the cleaned scheme so that it ends exactly on the ':' byte
	schemeStart := colonIndex - len(candidate) + 1
	copy(uri[schemeStart:], candidate)
	return uri[schemeStart:], string(candidate)
}
853
854func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
855 // sanitize URI
856 uri, scheme := sanitizeURI(uri)
857
[28]858 // remove javascript protocol
[60]859 if scheme == "javascript:" {
[28]860 return "", nil
861 }
[57]862
[1]863 // TODO check malicious data: - e.g. data:script
[60]864 if scheme == "data:" {
[61]865 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
866 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
867 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
868 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
869 bytes.HasPrefix(uri, []byte("data:image/webp")) {
870 // should be safe
871 return string(uri), nil
872 } else {
873 // unsafe data
874 return "", nil
875 }
[1]876 }
877
[57]878 // parse the uri
[60]879 u, err := url.Parse(string(uri))
[1]880 if err != nil {
881 return "", err
882 }
[57]883
884 // get the fragment (with the prefix "#")
885 fragment := ""
886 if len(u.Fragment) > 0 {
887 fragment = "#" + u.Fragment
888 }
889
890 // reset the fragment: it is not included in the mortyurl
891 u.Fragment = ""
892
893 // merge the URI with the document URI
[28]894 u = mergeURIs(rc.BaseURL, u)
[1]895
[57]896 // simple internal link ?
897 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
898 if u.Scheme == rc.BaseURL.Scheme &&
899 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
900 u.Host == rc.BaseURL.Host &&
901 u.Path == rc.BaseURL.Path &&
902 u.RawQuery == rc.BaseURL.RawQuery {
903 // the fragment is the only difference between the document URI and the uri parameter
904 return fragment, nil
905 }
906
907 // return full URI and fragment (if not empty)
[60]908 morty_uri := u.String()
[1]909
910 if rc.Key == nil {
[60]911 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
[1]912 }
[60]913 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
[1]914}
915
// inArray reports whether b is equal to one of the elements of a.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
924
// hash returns the HMAC-SHA256 of msg computed with key, encoded as a lower
// case hexadecimal string.
func hash(msg string, key []byte) string {
	h := hmac.New(sha256.New, key)
	h.Write([]byte(msg))
	sum := h.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(sum)))
	hex.Encode(encoded, sum)
	return string(encoded)
}
930
931func verifyRequestURI(uri, hashMsg, key []byte) bool {
932 h := make([]byte, hex.DecodedLen(len(hashMsg)))
933 _, err := hex.Decode(h, hashMsg)
934 if err != nil {
[97]935 if DEBUG {
936 log.Println("hmac error:", err)
937 }
[1]938 return false
939 }
940 mac := hmac.New(sha256.New, key)
941 mac.Write(uri)
942 return hmac.Equal(h, mac.Sum(nil))
943}
944
[69]945func (p *Proxy) serveExitMortyPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
946 ctx.SetContentType("text/html")
947 ctx.SetStatusCode(403)
948 ctx.Write([]byte(MORTY_HTML_PAGE_START))
949 ctx.Write([]byte("<h2>You are about to exit MortyProxy</h2>"))
950 ctx.Write([]byte("<p>Following</p><p><a href=\""))
951 ctx.Write([]byte(html.EscapeString(uri.String())))
952 ctx.Write([]byte("\" rel=\"noreferrer\">"))
953 ctx.Write([]byte(html.EscapeString(uri.String())))
954 ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
955 ctx.Write([]byte(MORTY_HTML_PAGE_END))
956}
957
[35]958func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
[67]959 ctx.SetContentType("text/html; charset=UTF-8")
[35]960 ctx.SetStatusCode(statusCode)
[69]961 ctx.Write([]byte(MORTY_HTML_PAGE_START))
[11]962 if err != nil {
[97]963 if DEBUG {
964 log.Println("error:", err)
965 }
[11]966 ctx.Write([]byte("<h2>Error: "))
967 ctx.Write([]byte(html.EscapeString(err.Error())))
968 ctx.Write([]byte("</h2>"))
969 }
[1]970 if p.Key == nil {
971 ctx.Write([]byte(`
[36]972 <form action="post">
973 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
974 <input type="submit" value="go" />
975 </form>`))
[11]976 } else {
977 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]978 }
[69]979 ctx.Write([]byte(MORTY_HTML_PAGE_END))
[1]980}
981
982func main() {
[78]983 default_listen_addr := os.Getenv("MORTY_ADDRESS")
984 if default_listen_addr == "" {
985 default_listen_addr = "127.0.0.1:3000"
986 }
987 default_key := os.Getenv("MORTY_KEY")
988 listen := flag.String("listen", default_listen_addr, "Listen address")
[92]989 key := flag.String("key", default_key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
[24]990 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
[74]991 version := flag.Bool("version", false, "Show version")
[4]992 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[109]993 socks5 := flag.String("socks5", "", "SOCKS5 proxy")
[1]994 flag.Parse()
995
[74]996 if *version {
997 fmt.Println(VERSION)
998 return
999 }
1000
[24]1001 if *ipv6 {
[109]1002 CLIENT.DialDualStack = true
[24]1003 }
1004
[109]1005 if *socks5 != "" {
1006 // this disables CLIENT.DialDualStack
1007 CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
1008 }
1009
[4]1010 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]1011
1012 if *key != "" {
[92]1013 var err error
1014 p.Key, err = base64.StdEncoding.DecodeString(*key)
[94]1015 if err != nil {
1016 log.Fatal("Error parsing -key", err.Error())
1017 os.Exit(1)
[92]1018 }
[1]1019 }
1020
1021 log.Println("listening on", *listen)
1022
1023 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
1024 log.Fatal("Error in ListenAndServe:", err)
1025 }
1026}
Note: See TracBrowser for help on using the repository browser.