source: code/trunk/morty.go@ 68

Last change on this file since 68 was 68, checked in by asciimoo, 8 years ago

Merge pull request #46 from dalf/html

[mod] different HTML / CSS modifications.

File size: 26.4 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/base64"
8 "encoding/hex"
9 "errors"
10 "flag"
11 "fmt"
12 "io"
13 "log"
14 "mime"
15 "net/url"
16 "path/filepath"
17 "regexp"
18 "strings"
19 "time"
20 "unicode/utf8"
21
22 "github.com/valyala/fasthttp"
23 "golang.org/x/net/html"
24 "golang.org/x/net/html/charset"
25 "golang.org/x/text/encoding"
26
27 "github.com/asciimoo/morty/contenttype"
28)
29
// Tokenizer states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text is copied to the output as-is.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is the body of a <style> element and goes through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is the body of a <noscript> element and is sanitized as HTML.
	STATE_IN_NOSCRIPT
)
35
36var CLIENT *fasthttp.Client = &fasthttp.Client{
37 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
38}
39
// CSS_URL_REGEXP matches CSS url(...) references.
// Submatch 1 is the optional opening quote, submatch 2 the URL itself
// (no whitespace, quotes or closing parenthesis), submatch 3 the optional
// closing quote.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
41
42var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
43 // html
44 contenttype.NewFilterEquals("text", "html", ""),
45 contenttype.NewFilterEquals("application", "xhtml", "xml"),
46 // css
47 contenttype.NewFilterEquals("text", "css", ""),
48 // images
49 contenttype.NewFilterEquals("image", "gif", ""),
50 contenttype.NewFilterEquals("image", "png", ""),
51 contenttype.NewFilterEquals("image", "jpeg", ""),
52 contenttype.NewFilterEquals("image", "pjpeg", ""),
53 contenttype.NewFilterEquals("image", "webp", ""),
54 contenttype.NewFilterEquals("image", "tiff", ""),
55 contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
56 contenttype.NewFilterEquals("image", "bmp", ""),
57 contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
58 // fonts
59 contenttype.NewFilterEquals("application", "font-otf", ""),
60 contenttype.NewFilterEquals("application", "font-ttf", ""),
61 contenttype.NewFilterEquals("application", "font-woff", ""),
62 contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
63})
64
65var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
66 // texts
67 contenttype.NewFilterEquals("text", "csv", ""),
68 contenttype.NewFilterEquals("text", "tab-separated-value", ""),
69 contenttype.NewFilterEquals("text", "plain", ""),
70 // API
71 contenttype.NewFilterEquals("application", "json", ""),
72 // Documents
73 contenttype.NewFilterEquals("application", "x-latex", ""),
74 contenttype.NewFilterEquals("application", "pdf", ""),
75 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
76 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
77 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
78 contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
79 // Compressed archives
80 contenttype.NewFilterEquals("application", "zip", ""),
81 contenttype.NewFilterEquals("application", "gzip", ""),
82 contenttype.NewFilterEquals("application", "x-compressed", ""),
83 contenttype.NewFilterEquals("application", "x-gtar", ""),
84 contenttype.NewFilterEquals("application", "x-compress", ""),
85 // Generic binary
86 contenttype.NewFilterEquals("application", "octet-stream", ""),
87})
88
// ALLOWED_CONTENTTYPE_PARAMETERS lists the Content-Type parameters that are
// kept on the proxied response; it is passed to FilterParameters in
// RequestHandler.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{"charset": true}
92
// UNSAFE_ELEMENTS lists the elements that sanitizeHTML drops together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"), []byte("canvas"), []byte("embed"),
	//[]byte("iframe"),
	[]byte("math"), []byte("script"), []byte("svg"),
}
102
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with only their value HTML-escaped.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"), []byte("accesskey"), []byte("align"), []byte("alt"),
	[]byte("as"), []byte("autocomplete"),
	[]byte("charset"), []byte("checked"), []byte("class"), []byte("content"),
	[]byte("contenteditable"), []byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"), []byte("hidden"), []byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"), []byte("method"),
	[]byte("name"), []byte("nowrap"),
	[]byte("placeholder"), []byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"), []byte("target"), []byte("title"), []byte("translate"), []byte("type"),
	[]byte("value"),
	[]byte("width"),
}
139
// SELF_CLOSING_ELEMENTS lists the elements that never have an end tag.
// sanitizeHTML uses it to avoid waiting for a closing tag after dropping an
// unsafe element such as <embed>.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"), []byte("base"), []byte("br"), []byte("col"), []byte("embed"),
	[]byte("hr"), []byte("img"), []byte("input"), []byte("keygen"), []byte("link"),
	[]byte("meta"), []byte("param"), []byte("source"), []byte("track"), []byte("wbr"),
}
157
// LINK_REL_SAFE_VALUES lists the rel values for which sanitizeLinkTag keeps a
// <link> element. The comparison is an exact byte match on the whole
// attribute value.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"), []byte("archives"), []byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"), []byte("index"),
	[]byte("last"), []byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"), []byte("prev"), []byte("publisher"),
	[]byte("search"), []byte("shortcut icon"), []byte("stylesheet"),
	[]byte("up"),
}
179
// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-cased) http-equiv values for
// which sanitizeMetaTag keeps a <meta> element.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible will be added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
188
type (
	// Proxy holds the server-wide settings used by RequestHandler.
	Proxy struct {
		// Key is the HMAC key used to verify "mortyhash"; nil disables verification.
		Key []byte
		// RequestTimeout bounds each upstream request.
		RequestTimeout time.Duration
	}

	// RequestConfig carries the per-document state needed to rewrite URLs.
	RequestConfig struct {
		// Key is the HMAC key used to sign rewritten URLs; nil means unsigned URLs.
		Key []byte
		// BaseURL is the URL that relative references are resolved against.
		BaseURL *url.URL
	}
)
198
// HTML_FORM_EXTENSION is the fmt template for the hidden inputs appended to
// every <form>: the first verb receives the target URL ("mortyurl"), the
// second its HMAC ("mortyhash").
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
200
// HTML_BODY_EXTENSION is the fmt template for the banner inserted just before
// </body>. Its single %s verb receives the URL of the original page; literal
// percent signs in the CSS are written as %%.
var HTML_BODY_EXTENSION = `
<div id="mortyheader">
 <input type="checkbox" id="mortytoggle" autocomplete="off" />
 <p>This is a proxified and sanitized view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p>
</div>
<style>
#mortyheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 15%%; left: 0; max-width: 140px; overflow: hidden; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; padding: 12px 12px 8px 8px; color: #444; }
#mortyheader * { box-sizing: content-box; margin: 0; border: none; padding: 0; overflow: hidden; z-index: 2147483647 !important; line-height: 1em; font-size: 12px !important; font-family: sans !important; font-weight: normal; text-align: left; text-decoration: none; }
#mortyheader p { padding: 0 0 0.7em 0; display: block; }
#mortyheader a { color: #3498db; font-weight: bold; display: inline; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
216
// HTML_HEAD_CONTENT_TYPE is written right after the opening <head> tag of a
// sanitized document: it declares UTF-8, IE edge mode and a no-referrer policy.
var HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`
221
// FAVICON_BYTES is the PNG served for /favicon.ico; it is filled in by init.
var FAVICON_BYTES []byte

// init decodes the embedded favicon.
//
// The base64 text below carries no "=" padding, so it must be decoded with
// RawStdEncoding: StdEncoding rejects unpadded input and would leave a
// truncated image behind. A decoding failure means the embedded constant is
// corrupt, which is a programming error, hence the panic.
func init() {
	const faviconBase64 = "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"

	decoded, err := base64.RawStdEncoding.DecodeString(faviconBase64)
	if err != nil {
		panic("invalid embedded favicon: " + err.Error())
	}
	FAVICON_BYTES = decoded
}
229
230func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
231
232 if appRequestHandler(ctx) {
233 return
234 }
235
236 requestHash := popRequestParam(ctx, []byte("mortyhash"))
237
238 requestURI := popRequestParam(ctx, []byte("mortyurl"))
239
240 if requestURI == nil {
241 p.serveMainPage(ctx, 200, nil)
242 return
243 }
244
245 if p.Key != nil {
246 if !verifyRequestURI(requestURI, requestHash, p.Key) {
247 // HTTP status code 403 : Forbidden
248 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
249 return
250 }
251 }
252
253 parsedURI, err := url.Parse(string(requestURI))
254
255 if strings.HasSuffix(parsedURI.Host, ".onion") {
256 // HTTP status code 501 : Not Implemented
257 p.serveMainPage(ctx, 501, errors.New("Tor urls are not supported yet"))
258 return
259 }
260
261 if err != nil {
262 // HTTP status code 500 : Internal Server Error
263 p.serveMainPage(ctx, 500, err)
264 return
265 }
266
267 req := fasthttp.AcquireRequest()
268 defer fasthttp.ReleaseRequest(req)
269 req.SetConnectionClose()
270
271 requestURIStr := string(requestURI)
272
273 log.Println("getting", requestURIStr)
274
275 req.SetRequestURI(requestURIStr)
276 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"))
277
278 resp := fasthttp.AcquireResponse()
279 defer fasthttp.ReleaseResponse(resp)
280
281 req.Header.SetMethodBytes(ctx.Method())
282 if ctx.IsPost() || ctx.IsPut() {
283 req.SetBody(ctx.PostBody())
284 }
285
286 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
287
288 if err != nil {
289 if err == fasthttp.ErrTimeout {
290 // HTTP status code 504 : Gateway Time-Out
291 p.serveMainPage(ctx, 504, err)
292 } else {
293 // HTTP status code 500 : Internal Server Error
294 p.serveMainPage(ctx, 500, err)
295 }
296 return
297 }
298
299 if resp.StatusCode() != 200 {
300 switch resp.StatusCode() {
301 case 301, 302, 303, 307, 308:
302 loc := resp.Header.Peek("Location")
303 if loc != nil {
304 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
305 url, err := rc.ProxifyURI(loc)
306 if err == nil {
307 ctx.SetStatusCode(resp.StatusCode())
308 ctx.Response.Header.Add("Location", url)
309 log.Println("redirect to", string(loc))
310 return
311 }
312 }
313 }
314 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
315 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
316 return
317 }
318
319 contentTypeBytes := resp.Header.Peek("Content-Type")
320
321 if contentTypeBytes == nil {
322 // HTTP status code 503 : Service Unavailable
323 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
324 return
325 }
326
327 contentTypeString := string(contentTypeBytes)
328
329 // decode Content-Type header
330 contentType, error := contenttype.ParseContentType(contentTypeString)
331 if error != nil {
332 // HTTP status code 503 : Service Unavailable
333 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
334 return
335 }
336
337 // content-disposition
338 contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
339
340 // check content type
341 if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
342 // it is not a usual content type
343 if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
344 // force attachment for allowed content type
345 contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
346 } else {
347 // deny access to forbidden content type
348 // HTTP status code 403 : Forbidden
349 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
350 return
351 }
352 }
353
354 // HACK : replace */xhtml by text/html
355 if contentType.SubType == "xhtml" {
356 contentType.TopLevelType = "text"
357 contentType.SubType = "html"
358 contentType.Suffix = ""
359 }
360
361 // conversion to UTF-8
362 var responseBody []byte
363
364 if contentType.TopLevelType == "text" {
365 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
366 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
367 responseBody, err = e.NewDecoder().Bytes(resp.Body())
368 if err != nil {
369 // HTTP status code 503 : Service Unavailable
370 p.serveMainPage(ctx, 503, err)
371 return
372 }
373 } else {
374 responseBody = resp.Body()
375 }
376 // update the charset or specify it
377 contentType.Parameters["charset"] = "UTF-8"
378 } else {
379 responseBody = resp.Body()
380 }
381
382 //
383 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
384
385 // set the content type
386 ctx.SetContentType(contentType.String())
387
388 // output according to MIME type
389 switch {
390 case contentType.SubType == "css" && contentType.Suffix == "":
391 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
392 case contentType.SubType == "html" && contentType.Suffix == "":
393 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
394 default:
395 if contentDispositionBytes != nil {
396 ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
397 }
398 ctx.Write(responseBody)
399 }
400}
401
// contentDispositionForceAttachment returns a Content-Disposition header
// value whose disposition type is always "attachment".
//
// The parameters of contentDispositionBytes (if any, and if parseable) are
// kept. When they carry no "filename", one is derived from the last element
// of url.Path; degenerate results ("." for an empty path, or a bare
// separator) are not usable file names and are left out.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	var contentDispositionParams map[string]string

	if contentDispositionBytes != nil {
		var err error
		_, contentDispositionParams, err = mime.ParseMediaType(string(contentDispositionBytes))
		if err != nil {
			contentDispositionParams = nil
		}
	}
	// ParseMediaType may return a nil map; writing to it would panic
	if contentDispositionParams == nil {
		contentDispositionParams = make(map[string]string)
	}

	// ParseMediaType lower-cases parameter names, so use the same spelling
	// for the lookup and for the insertion.
	if _, fileNameDefined := contentDispositionParams["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		name := filepath.Base(url.Path)
		if name != "." && name != "/" && name != string(filepath.Separator) {
			contentDispositionParams["filename"] = name
		}
	}

	return []byte(mime.FormatMediaType("attachment", contentDispositionParams))
}
424
425func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
426 // serve robots.txt
427 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
428 ctx.SetContentType("text/plain")
429 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
430 return true
431 }
432
433 // server favicon.ico
434 if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
435 ctx.SetContentType("image/png")
436 ctx.Write(FAVICON_BYTES)
437 return true
438 }
439
440 return false
441}
442
443func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
444 param := ctx.QueryArgs().PeekBytes(paramName)
445
446 if param == nil {
447 param = ctx.PostArgs().PeekBytes(paramName)
448 if param != nil {
449 ctx.PostArgs().DelBytes(paramName)
450 }
451 } else {
452 ctx.QueryArgs().DelBytes(paramName)
453 }
454
455 return param
456}
457
458func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
459 // TODO
460
461 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
462
463 if urlSlices == nil {
464 out.Write(css)
465 return
466 }
467
468 startIndex := 0
469
470 for _, s := range urlSlices {
471 urlStart := s[4]
472 urlEnd := s[5]
473
474 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
475 out.Write(css[startIndex:urlStart])
476 out.Write([]byte(uri))
477 startIndex = urlEnd
478 } else {
479 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
480 }
481 }
482 if startIndex < len(css) {
483 out.Write(css[startIndex:len(css)])
484 }
485}
486
487func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
488 r := bytes.NewReader(htmlDoc)
489 decoder := html.NewTokenizer(r)
490 decoder.AllowCDATA(true)
491
492 unsafeElements := make([][]byte, 0, 8)
493 state := STATE_DEFAULT
494 for {
495 token := decoder.Next()
496 if token == html.ErrorToken {
497 err := decoder.Err()
498 if err != io.EOF {
499 log.Println("failed to parse HTML:")
500 }
501 break
502 }
503
504 if len(unsafeElements) == 0 {
505
506 switch token {
507 case html.StartTagToken, html.SelfClosingTagToken:
508 tag, hasAttrs := decoder.TagName()
509 safe := !inArray(tag, UNSAFE_ELEMENTS)
510 if !safe {
511 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
512 var unsafeTag []byte = make([]byte, len(tag))
513 copy(unsafeTag, tag)
514 unsafeElements = append(unsafeElements, unsafeTag)
515 }
516 break
517 }
518 if bytes.Equal(tag, []byte("base")) {
519 for {
520 attrName, attrValue, moreAttr := decoder.TagAttr()
521 if bytes.Equal(attrName, []byte("href")) {
522 parsedURI, err := url.Parse(string(attrValue))
523 if err == nil {
524 rc.BaseURL = parsedURI
525 }
526 }
527 if !moreAttr {
528 break
529 }
530 }
531 break
532 }
533 if bytes.Equal(tag, []byte("noscript")) {
534 state = STATE_IN_NOSCRIPT
535 break
536 }
537 var attrs [][][]byte
538 if hasAttrs {
539 for {
540 attrName, attrValue, moreAttr := decoder.TagAttr()
541 attrs = append(attrs, [][]byte{
542 attrName,
543 attrValue,
544 []byte(html.EscapeString(string(attrValue))),
545 })
546 if !moreAttr {
547 break
548 }
549 }
550 }
551 if bytes.Equal(tag, []byte("link")) {
552 sanitizeLinkTag(rc, out, attrs)
553 break
554 }
555
556 if bytes.Equal(tag, []byte("meta")) {
557 sanitizeMetaTag(rc, out, attrs)
558 break
559 }
560
561 fmt.Fprintf(out, "<%s", tag)
562
563 if hasAttrs {
564 sanitizeAttrs(rc, out, attrs)
565 }
566
567 if token == html.SelfClosingTagToken {
568 fmt.Fprintf(out, " />")
569 } else {
570 fmt.Fprintf(out, ">")
571 if bytes.Equal(tag, []byte("style")) {
572 state = STATE_IN_STYLE
573 }
574 }
575
576 if bytes.Equal(tag, []byte("head")) {
577 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
578 }
579
580 if bytes.Equal(tag, []byte("form")) {
581 var formURL *url.URL
582 for _, attr := range attrs {
583 if bytes.Equal(attr[0], []byte("action")) {
584 formURL, _ = url.Parse(string(attr[1]))
585 formURL = mergeURIs(rc.BaseURL, formURL)
586 break
587 }
588 }
589 if formURL == nil {
590 formURL = rc.BaseURL
591 }
592 urlStr := formURL.String()
593 var key string
594 if rc.Key != nil {
595 key = hash(urlStr, rc.Key)
596 }
597 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
598
599 }
600
601 case html.EndTagToken:
602 tag, _ := decoder.TagName()
603 writeEndTag := true
604 switch string(tag) {
605 case "body":
606 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
607 case "style":
608 state = STATE_DEFAULT
609 case "noscript":
610 state = STATE_DEFAULT
611 writeEndTag = false
612 }
613 // skip noscript tags - only the tag, not the content, because javascript is sanitized
614 if writeEndTag {
615 fmt.Fprintf(out, "</%s>", tag)
616 }
617
618 case html.TextToken:
619 switch state {
620 case STATE_DEFAULT:
621 fmt.Fprintf(out, "%s", decoder.Raw())
622 case STATE_IN_STYLE:
623 sanitizeCSS(rc, out, decoder.Raw())
624 case STATE_IN_NOSCRIPT:
625 sanitizeHTML(rc, out, decoder.Raw())
626 }
627
628 case html.CommentToken:
629 // ignore comment. TODO : parse IE conditional comment
630
631 case html.DoctypeToken:
632 out.Write(decoder.Raw())
633 }
634 } else {
635 switch token {
636 case html.StartTagToken:
637 tag, _ := decoder.TagName()
638 if inArray(tag, UNSAFE_ELEMENTS) {
639 unsafeElements = append(unsafeElements, tag)
640 }
641
642 case html.EndTagToken:
643 tag, _ := decoder.TagName()
644 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
645 unsafeElements = unsafeElements[:len(unsafeElements)-1]
646 }
647 }
648 }
649 }
650}
651
652func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
653 exclude := false
654 for _, attr := range attrs {
655 attrName := attr[0]
656 attrValue := attr[1]
657 if bytes.Equal(attrName, []byte("rel")) {
658 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
659 exclude = true
660 break
661 }
662 }
663 if bytes.Equal(attrName, []byte("as")) {
664 if bytes.Equal(attrValue, []byte("script")) {
665 exclude = true
666 break
667 }
668 }
669 }
670
671 if !exclude {
672 out.Write([]byte("<link"))
673 for _, attr := range attrs {
674 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
675 }
676 out.Write([]byte(">"))
677 }
678}
679
680func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
681 var http_equiv []byte
682 var content []byte
683
684 for _, attr := range attrs {
685 attrName := attr[0]
686 attrValue := attr[1]
687 if bytes.Equal(attrName, []byte("http-equiv")) {
688 http_equiv = bytes.ToLower(attrValue)
689 // exclude some <meta http-equiv="..." ..>
690 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
691 return
692 }
693 }
694 if bytes.Equal(attrName, []byte("content")) {
695 content = attrValue
696 }
697 if bytes.Equal(attrName, []byte("charset")) {
698 // exclude <meta charset="...">
699 return
700 }
701 }
702
703 out.Write([]byte("<meta"))
704 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
705 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
706 contentUrl := content[urlIndex+4:]
707 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
708 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
709 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
710 contentUrl = contentUrl[1 : len(contentUrl)-1]
711 }
712 }
713 // output proxify result
714 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
715 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
716 }
717 } else {
718 if len(http_equiv) > 0 {
719 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
720 }
721 sanitizeAttrs(rc, out, attrs)
722 }
723 out.Write([]byte(">"))
724}
725
726func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
727 for _, attr := range attrs {
728 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
729 }
730}
731
732func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
733 if inArray(attrName, SAFE_ATTRIBUTES) {
734 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
735 return
736 }
737 switch string(attrName) {
738 case "src", "href", "action":
739 if uri, err := rc.ProxifyURI(attrValue); err == nil {
740 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
741 } else {
742 log.Println("cannot proxify uri:", string(attrValue))
743 }
744 case "style":
745 cssAttr := bytes.NewBuffer(nil)
746 sanitizeCSS(rc, cssAttr, attrValue)
747 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
748 }
749}
750
// mergeURIs resolves the reference u2 against the base u1 and returns the
// resulting URL.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	resolved := u1.ResolveReference(u2)
	return resolved
}
754
// sanitizeURI cleans the beginning and the end of uri and extracts its scheme.
//
// Bytes <= 32 (control characters and space) are trimmed from the end, and
// skipped at the beginning up to the end of the scheme. When a ':' is met
// before any of '/', '?', '\\' or '#', the text up to and including the ':'
// is the scheme: it is lower-cased, stripped of bytes <= 32, and returned as
// the second result (e.g. "javascript:"). Otherwise the second result is "".
//
// The first result is a sub-slice of uri; uri is modified in place when a
// scheme is found. Only the scheme is newly allocated.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	var (
		cleaned  = make([]byte, 0, 10) // lower-cased significant bytes seen so far
		firstIdx = 0                   // index of the first significant byte
		colonIdx = -1                  // index of the ':' ending the scheme, if any
	)

scan:
	for i := 0; i < len(uri); i++ {
		c := uri[i]
		if c <= 32 {
			// special character or space: ignored
			continue
		}
		if c < utf8.RuneSelf && 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		if len(cleaned) == 0 {
			firstIdx = i
		}
		cleaned = append(cleaned, c)

		switch c {
		case ':':
			// end of the scheme
			colonIdx = i
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI: there is no scheme
			break scan
		}
	}

	if colonIdx == -1 {
		return uri[firstIdx:], ""
	}

	// write the cleaned scheme right before the rest of the URI, so that it
	// ends exactly on the ':' already in place
	start := colonIdx - len(cleaned) + 1
	copy(uri[start:], cleaned)
	return uri[start:], string(cleaned)
}
806
807func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
808 // sanitize URI
809 uri, scheme := sanitizeURI(uri)
810
811 // remove javascript protocol
812 if scheme == "javascript:" {
813 return "", nil
814 }
815
816 // TODO check malicious data: - e.g. data:script
817 if scheme == "data:" {
818 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
819 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
820 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
821 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
822 bytes.HasPrefix(uri, []byte("data:image/webp")) {
823 // should be safe
824 return string(uri), nil
825 } else {
826 // unsafe data
827 return "", nil
828 }
829 }
830
831 // parse the uri
832 u, err := url.Parse(string(uri))
833 if err != nil {
834 return "", err
835 }
836
837 // get the fragment (with the prefix "#")
838 fragment := ""
839 if len(u.Fragment) > 0 {
840 fragment = "#" + u.Fragment
841 }
842
843 // reset the fragment: it is not included in the mortyurl
844 u.Fragment = ""
845
846 // merge the URI with the document URI
847 u = mergeURIs(rc.BaseURL, u)
848
849 // simple internal link ?
850 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
851 if u.Scheme == rc.BaseURL.Scheme &&
852 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
853 u.Host == rc.BaseURL.Host &&
854 u.Path == rc.BaseURL.Path &&
855 u.RawQuery == rc.BaseURL.RawQuery {
856 // the fragment is the only difference between the document URI and the uri parameter
857 return fragment, nil
858 }
859
860 // return full URI and fragment (if not empty)
861 morty_uri := u.String()
862
863 if rc.Key == nil {
864 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
865 }
866 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
867}
868
// inArray reports whether a contains an element byte-wise equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
877
// hash returns the HMAC-SHA256 of msg keyed with key, as a lower-case
// hexadecimal string.
func hash(msg string, key []byte) string {
	mac := hmac.New(sha256.New, key)
	mac.Write([]byte(msg))

	digest := mac.Sum(nil)
	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
883
// verifyRequestURI reports whether hashMsg is the hexadecimal HMAC-SHA256 of
// uri keyed with key. A hashMsg that is not valid hexadecimal is logged and
// rejected. The digests are compared with hmac.Equal.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	received, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	mac := hmac.New(sha256.New, key)
	mac.Write(uri)
	expected := mac.Sum(nil)

	return hmac.Equal(received, expected)
}
895
896func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
897 ctx.SetContentType("text/html; charset=UTF-8")
898 ctx.SetStatusCode(statusCode)
899 ctx.Write([]byte(`<!doctype html>
900<html>
901<head>
902<title>MortyProxy</title>
903<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
904<style>
905html { height: 100%; }
906body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
907input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
908input[placeholder] { width:80%; }
909a { text-decoration: none; #2980b9; }
910h1, h2 { font-weight: 200; margin-bottom: 2rem; }
911h1 { font-size: 3em; }
912.container { flex:1; min-height: 100%; margin-bottom: 1em; }
913.footer { margin: 1em; }
914.footer p { font-size: 0.8em; }
915</style>
916</head>
917<body>
918 <div class="container">
919 <h1>MortyProxy</h1>
920`))
921 if err != nil {
922 log.Println("error:", err)
923 ctx.Write([]byte("<h2>Error: "))
924 ctx.Write([]byte(html.EscapeString(err.Error())))
925 ctx.Write([]byte("</h2>"))
926 }
927 if p.Key == nil {
928 ctx.Write([]byte(`
929 <form action="post">
930 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
931 <input type="submit" value="go" />
932 </form>`))
933 } else {
934 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
935 }
936 ctx.Write([]byte(`
937 </div>
938 <div class="footer">
939 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
940 <a href="https://github.com/asciimoo/morty">view on github</a>
941 </p>
942 </div>
943</body>
944</html>`))
945}
946
947func main() {
948
949 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
950 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
951 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
952 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
953 flag.Parse()
954
955 if *ipv6 {
956 CLIENT.Dial = fasthttp.DialDualStack
957 }
958
959 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
960
961 if *key != "" {
962 p.Key = []byte(*key)
963 }
964
965 log.Println("listening on", *listen)
966
967 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
968 log.Fatal("Error in ListenAndServe:", err)
969 }
970}
Note: See TracBrowser for help on using the repository browser.