source: code/trunk/morty.go@ 63

Last change on this file since 63 was 63, checked in by alex, 8 years ago

[enh] parse and filter Content-Type.
svg, mathml, multipart, xml (because of namespace) are forbidden.
the charset parameter in Content-Type is only set when it is set by the original server.
the */xhtml+* Content-Type : the conversion to UTF-8 is now done (it wasn't the case before).
string type is used because of the mime package API.

File size: 24.6 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "regexp"
15 "strings"
16 "time"
17 "unicode/utf8"
18
19 "github.com/valyala/fasthttp"
20 "golang.org/x/net/html"
21 "golang.org/x/net/html/charset"
22 "golang.org/x/text/encoding"
23
24 "github.com/dalf/morty/contenttype"
25)
26
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text is copied to the output as-is.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is passed through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is passed through sanitizeHTML again.
	STATE_IN_NOSCRIPT
)
32
33var CLIENT *fasthttp.Client = &fasthttp.Client{
34 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
35}
36
// CSS_URL_REGEXP matches CSS url(...) references.
// Submatch 1 is the optional opening quote, submatch 2 the URL itself and
// submatch 3 the optional closing quote.
var CSS_URL_REGEXP = regexp.MustCompile(
	"url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?",
)
38
39// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Complete_list_of_MIME_types
40// https://www.w3.org/TR/2009/WD-MathML3-20090604/mathml.pdf
41// http://planetsvg.com/tools/mime.php
42var FORBIDDEN_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
43 // javascript
44 contenttype.NewFilterContains("javascript"),
45 contenttype.NewFilterContains("ecmascript"),
46 contenttype.NewFilterEquals("application", "js", "*"),
47 // no xml (can contain xhtml or css)
48 contenttype.NewFilterEquals("text", "xml", "*"),
49 contenttype.NewFilterEquals("text", "xml-external-parsed-entity", "*"),
50 contenttype.NewFilterEquals("application", "xml", "*"),
51 contenttype.NewFilterEquals("application", "xml-external-parsed-entity", "*"),
52 contenttype.NewFilterEquals("application", "xslt", "xml"),
53 // no mathml
54 contenttype.NewFilterEquals("application", "mathml", "xml"),
55 contenttype.NewFilterEquals("application", "mathml-presentation", "xml"),
56 contenttype.NewFilterEquals("application", "mathml-content", "xml"),
57 // no svg
58 contenttype.NewFilterEquals("image", "svg", "xml"),
59 contenttype.NewFilterEquals("image", "svg-xml", "*"),
60 // no cache
61 contenttype.NewFilterEquals("text", "cache-manifest", "*"),
62 // no multipart
63 contenttype.NewFilterEquals("multipart", "*", "*"),
64 // no xul
65 contenttype.NewFilterEquals("application", "vnd.mozilla.xul", "xml"),
66 // no htc
67 contenttype.NewFilterEquals("text", "x-component", "*"),
68 // no flash
69 contenttype.NewFilterEquals("application", "x-shockwave-flash", "*"),
70 contenttype.NewFilterEquals("video", "x-flv", ""),
71 contenttype.NewFilterEquals("video", "vnd.sealed-swf", ""),
72 // no know format to have issues
73 contenttype.NewFilterEquals("image", "wmf", "*"),
74 contenttype.NewFilterEquals("image", "emf", "*"),
75 // some of the microsoft and IE mime types
76 contenttype.NewFilterEquals("text", "vbs", "*"),
77 contenttype.NewFilterEquals("text", "vbscript", "*"),
78 contenttype.NewFilterEquals("text", "scriptlet", "*"),
79 contenttype.NewFilterEquals("application", "x-vbs", "*"),
80 contenttype.NewFilterEquals("application", "olescript", "*"),
81 contenttype.NewFilterEquals("application", "x-msmetafile", "*"),
82 // no css (sometime, rendering depend on the browser)
83 contenttype.NewFilterEquals("application", "x-pointplus", "*"),
84})
85
// ALLOWED_CONTENTTYPE_PARAMETERS lists the Content-Type parameters that are
// kept when the response content type is filtered in RequestHandler.
var ALLOWED_CONTENTTYPE_PARAMETERS = map[string]bool{
	"charset": true,
}
89
// UNSAFE_ELEMENTS lists the HTML elements that sanitizeHTML drops together
// with everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("math"),
	[]byte("script"),
	[]byte("svg"),
}
99
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with their HTML-escaped value, without any rewriting.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("hreflang"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
136
// SELF_CLOSING_ELEMENTS lists the elements that have no closing tag.
// sanitizeHTML uses it so that an unsafe element from this list does not
// open a "skip until the end tag" section.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
154
// LINK_REL_SAFE_VALUES lists the values of the rel attribute for which
// sanitizeLinkTag keeps a <link> element.
var LINK_REL_SAFE_VALUES = [][]byte{
	[]byte("alternate"),
	[]byte("archives"),
	[]byte("author"),
	[]byte("copyright"),
	[]byte("first"),
	[]byte("help"),
	[]byte("icon"),
	[]byte("index"),
	[]byte("last"),
	[]byte("license"),
	[]byte("manifest"),
	[]byte("next"),
	[]byte("pingback"),
	[]byte("prev"),
	[]byte("publisher"),
	[]byte("search"),
	[]byte("shortcut icon"),
	[]byte("stylesheet"),
	[]byte("up"),
}
176
// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-cased) http-equiv values for
// which sanitizeMetaTag keeps a <meta> element.
var LINK_HTTP_EQUIV_SAFE_VALUES = [][]byte{
	// X-UA-Compatible is added automatically, so it can be skipped
	[]byte("date"),
	[]byte("last-modified"),
	[]byte("refresh"), // URL rewrite
	// []byte("location"), TODO URL rewrite
	[]byte("content-language"),
}
185
// Proxy holds the settings of the HTTP handler.
type Proxy struct {
	// Key is the HMAC key used to validate "mortyhash"; nil disables validation.
	Key []byte
	// RequestTimeout is the timeout applied to each upstream request.
	RequestTimeout time.Duration
}
190
// RequestConfig carries the per-document data needed to rewrite URIs.
type RequestConfig struct {
	// Key is the HMAC key used to sign proxified URIs; nil means unsigned URIs.
	Key []byte
	// BaseURL is the URL against which the document's references are resolved.
	BaseURL *url.URL
}
195
// HTML_FORM_EXTENSION is the format string of the hidden inputs appended to
// every <form>: the first verb is the target URL, the second its hash.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
197
// HTML_BODY_EXTENSION is the format string of the banner inserted just before
// </body>; its single %s verb receives the URL of the original page.
// Literal percent signs of the CSS are doubled (%%).
var HTML_BODY_EXTENSION = `
<div id="mortyheader">
    <input type="checkbox" id="mortytoggle" autocomplete="off" />
    <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s" rel="noreferrer">original site</a>.</p><p><label for="mortytoggle">hide</label></p></div>
</div>
<style>
#mortyheader { position: fixed; padding: 12px 12px 12px 0; margin: 0; box-sizing: content-box; top: 15%%; left: 0; max-width: 140px; color: #444; overflow: hidden; z-index: 110000; font-size: 12px; line-height: normal; }
#mortyheader a { color: #3498db; font-weight: bold; }
#mortyheader p { padding: 0 0 0.7em 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 12px !important; font-family: sans !important; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
213
// HTML_HEAD_CONTENT_TYPE is the markup written right after every <head> tag
// by sanitizeHTML.
var HTML_HEAD_CONTENT_TYPE = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
`
217
218func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
219
220 if appRequestHandler(ctx) {
221 return
222 }
223
224 requestHash := popRequestParam(ctx, []byte("mortyhash"))
225
226 requestURI := popRequestParam(ctx, []byte("mortyurl"))
227
228 if requestURI == nil {
229 p.serveMainPage(ctx, 200, nil)
230 return
231 }
232
233 if p.Key != nil {
234 if !verifyRequestURI(requestURI, requestHash, p.Key) {
235 // HTTP status code 403 : Forbidden
236 p.serveMainPage(ctx, 403, errors.New(`invalid "mortyhash" parameter`))
237 return
238 }
239 }
240
241 parsedURI, err := url.Parse(string(requestURI))
242
243 if strings.HasSuffix(parsedURI.Host, ".onion") {
244 // HTTP status code 501 : Not Implemented
245 p.serveMainPage(ctx, 501, errors.New("Tor urls are not supported yet"))
246 return
247 }
248
249 if err != nil {
250 // HTTP status code 500 : Internal Server Error
251 p.serveMainPage(ctx, 500, err)
252 return
253 }
254
255 req := fasthttp.AcquireRequest()
256 defer fasthttp.ReleaseRequest(req)
257 req.SetConnectionClose()
258
259 requestURIStr := string(requestURI)
260
261 log.Println("getting", requestURIStr)
262
263 req.SetRequestURI(requestURIStr)
264 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"))
265
266 resp := fasthttp.AcquireResponse()
267 defer fasthttp.ReleaseResponse(resp)
268
269 req.Header.SetMethodBytes(ctx.Method())
270 if ctx.IsPost() || ctx.IsPut() {
271 req.SetBody(ctx.PostBody())
272 }
273
274 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
275
276 if err != nil {
277 if err == fasthttp.ErrTimeout {
278 // HTTP status code 504 : Gateway Time-Out
279 p.serveMainPage(ctx, 504, err)
280 } else {
281 // HTTP status code 500 : Internal Server Error
282 p.serveMainPage(ctx, 500, err)
283 }
284 return
285 }
286
287 if resp.StatusCode() != 200 {
288 switch resp.StatusCode() {
289 case 301, 302, 303, 307, 308:
290 loc := resp.Header.Peek("Location")
291 if loc != nil {
292 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
293 url, err := rc.ProxifyURI(loc)
294 if err == nil {
295 ctx.SetStatusCode(resp.StatusCode())
296 ctx.Response.Header.Add("Location", url)
297 log.Println("redirect to", string(loc))
298 return
299 }
300 }
301 }
302 error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
303 p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
304 return
305 }
306
307 contentTypeBytes := resp.Header.Peek("Content-Type")
308
309 if contentTypeBytes == nil {
310 // HTTP status code 503 : Service Unavailable
311 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
312 return
313 }
314
315 contentTypeString := string(contentTypeBytes)
316
317 // decode Content-Type header
318 contentType, error := contenttype.ParseContentType(contentTypeString)
319 if error != nil {
320 // HTTP status code 503 : Service Unavailable
321 p.serveMainPage(ctx, 503, errors.New("invalid content type"))
322 return
323 }
324
325 // deny access to forbidden content type
326 if FORBIDDEN_CONTENTTYPE_FILTER(contentType) {
327 // HTTP status code 403 : Forbidden
328 p.serveMainPage(ctx, 403, errors.New("forbidden content type"))
329 return
330 }
331
332 // HACK : replace */xhtml by text/html
333 if contentType.SubType == "xhtml" {
334 contentType.TopLevelType = "text"
335 contentType.SubType = "html"
336 contentType.Suffix = ""
337 }
338
339 // conversion to UTF-8
340 var responseBody []byte
341
342 if contentType.TopLevelType == "text" {
343 e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
344 if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
345 responseBody, err = e.NewDecoder().Bytes(resp.Body())
346 if err != nil {
347 // HTTP status code 503 : Service Unavailable
348 p.serveMainPage(ctx, 503, err)
349 return
350 }
351 } else {
352 responseBody = resp.Body()
353 }
354 // update the charset or specify it
355 contentType.Parameters["charset"] = "UTF-8"
356 } else {
357 responseBody = resp.Body()
358 }
359
360 //
361 contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
362
363 // set the content type
364 ctx.SetContentType(contentType.String())
365
366 switch {
367 case contentType.SubType == "css" && contentType.Suffix == "":
368 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
369 case contentType.SubType == "html" && contentType.Suffix == "":
370 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
371 default:
372 if ctx.Request.Header.Peek("Content-Disposition") != nil {
373 ctx.Response.Header.AddBytesV("Content-Disposition", ctx.Request.Header.Peek("Content-Disposition"))
374 }
375 ctx.Write(responseBody)
376 }
377}
378
379func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
380 // serve robots.txt
381 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
382 ctx.SetContentType("text/plain")
383 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
384 return true
385 }
386
387 return false
388}
389
390func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
391 param := ctx.QueryArgs().PeekBytes(paramName)
392
393 if param == nil {
394 param = ctx.PostArgs().PeekBytes(paramName)
395 if param != nil {
396 ctx.PostArgs().DelBytes(paramName)
397 }
398 } else {
399 ctx.QueryArgs().DelBytes(paramName)
400 }
401
402 return param
403}
404
405func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
406 // TODO
407
408 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
409
410 if urlSlices == nil {
411 out.Write(css)
412 return
413 }
414
415 startIndex := 0
416
417 for _, s := range urlSlices {
418 urlStart := s[4]
419 urlEnd := s[5]
420
421 if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
422 out.Write(css[startIndex:urlStart])
423 out.Write([]byte(uri))
424 startIndex = urlEnd
425 } else {
426 log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
427 }
428 }
429 if startIndex < len(css) {
430 out.Write(css[startIndex:len(css)])
431 }
432}
433
434func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
435 r := bytes.NewReader(htmlDoc)
436 decoder := html.NewTokenizer(r)
437 decoder.AllowCDATA(true)
438
439 unsafeElements := make([][]byte, 0, 8)
440 state := STATE_DEFAULT
441 for {
442 token := decoder.Next()
443 if token == html.ErrorToken {
444 err := decoder.Err()
445 if err != io.EOF {
446 log.Println("failed to parse HTML:")
447 }
448 break
449 }
450
451 if len(unsafeElements) == 0 {
452
453 switch token {
454 case html.StartTagToken, html.SelfClosingTagToken:
455 tag, hasAttrs := decoder.TagName()
456 safe := !inArray(tag, UNSAFE_ELEMENTS)
457 if !safe {
458 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
459 var unsafeTag []byte = make([]byte, len(tag))
460 copy(unsafeTag, tag)
461 unsafeElements = append(unsafeElements, unsafeTag)
462 }
463 break
464 }
465 if bytes.Equal(tag, []byte("base")) {
466 for {
467 attrName, attrValue, moreAttr := decoder.TagAttr()
468 if bytes.Equal(attrName, []byte("href")) {
469 parsedURI, err := url.Parse(string(attrValue))
470 if err == nil {
471 rc.BaseURL = parsedURI
472 }
473 }
474 if !moreAttr {
475 break
476 }
477 }
478 break
479 }
480 if bytes.Equal(tag, []byte("noscript")) {
481 state = STATE_IN_NOSCRIPT
482 break
483 }
484 var attrs [][][]byte
485 if hasAttrs {
486 for {
487 attrName, attrValue, moreAttr := decoder.TagAttr()
488 attrs = append(attrs, [][]byte{
489 attrName,
490 attrValue,
491 []byte(html.EscapeString(string(attrValue))),
492 })
493 if !moreAttr {
494 break
495 }
496 }
497 }
498 if bytes.Equal(tag, []byte("link")) {
499 sanitizeLinkTag(rc, out, attrs)
500 break
501 }
502
503 if bytes.Equal(tag, []byte("meta")) {
504 sanitizeMetaTag(rc, out, attrs)
505 break
506 }
507
508 fmt.Fprintf(out, "<%s", tag)
509
510 if hasAttrs {
511 sanitizeAttrs(rc, out, attrs)
512 }
513
514 if token == html.SelfClosingTagToken {
515 fmt.Fprintf(out, " />")
516 } else {
517 fmt.Fprintf(out, ">")
518 if bytes.Equal(tag, []byte("style")) {
519 state = STATE_IN_STYLE
520 }
521 }
522
523 if bytes.Equal(tag, []byte("head")) {
524 fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
525 }
526
527 if bytes.Equal(tag, []byte("form")) {
528 var formURL *url.URL
529 for _, attr := range attrs {
530 if bytes.Equal(attr[0], []byte("action")) {
531 formURL, _ = url.Parse(string(attr[1]))
532 formURL = mergeURIs(rc.BaseURL, formURL)
533 break
534 }
535 }
536 if formURL == nil {
537 formURL = rc.BaseURL
538 }
539 urlStr := formURL.String()
540 var key string
541 if rc.Key != nil {
542 key = hash(urlStr, rc.Key)
543 }
544 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
545
546 }
547
548 case html.EndTagToken:
549 tag, _ := decoder.TagName()
550 writeEndTag := true
551 switch string(tag) {
552 case "body":
553 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
554 case "style":
555 state = STATE_DEFAULT
556 case "noscript":
557 state = STATE_DEFAULT
558 writeEndTag = false
559 }
560 // skip noscript tags - only the tag, not the content, because javascript is sanitized
561 if writeEndTag {
562 fmt.Fprintf(out, "</%s>", tag)
563 }
564
565 case html.TextToken:
566 switch state {
567 case STATE_DEFAULT:
568 fmt.Fprintf(out, "%s", decoder.Raw())
569 case STATE_IN_STYLE:
570 sanitizeCSS(rc, out, decoder.Raw())
571 case STATE_IN_NOSCRIPT:
572 sanitizeHTML(rc, out, decoder.Raw())
573 }
574
575 case html.CommentToken:
576 // ignore comment. TODO : parse IE conditional comment
577
578 case html.DoctypeToken:
579 out.Write(decoder.Raw())
580 }
581 } else {
582 switch token {
583 case html.StartTagToken:
584 tag, _ := decoder.TagName()
585 if inArray(tag, UNSAFE_ELEMENTS) {
586 unsafeElements = append(unsafeElements, tag)
587 }
588
589 case html.EndTagToken:
590 tag, _ := decoder.TagName()
591 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
592 unsafeElements = unsafeElements[:len(unsafeElements)-1]
593 }
594 }
595 }
596 }
597}
598
599func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
600 exclude := false
601 for _, attr := range attrs {
602 attrName := attr[0]
603 attrValue := attr[1]
604 if bytes.Equal(attrName, []byte("rel")) {
605 if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
606 exclude = true
607 break
608 }
609 }
610 if bytes.Equal(attrName, []byte("as")) {
611 if bytes.Equal(attrValue, []byte("script")) {
612 exclude = true
613 break
614 }
615 }
616 }
617
618 if !exclude {
619 out.Write([]byte("<link"))
620 for _, attr := range attrs {
621 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
622 }
623 out.Write([]byte(">"))
624 }
625}
626
627func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
628 var http_equiv []byte
629 var content []byte
630
631 for _, attr := range attrs {
632 attrName := attr[0]
633 attrValue := attr[1]
634 if bytes.Equal(attrName, []byte("http-equiv")) {
635 http_equiv = bytes.ToLower(attrValue)
636 // exclude some <meta http-equiv="..." ..>
637 if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
638 return
639 }
640 }
641 if bytes.Equal(attrName, []byte("content")) {
642 content = attrValue
643 }
644 if bytes.Equal(attrName, []byte("charset")) {
645 // exclude <meta charset="...">
646 return
647 }
648 }
649
650 out.Write([]byte("<meta"))
651 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
652 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
653 contentUrl := content[urlIndex+4:]
654 // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
655 if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
656 if contentUrl[0] == contentUrl[len(contentUrl)-1] {
657 contentUrl = contentUrl[1 : len(contentUrl)-1]
658 }
659 }
660 // output proxify result
661 if uri, err := rc.ProxifyURI(contentUrl); err == nil {
662 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
663 }
664 } else {
665 if len(http_equiv) > 0 {
666 fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
667 }
668 sanitizeAttrs(rc, out, attrs)
669 }
670 out.Write([]byte(">"))
671}
672
673func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
674 for _, attr := range attrs {
675 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
676 }
677}
678
679func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
680 if inArray(attrName, SAFE_ATTRIBUTES) {
681 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
682 return
683 }
684 switch string(attrName) {
685 case "src", "href", "action":
686 if uri, err := rc.ProxifyURI(attrValue); err == nil {
687 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
688 } else {
689 log.Println("cannot proxify uri:", string(attrValue))
690 }
691 case "style":
692 cssAttr := bytes.NewBuffer(nil)
693 sanitizeCSS(rc, cssAttr, attrValue)
694 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
695 }
696}
697
// mergeURIs resolves the reference u2 against the base u1 and returns the
// resulting URL.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	resolved := u1.ResolveReference(u2)
	return resolved
}
701
// sanitizeURI cleans up a URI and extracts its scheme.
//
// Bytes <= 32 (control characters and space) are removed at the beginning and
// at the end of the URI, and inside the scheme; the scheme is lower-cased.
// The scan stops at the first ':' (scheme found) or at the first '/', '?',
// '\\' or '#' (most probably a relative URI).
//
// It returns the cleaned URI and the scheme including the trailing ':'
// ("" when there is no scheme). When a scheme is found, the cleaned scheme is
// written back into uri, so the caller's slice is modified.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	end := len(uri)
	for end > 0 && uri[end-1] <= 32 {
		end--
	}
	uri = uri[:end]

	scheme := make([]byte, 0, 10) // lower-cased bytes seen so far, without special characters
	first := -1                   // index of the first byte > 32
	colon := -1                   // index of the ':' that ends the scheme

scan:
	for i := 0; i < len(uri); i++ {
		c := uri[i]
		// ignore special characters and space
		if c <= 32 {
			continue
		}
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		scheme = append(scheme, c)
		if first < 0 {
			first = i
		}

		switch c {
		case ':':
			// colon found, the scheme is complete
			colon = i
			break scan
		case '/', '?', '\\', '#':
			// special case : most probably a relative URI
			break scan
		}
	}

	if colon < 0 {
		// scheme NOT found
		if first < 0 {
			first = 0
		}
		return uri[first:], ""
	}

	// scheme found: copy the cleaned scheme right before the rest of the URI
	// so that the result is one contiguous slice
	begin := colon - len(scheme) + 1
	copy(uri[begin:], scheme)
	return uri[begin:], string(scheme)
}
753
754func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
755 // sanitize URI
756 uri, scheme := sanitizeURI(uri)
757
758 // remove javascript protocol
759 if scheme == "javascript:" {
760 return "", nil
761 }
762
763 // TODO check malicious data: - e.g. data:script
764 if scheme == "data:" {
765 if bytes.HasPrefix(uri, []byte("data:image/png")) ||
766 bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
767 bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
768 bytes.HasPrefix(uri, []byte("data:image/gif")) ||
769 bytes.HasPrefix(uri, []byte("data:image/webp")) {
770 // should be safe
771 return string(uri), nil
772 } else {
773 // unsafe data
774 return "", nil
775 }
776 }
777
778 // parse the uri
779 u, err := url.Parse(string(uri))
780 if err != nil {
781 return "", err
782 }
783
784 // get the fragment (with the prefix "#")
785 fragment := ""
786 if len(u.Fragment) > 0 {
787 fragment = "#" + u.Fragment
788 }
789
790 // reset the fragment: it is not included in the mortyurl
791 u.Fragment = ""
792
793 // merge the URI with the document URI
794 u = mergeURIs(rc.BaseURL, u)
795
796 // simple internal link ?
797 // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
798 if u.Scheme == rc.BaseURL.Scheme &&
799 (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
800 u.Host == rc.BaseURL.Host &&
801 u.Path == rc.BaseURL.Path &&
802 u.RawQuery == rc.BaseURL.RawQuery {
803 // the fragment is the only difference between the document URI and the uri parameter
804 return fragment, nil
805 }
806
807 // return full URI and fragment (if not empty)
808 morty_uri := u.String()
809
810 if rc.Key == nil {
811 return fmt.Sprintf("./?mortyurl=%s%s", url.QueryEscape(morty_uri), fragment), nil
812 }
813 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s%s", hash(morty_uri, rc.Key), url.QueryEscape(morty_uri), fragment), nil
814}
815
// inArray reports whether b is equal to one of the elements of a.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
824
// hash returns the hexadecimal encoding of the HMAC-SHA256 of msg, computed
// with key.
func hash(msg string, key []byte) string {
	h := hmac.New(sha256.New, key)
	h.Write([]byte(msg))
	digest := h.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
830
// verifyRequestURI reports whether hashMsg is the hexadecimal encoding of the
// HMAC-SHA256 of uri computed with key.
// A hashMsg that is not valid hexadecimal is logged and rejected.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	received, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	expected := hmac.New(sha256.New, key)
	expected.Write(uri)
	// hmac.Equal compares without leaking timing information
	return hmac.Equal(received, expected.Sum(nil))
}
842
843func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
844 ctx.SetContentType("text/html")
845 ctx.SetStatusCode(statusCode)
846 ctx.Write([]byte(`<!doctype html>
847<head>
848<title>MortyProxy</title>
849<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
850<style>
851html { height: 100%; }
852body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
853input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
854input[placeholder] { width:80%; }
855a { text-decoration: none; #2980b9; }
856h1, h2 { font-weight: 200; margin-bottom: 2rem; }
857h1 { font-size: 3em; }
858.container { flex:1; min-height: 100%; margin-bottom: 1em; }
859.footer { margin: 1em; }
860.footer p { font-size: 0.8em; }
861</style>
862</head>
863<body>
864 <div class="container">
865 <h1>MortyProxy</h1>
866`))
867 if err != nil {
868 log.Println("error:", err)
869 ctx.Write([]byte("<h2>Error: "))
870 ctx.Write([]byte(html.EscapeString(err.Error())))
871 ctx.Write([]byte("</h2>"))
872 }
873 if p.Key == nil {
874 ctx.Write([]byte(`
875 <form action="post">
876 Visit url: <input placeholder="https://url.." name="mortyurl" autofocus />
877 <input type="submit" value="go" />
878 </form>`))
879 } else {
880 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
881 }
882 ctx.Write([]byte(`
883 </div>
884 <div class="footer">
885 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
886 <a href="https://github.com/asciimoo/morty">view on github</a>
887 </p>
888 </div>
889</body>
890</html>`))
891}
892
893func main() {
894
895 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
896 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
897 ipv6 := flag.Bool("ipv6", false, "Allow IPv6 HTTP requests")
898 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
899 flag.Parse()
900
901 if *ipv6 {
902 CLIENT.Dial = fasthttp.DialDualStack
903 }
904
905 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
906
907 if *key != "" {
908 p.Key = []byte(*key)
909 }
910
911 log.Println("listening on", *listen)
912
913 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
914 log.Fatal("Error in ListenAndServe:", err)
915 }
916}
Note: See TracBrowser for help on using the repository browser.