source: code/trunk/morty.go@ 21

Last change on this file since 21 was 21, checked in by asciimoo, 9 years ago

[fix] attribute escaping - better solution in the future: https://github.com/golang/go/issues/17667

File size: 15.8 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "path"
15 "regexp"
16 "strings"
17 "time"
18
19 "github.com/valyala/fasthttp"
20 "golang.org/x/net/html"
21 "golang.org/x/text/encoding/charmap"
22)
23
// Parser states tracked by sanitizeHTML; the current state decides how the
// next text token is emitted.
const (
	// STATE_DEFAULT: text tokens are copied to the output as-is.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text tokens are passed through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text tokens are sanitized recursively as HTML.
	STATE_IN_NOSCRIPT
)
29
30var CLIENT *fasthttp.Client = &fasthttp.Client{
31 Dial: fasthttp.DialDualStack,
32 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
33}
34
// CSS_URL_REGEXP matches CSS url(...) references. Submatch 1 and 3 are the
// optional opening/closing quote, submatch 2 is the URL itself (tab and
// printable ASCII except space, quotes and the closing parenthesis). The
// closing parenthesis is optional.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
36
// Lookup tables consulted (via inArray) while sanitizing HTML.
var (
	// UNSAFE_ELEMENTS lists tags that sanitizeHTML drops together with
	// everything nested inside them.
	UNSAFE_ELEMENTS = [][]byte{
		[]byte("applet"),
		[]byte("canvas"),
		[]byte("embed"),
		//[]byte("iframe"),
		[]byte("script"),
	}

	// SAFE_ATTRIBUTES lists attribute names that sanitizeAttr copies to the
	// output with their value HTML-escaped and otherwise unchanged.
	SAFE_ATTRIBUTES = [][]byte{
		[]byte("abbr"),
		[]byte("accesskey"),
		[]byte("align"),
		[]byte("alt"),
		[]byte("as"),
		[]byte("autocomplete"),
		[]byte("charset"),
		[]byte("checked"),
		[]byte("class"),
		[]byte("content"),
		[]byte("contenteditable"),
		[]byte("contextmenu"),
		[]byte("dir"),
		[]byte("for"),
		[]byte("height"),
		[]byte("hidden"),
		[]byte("id"),
		[]byte("lang"),
		[]byte("media"),
		[]byte("method"),
		[]byte("name"),
		[]byte("nowrap"),
		[]byte("placeholder"),
		[]byte("property"),
		[]byte("rel"),
		[]byte("spellcheck"),
		[]byte("tabindex"),
		[]byte("target"),
		[]byte("title"),
		[]byte("translate"),
		[]byte("type"),
		[]byte("value"),
		[]byte("width"),
	}

	// SELF_CLOSING_ELEMENTS lists tags that have no end tag; an unsafe tag
	// from this list is skipped without waiting for a matching close.
	SELF_CLOSING_ELEMENTS = [][]byte{
		[]byte("area"),
		[]byte("base"),
		[]byte("br"),
		[]byte("col"),
		[]byte("embed"),
		[]byte("hr"),
		[]byte("img"),
		[]byte("input"),
		[]byte("keygen"),
		[]byte("link"),
		[]byte("meta"),
		[]byte("param"),
		[]byte("source"),
		[]byte("track"),
		[]byte("wbr"),
	}
)
98
type (
	// Proxy holds the server-wide settings used by RequestHandler.
	Proxy struct {
		// Key is the HMAC key used to verify "mortyhash"; nil disables
		// verification.
		Key []byte
		// RequestTimeout bounds each upstream fetch.
		RequestTimeout time.Duration
	}

	// RequestConfig carries the per-request data the sanitizers need to
	// rewrite URLs. It is built with positional literals, so the field
	// order must not change.
	RequestConfig struct {
		// Key is the HMAC key used to sign rewritten URLs; nil means
		// unsigned.
		Key []byte
		// baseURL is the URL of the document being sanitized; relative
		// references are resolved against it.
		baseURL *url.URL
	}
)
108
// HTML snippets injected into sanitized pages. Both are fmt format strings.
var (
	// HTML_FORM_EXTENSION is written right after every <form> start tag.
	// Arguments: the form's target URL, then its hash.
	HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`

	// HTML_BODY_EXTENSION is written right before </body>.
	// Argument: the URL of the original page. Literal percent signs in the
	// CSS are doubled because the string goes through fmt.
	HTML_BODY_EXTENSION = `
<div id="mortyheader">
 <input type="checkbox" id="mortytoggle" autocomplete="off" />
 <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s">original site</a>.</p><div><p><label for="mortytoggle">hide</label></p></div></div>
</div>
<style>
#mortyheader { position: fixed; top: 15%%; left: 0; max-width: 10em; color: #444; overflow: hidden; z-index: 110000; font-size: 0.9em; padding: 1em 1em 1em 0; margin: 0; }
#mortyheader a { color: #3498db; }
#mortyheader p { padding: 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 0.9em; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
)
126
127func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
128
129 if appRequestHandler(ctx) {
130 return
131 }
132
133 requestHash := popRequestParam(ctx, []byte("mortyhash"))
134
135 requestURI := popRequestParam(ctx, []byte("mortyurl"))
136
137 if requestURI == nil {
138 p.serveMainPage(ctx, nil)
139 return
140 }
141
142 if p.Key != nil {
143 if !verifyRequestURI(requestURI, requestHash, p.Key) {
144 p.serveMainPage(ctx, errors.New(`invalid "mortyhash" parameter`))
145 return
146 }
147 }
148
149 parsedURI, err := url.Parse(string(requestURI))
150
151 if strings.HasSuffix(parsedURI.Host, ".onion") {
152 p.serveMainPage(ctx, errors.New("Tor urls are not supported yet"))
153 return
154 }
155
156 if err != nil {
157 p.serveMainPage(ctx, err)
158 return
159 }
160
161 req := fasthttp.AcquireRequest()
162 defer fasthttp.ReleaseRequest(req)
163 req.SetConnectionClose()
164
165 reqQuery := parsedURI.Query()
166 ctx.QueryArgs().VisitAll(func(key, value []byte) {
167 reqQuery.Add(string(key), string(value))
168 })
169
170 parsedURI.RawQuery = reqQuery.Encode()
171
172 uriStr := parsedURI.String()
173
174 log.Println("getting", uriStr)
175
176 req.SetRequestURI(uriStr)
177 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
178
179 resp := fasthttp.AcquireResponse()
180 defer fasthttp.ReleaseResponse(resp)
181
182 req.Header.SetMethodBytes(ctx.Method())
183 if ctx.IsPost() || ctx.IsPut() {
184 req.SetBody(ctx.PostBody())
185 }
186
187 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
188
189 if err != nil {
190 p.serveMainPage(ctx, err)
191 return
192 }
193
194 if resp.StatusCode() != 200 {
195 switch resp.StatusCode() {
196 case 301, 302, 303, 307, 308:
197 loc := resp.Header.Peek("Location")
198 if loc != nil {
199 url, err := proxifyURI(&RequestConfig{p.Key, parsedURI}, string(loc))
200 if err == nil {
201 ctx.SetStatusCode(resp.StatusCode())
202 ctx.Response.Header.Add("Location", url)
203 log.Println("redirect to", string(loc))
204 return
205 }
206 }
207 }
208 log.Println("invalid request:", resp.StatusCode())
209 return
210 }
211
212 contentType := resp.Header.Peek("Content-Type")
213
214 if contentType == nil {
215 p.serveMainPage(ctx, errors.New("invalid content type"))
216 return
217 }
218
219 if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) {
220 p.serveMainPage(ctx, errors.New("forbidden content type"))
221 return
222 }
223
224 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
225
226 var responseBody []byte
227
228 if len(contentInfo) == 2 && bytes.Contains(contentInfo[1], []byte("ISO-8859-2")) && bytes.Contains(contentInfo[0], []byte("text")) {
229 var err error
230 responseBody, err = charmap.ISO8859_2.NewDecoder().Bytes(resp.Body())
231 if err != nil {
232 p.serveMainPage(ctx, err)
233 return
234 }
235 } else {
236 responseBody = resp.Body()
237 }
238
239 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
240
241 switch {
242 case bytes.Contains(contentType, []byte("css")):
243 sanitizeCSS(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
244 case bytes.Contains(contentType, []byte("html")):
245 sanitizeHTML(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
246 default:
247 ctx.Write(responseBody)
248 }
249}
250
251func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
252 // serve robots.txt
253 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
254 ctx.SetContentType("text/plain")
255 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
256 return true
257 }
258
259 return false
260}
261
262func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
263 param := ctx.QueryArgs().PeekBytes(paramName)
264
265 if param == nil {
266 param = ctx.PostArgs().PeekBytes(paramName)
267 if param != nil {
268 ctx.PostArgs().DelBytes(paramName)
269 }
270 } else {
271 ctx.QueryArgs().DelBytes(paramName)
272 }
273
274 return param
275}
276
277func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
278 // TODO
279
280 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
281
282 if urlSlices == nil {
283 out.Write(css)
284 return
285 }
286
287 startIndex := 0
288
289 for _, s := range urlSlices {
290 urlStart := s[4]
291 urlEnd := s[5]
292
293 if uri, err := proxifyURI(rc, string(css[urlStart:urlEnd])); err == nil {
294 out.Write(css[startIndex:urlStart])
295 out.Write([]byte(uri))
296 startIndex = urlEnd
297 } else {
298 log.Println("cannot proxify css uri:", css[urlStart:urlEnd])
299 }
300 }
301 if startIndex < len(css) {
302 out.Write(css[startIndex:len(css)])
303 }
304}
305
306func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
307 r := bytes.NewReader(htmlDoc)
308 decoder := html.NewTokenizer(r)
309 decoder.AllowCDATA(true)
310
311 unsafeElements := make([][]byte, 0, 8)
312 state := STATE_DEFAULT
313
314 for {
315 token := decoder.Next()
316 if token == html.ErrorToken {
317 err := decoder.Err()
318 if err != io.EOF {
319 log.Println("failed to parse HTML:")
320 }
321 break
322 }
323
324 if len(unsafeElements) == 0 {
325
326 switch token {
327 case html.StartTagToken, html.SelfClosingTagToken:
328 tag, hasAttrs := decoder.TagName()
329 safe := !inArray(tag, UNSAFE_ELEMENTS)
330 if !safe {
331 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
332 var unsafeTag []byte = make([]byte, len(tag))
333 copy(unsafeTag, tag)
334 unsafeElements = append(unsafeElements, unsafeTag)
335 }
336 break
337 }
338 if bytes.Equal(tag, []byte("noscript")) {
339 state = STATE_IN_NOSCRIPT
340 break
341 }
342 var attrs [][][]byte
343 if hasAttrs {
344 for {
345 attrName, attrValue, moreAttr := decoder.TagAttr()
346 attrs = append(attrs, [][]byte{
347 attrName,
348 attrValue,
349 []byte(html.EscapeString(string(attrValue))),
350 })
351 if !moreAttr {
352 break
353 }
354 }
355 }
356 if bytes.Equal(tag, []byte("link")) {
357 sanitizeLinkTag(rc, out, attrs)
358 break
359 }
360
361 fmt.Fprintf(out, "<%s", tag)
362
363 if hasAttrs {
364 if bytes.Equal(tag, []byte("meta")) {
365 sanitizeMetaAttrs(rc, out, attrs)
366 } else {
367 sanitizeAttrs(rc, out, attrs)
368 }
369 }
370
371 if token == html.SelfClosingTagToken {
372 fmt.Fprintf(out, " />")
373 } else {
374 fmt.Fprintf(out, ">")
375 if bytes.Equal(tag, []byte("style")) {
376 state = STATE_IN_STYLE
377 }
378 }
379
380 if bytes.Equal(tag, []byte("form")) {
381 var formURL *url.URL
382 for _, attr := range attrs {
383 if bytes.Equal(attr[0], []byte("action")) {
384 formURL, _ = url.Parse(string(attr[1]))
385 mergeURIs(rc.baseURL, formURL)
386 break
387 }
388 }
389 if formURL == nil {
390 formURL = rc.baseURL
391 }
392 urlStr := formURL.String()
393 var key string
394 if rc.Key != nil {
395 key = hash(urlStr, rc.Key)
396 }
397 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
398
399 }
400
401 case html.EndTagToken:
402 tag, _ := decoder.TagName()
403 writeEndTag := true
404 switch string(tag) {
405 case "body":
406 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.baseURL.String())
407 case "style":
408 state = STATE_DEFAULT
409 case "noscript":
410 state = STATE_DEFAULT
411 writeEndTag = false
412 }
413 // skip noscript tags - only the tag, not the content, because javascript is sanitized
414 if writeEndTag {
415 fmt.Fprintf(out, "</%s>", tag)
416 }
417
418 case html.TextToken:
419 switch state {
420 case STATE_DEFAULT:
421 fmt.Fprintf(out, "%s", decoder.Raw())
422 case STATE_IN_STYLE:
423 sanitizeCSS(rc, out, decoder.Raw())
424 case STATE_IN_NOSCRIPT:
425 sanitizeHTML(rc, out, decoder.Raw())
426 }
427
428 case html.DoctypeToken, html.CommentToken:
429 out.Write(decoder.Raw())
430 }
431 } else {
432 switch token {
433 case html.StartTagToken:
434 tag, _ := decoder.TagName()
435 if inArray(tag, UNSAFE_ELEMENTS) {
436 unsafeElements = append(unsafeElements, tag)
437 }
438
439 case html.EndTagToken:
440 tag, _ := decoder.TagName()
441 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
442 unsafeElements = unsafeElements[:len(unsafeElements)-1]
443 }
444 }
445 }
446 }
447}
448
449func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
450 exclude := false
451 for _, attr := range attrs {
452 attrName := attr[0]
453 attrValue := attr[1]
454 if bytes.Equal(attrName, []byte("rel")) {
455 if bytes.Equal(attrValue, []byte("dns-prefetch")) {
456 exclude = true
457 break
458 }
459 }
460 if bytes.Equal(attrName, []byte("as")) {
461 if bytes.Equal(attrValue, []byte("script")) {
462 exclude = true
463 break
464 }
465 }
466 }
467
468 if !exclude {
469 out.Write([]byte("<link"))
470 for _, attr := range attrs {
471 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
472 }
473 out.Write([]byte(">"))
474 }
475}
476
477func sanitizeMetaAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
478 var http_equiv []byte
479 var content []byte
480
481 for _, attr := range attrs {
482 attrName := attr[0]
483 attrValue := attr[1]
484 if bytes.Equal(attrName, []byte("http-equiv")) {
485 http_equiv = bytes.ToLower(attrValue)
486 }
487 if bytes.Equal(attrName, []byte("content")) {
488 content = attrValue
489 }
490 }
491
492 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
493 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
494 contentUrl := content[urlIndex+4:]
495 if uri, err := proxifyURI(rc, string(contentUrl)); err == nil {
496 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
497 }
498 } else {
499 sanitizeAttrs(rc, out, attrs)
500 }
501
502}
503
504func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
505 for _, attr := range attrs {
506 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
507 }
508}
509
510func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
511 if inArray(attrName, SAFE_ATTRIBUTES) {
512 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
513 return
514 }
515 switch string(attrName) {
516 case "src", "href", "action":
517 if uri, err := proxifyURI(rc, string(attrValue)); err == nil {
518 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
519 } else {
520 log.Println("cannot proxify uri:", attrValue)
521 }
522 case "style":
523 cssAttr := bytes.NewBuffer(nil)
524 sanitizeCSS(rc, cssAttr, attrValue)
525 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
526 }
527}
528
// mergeURIs resolves the reference u2 against the base URL u1 and stores the
// result in u2 (u1 is not modified). Resolution follows
// url.URL.ResolveReference: a missing scheme and host are taken from u1,
// relative paths are resolved against u1's path (keeping trailing slashes and
// removing "." / ".." segments), and a reference without a path keeps u1's
// path. A nil u1 or u2 is a no-op.
func mergeURIs(u1, u2 *url.URL) {
	if u1 == nil || u2 == nil {
		return
	}
	*u2 = *u1.ResolveReference(u2)
}
540
541func proxifyURI(rc *RequestConfig, uri string) (string, error) {
542 // TODO check malicious data: - e.g. data:script
543 if strings.HasPrefix(uri, "data:") {
544 return uri, nil
545 }
546
547 if len(uri) > 0 && uri[0] == '#' {
548 return uri, nil
549 }
550
551 u, err := url.Parse(uri)
552 if err != nil {
553 return "", err
554 }
555 mergeURIs(rc.baseURL, u)
556
557 uri = u.String()
558
559 if rc.Key == nil {
560 return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
561 }
562 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
563}
564
// inArray reports whether a contains an element whose bytes equal b.
func inArray(b []byte, a [][]byte) bool {
	for i := range a {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
573
// hash returns the lowercase hexadecimal HMAC-SHA256 of msg under key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
579
// verifyRequestURI reports whether hashMsg is the hex-encoded HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hexadecimal is logged and
// rejected. The comparison itself uses hmac.Equal.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	given := make([]byte, hex.DecodedLen(len(hashMsg)))
	if _, err := hex.Decode(given, hashMsg); err != nil {
		log.Println("hmac error:", err)
		return false
	}

	signer := hmac.New(sha256.New, key)
	signer.Write(uri)
	expected := signer.Sum(nil)

	return hmac.Equal(given, expected)
}
591
592func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, err error) {
593 ctx.SetContentType("text/html")
594 ctx.Write([]byte(`<!doctype html>
595<head>
596<title>MortyProxy</title>
597<style>
598body { font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
599input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
600a { text-decoration: none; #2980b9; }
601h1, h2 { font-weight: 200; margin-bottom: 2rem; }
602h1 { font-size: 3em; }
603.footer { position: absolute; bottom: 2em; width: 100%; }
604.footer p { font-size: 0.8em; }
605
606</style>
607</head>
608<body>
609 <h1>MortyProxy</h1>`))
610 if err != nil {
611 ctx.SetStatusCode(404)
612 log.Println("error:", err)
613 ctx.Write([]byte("<h2>Error: "))
614 ctx.Write([]byte(html.EscapeString(err.Error())))
615 ctx.Write([]byte("</h2>"))
616 } else {
617 ctx.SetStatusCode(200)
618 }
619 if p.Key == nil {
620 ctx.Write([]byte(`
621<form action="post">
622 Visit url: <input placeholder="https://url.." name="mortyurl" />
623 <input type="submit" value="go" />
624</form>`))
625 } else {
626 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
627 }
628 ctx.Write([]byte(`
629<div class="footer">
630 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
631 <a href="https://github.com/asciimoo/morty">view on github</a>
632 </p>
633</div>
634</body>
635</html>`))
636}
637
638func main() {
639
640 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
641 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
642 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
643 flag.Parse()
644
645 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
646
647 if *key != "" {
648 p.Key = []byte(*key)
649 }
650
651 log.Println("listening on", *listen)
652
653 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
654 log.Fatal("Error in ListenAndServe:", err)
655 }
656}
Note: See TracBrowser for help on using the repository browser.