source: code/trunk/morty.go@ 23

Last change on this file since 23 was 23, checked in by asciimoo, 9 years ago

[mod] proxification refactor

File size: 15.8 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "path"
15 "regexp"
16 "strings"
17 "time"
18
19 "github.com/valyala/fasthttp"
20 "golang.org/x/net/html"
21 "golang.org/x/text/encoding/charmap"
22)
23
// Text-handling states used by sanitizeHTML while it walks the token stream.
const (
	// STATE_DEFAULT: text tokens are copied to the output as they are.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text tokens are routed through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text tokens are re-sanitized as HTML.
	STATE_IN_NOSCRIPT
)
29
30var CLIENT *fasthttp.Client = &fasthttp.Client{
31 Dial: fasthttp.DialDualStack,
32 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
33}
34
// CSS_URL_REGEXP matches CSS `url(...)` references.
//
// Submatch 1 is the optional opening quote, submatch 2 the URL itself and
// submatch 3 the optional closing quote; the closing parenthesis is optional.
// sanitizeCSS relies on submatch 2 (index pair 4/5) to locate the URL.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
36
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML removes from the
// output together with everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("script"),
}
44
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output with their HTML-escaped value and no further rewriting.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
80
// SELF_CLOSING_ELEMENTS lists tag names that never have a matching end tag.
// sanitizeHTML consults it so that a removed element of this kind does not
// start a skipped region.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
98
type (
	// Proxy holds the settings shared by all requests served by RequestHandler.
	Proxy struct {
		// Key is the HMAC key used to validate the "mortyhash" parameter;
		// when nil, request URLs are not verified.
		Key []byte
		// RequestTimeout bounds each upstream fetch.
		RequestTimeout time.Duration
	}

	// RequestConfig carries the per-document context needed to rewrite URLs.
	RequestConfig struct {
		// Key is the HMAC key used to sign rewritten URLs; nil disables signing.
		Key []byte
		// BaseURL is the URL of the document being sanitized; relative
		// references are resolved against it.
		BaseURL *url.URL
	}
)
108
var (
	// HTML_FORM_EXTENSION is the format string for the hidden inputs written
	// right after every <form> start tag. Its two verbs receive the form's
	// target URL and the hash of that URL.
	HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`

	// HTML_BODY_EXTENSION is the format string for the banner written just
	// before </body>. Its single verb receives the URL of the original page;
	// literal percent signs in the CSS are doubled.
	HTML_BODY_EXTENSION = `
<div id="mortyheader">
  <input type="checkbox" id="mortytoggle" autocomplete="off" />
  <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s">original site</a>.</p><div><p><label for="mortytoggle">hide</label></p></div></div>
</div>
<style>
#mortyheader { position: fixed; top: 15%%; left: 0; max-width: 10em; color: #444; overflow: hidden; z-index: 110000; font-size: 0.9em; padding: 1em 1em 1em 0; margin: 0; }
#mortyheader a { color: #3498db; }
#mortyheader p { padding: 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 0.9em; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
)
126
127func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
128
129 if appRequestHandler(ctx) {
130 return
131 }
132
133 requestHash := popRequestParam(ctx, []byte("mortyhash"))
134
135 requestURI := popRequestParam(ctx, []byte("mortyurl"))
136
137 if requestURI == nil {
138 p.serveMainPage(ctx, nil)
139 return
140 }
141
142 if p.Key != nil {
143 if !verifyRequestURI(requestURI, requestHash, p.Key) {
144 p.serveMainPage(ctx, errors.New(`invalid "mortyhash" parameter`))
145 return
146 }
147 }
148
149 parsedURI, err := url.Parse(string(requestURI))
150
151 if strings.HasSuffix(parsedURI.Host, ".onion") {
152 p.serveMainPage(ctx, errors.New("Tor urls are not supported yet"))
153 return
154 }
155
156 if err != nil {
157 p.serveMainPage(ctx, err)
158 return
159 }
160
161 req := fasthttp.AcquireRequest()
162 defer fasthttp.ReleaseRequest(req)
163 req.SetConnectionClose()
164
165 reqQuery := parsedURI.Query()
166 ctx.QueryArgs().VisitAll(func(key, value []byte) {
167 reqQuery.Add(string(key), string(value))
168 })
169
170 parsedURI.RawQuery = reqQuery.Encode()
171
172 uriStr := parsedURI.String()
173
174 log.Println("getting", uriStr)
175
176 req.SetRequestURI(uriStr)
177 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
178
179 resp := fasthttp.AcquireResponse()
180 defer fasthttp.ReleaseResponse(resp)
181
182 req.Header.SetMethodBytes(ctx.Method())
183 if ctx.IsPost() || ctx.IsPut() {
184 req.SetBody(ctx.PostBody())
185 }
186
187 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
188
189 if err != nil {
190 p.serveMainPage(ctx, err)
191 return
192 }
193
194 if resp.StatusCode() != 200 {
195 switch resp.StatusCode() {
196 case 301, 302, 303, 307, 308:
197 loc := resp.Header.Peek("Location")
198 if loc != nil {
199 rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
200 url, err := rc.ProxifyURI(string(loc))
201 if err == nil {
202 ctx.SetStatusCode(resp.StatusCode())
203 ctx.Response.Header.Add("Location", url)
204 log.Println("redirect to", string(loc))
205 return
206 }
207 }
208 }
209 log.Println("invalid request:", resp.StatusCode())
210 return
211 }
212
213 contentType := resp.Header.Peek("Content-Type")
214
215 if contentType == nil {
216 p.serveMainPage(ctx, errors.New("invalid content type"))
217 return
218 }
219
220 if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) {
221 p.serveMainPage(ctx, errors.New("forbidden content type"))
222 return
223 }
224
225 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
226
227 var responseBody []byte
228
229 if len(contentInfo) == 2 && bytes.Contains(contentInfo[1], []byte("ISO-8859-2")) && bytes.Contains(contentInfo[0], []byte("text")) {
230 var err error
231 responseBody, err = charmap.ISO8859_2.NewDecoder().Bytes(resp.Body())
232 if err != nil {
233 p.serveMainPage(ctx, err)
234 return
235 }
236 } else {
237 responseBody = resp.Body()
238 }
239
240 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
241
242 switch {
243 case bytes.Contains(contentType, []byte("css")):
244 sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
245 case bytes.Contains(contentType, []byte("html")):
246 sanitizeHTML(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
247 default:
248 ctx.Write(responseBody)
249 }
250}
251
252func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
253 // serve robots.txt
254 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
255 ctx.SetContentType("text/plain")
256 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
257 return true
258 }
259
260 return false
261}
262
263func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
264 param := ctx.QueryArgs().PeekBytes(paramName)
265
266 if param == nil {
267 param = ctx.PostArgs().PeekBytes(paramName)
268 if param != nil {
269 ctx.PostArgs().DelBytes(paramName)
270 }
271 } else {
272 ctx.QueryArgs().DelBytes(paramName)
273 }
274
275 return param
276}
277
278func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
279 // TODO
280
281 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
282
283 if urlSlices == nil {
284 out.Write(css)
285 return
286 }
287
288 startIndex := 0
289
290 for _, s := range urlSlices {
291 urlStart := s[4]
292 urlEnd := s[5]
293
294 if uri, err := rc.ProxifyURI(string(css[urlStart:urlEnd])); err == nil {
295 out.Write(css[startIndex:urlStart])
296 out.Write([]byte(uri))
297 startIndex = urlEnd
298 } else {
299 log.Println("cannot proxify css uri:", css[urlStart:urlEnd])
300 }
301 }
302 if startIndex < len(css) {
303 out.Write(css[startIndex:len(css)])
304 }
305}
306
307func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
308 r := bytes.NewReader(htmlDoc)
309 decoder := html.NewTokenizer(r)
310 decoder.AllowCDATA(true)
311
312 unsafeElements := make([][]byte, 0, 8)
313 state := STATE_DEFAULT
314
315 for {
316 token := decoder.Next()
317 if token == html.ErrorToken {
318 err := decoder.Err()
319 if err != io.EOF {
320 log.Println("failed to parse HTML:")
321 }
322 break
323 }
324
325 if len(unsafeElements) == 0 {
326
327 switch token {
328 case html.StartTagToken, html.SelfClosingTagToken:
329 tag, hasAttrs := decoder.TagName()
330 safe := !inArray(tag, UNSAFE_ELEMENTS)
331 if !safe {
332 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
333 var unsafeTag []byte = make([]byte, len(tag))
334 copy(unsafeTag, tag)
335 unsafeElements = append(unsafeElements, unsafeTag)
336 }
337 break
338 }
339 if bytes.Equal(tag, []byte("noscript")) {
340 state = STATE_IN_NOSCRIPT
341 break
342 }
343 var attrs [][][]byte
344 if hasAttrs {
345 for {
346 attrName, attrValue, moreAttr := decoder.TagAttr()
347 attrs = append(attrs, [][]byte{
348 attrName,
349 attrValue,
350 []byte(html.EscapeString(string(attrValue))),
351 })
352 if !moreAttr {
353 break
354 }
355 }
356 }
357 if bytes.Equal(tag, []byte("link")) {
358 sanitizeLinkTag(rc, out, attrs)
359 break
360 }
361
362 fmt.Fprintf(out, "<%s", tag)
363
364 if hasAttrs {
365 if bytes.Equal(tag, []byte("meta")) {
366 sanitizeMetaAttrs(rc, out, attrs)
367 } else {
368 sanitizeAttrs(rc, out, attrs)
369 }
370 }
371
372 if token == html.SelfClosingTagToken {
373 fmt.Fprintf(out, " />")
374 } else {
375 fmt.Fprintf(out, ">")
376 if bytes.Equal(tag, []byte("style")) {
377 state = STATE_IN_STYLE
378 }
379 }
380
381 if bytes.Equal(tag, []byte("form")) {
382 var formURL *url.URL
383 for _, attr := range attrs {
384 if bytes.Equal(attr[0], []byte("action")) {
385 formURL, _ = url.Parse(string(attr[1]))
386 mergeURIs(rc.BaseURL, formURL)
387 break
388 }
389 }
390 if formURL == nil {
391 formURL = rc.BaseURL
392 }
393 urlStr := formURL.String()
394 var key string
395 if rc.Key != nil {
396 key = hash(urlStr, rc.Key)
397 }
398 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
399
400 }
401
402 case html.EndTagToken:
403 tag, _ := decoder.TagName()
404 writeEndTag := true
405 switch string(tag) {
406 case "body":
407 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.BaseURL.String())
408 case "style":
409 state = STATE_DEFAULT
410 case "noscript":
411 state = STATE_DEFAULT
412 writeEndTag = false
413 }
414 // skip noscript tags - only the tag, not the content, because javascript is sanitized
415 if writeEndTag {
416 fmt.Fprintf(out, "</%s>", tag)
417 }
418
419 case html.TextToken:
420 switch state {
421 case STATE_DEFAULT:
422 fmt.Fprintf(out, "%s", decoder.Raw())
423 case STATE_IN_STYLE:
424 sanitizeCSS(rc, out, decoder.Raw())
425 case STATE_IN_NOSCRIPT:
426 sanitizeHTML(rc, out, decoder.Raw())
427 }
428
429 case html.DoctypeToken, html.CommentToken:
430 out.Write(decoder.Raw())
431 }
432 } else {
433 switch token {
434 case html.StartTagToken:
435 tag, _ := decoder.TagName()
436 if inArray(tag, UNSAFE_ELEMENTS) {
437 unsafeElements = append(unsafeElements, tag)
438 }
439
440 case html.EndTagToken:
441 tag, _ := decoder.TagName()
442 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
443 unsafeElements = unsafeElements[:len(unsafeElements)-1]
444 }
445 }
446 }
447 }
448}
449
450func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
451 exclude := false
452 for _, attr := range attrs {
453 attrName := attr[0]
454 attrValue := attr[1]
455 if bytes.Equal(attrName, []byte("rel")) {
456 if bytes.Equal(attrValue, []byte("dns-prefetch")) {
457 exclude = true
458 break
459 }
460 }
461 if bytes.Equal(attrName, []byte("as")) {
462 if bytes.Equal(attrValue, []byte("script")) {
463 exclude = true
464 break
465 }
466 }
467 }
468
469 if !exclude {
470 out.Write([]byte("<link"))
471 for _, attr := range attrs {
472 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
473 }
474 out.Write([]byte(">"))
475 }
476}
477
478func sanitizeMetaAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
479 var http_equiv []byte
480 var content []byte
481
482 for _, attr := range attrs {
483 attrName := attr[0]
484 attrValue := attr[1]
485 if bytes.Equal(attrName, []byte("http-equiv")) {
486 http_equiv = bytes.ToLower(attrValue)
487 }
488 if bytes.Equal(attrName, []byte("content")) {
489 content = attrValue
490 }
491 }
492
493 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
494 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
495 contentUrl := content[urlIndex+4:]
496 if uri, err := rc.ProxifyURI(string(contentUrl)); err == nil {
497 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
498 }
499 } else {
500 sanitizeAttrs(rc, out, attrs)
501 }
502
503}
504
505func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
506 for _, attr := range attrs {
507 sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
508 }
509}
510
511func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
512 if inArray(attrName, SAFE_ATTRIBUTES) {
513 fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
514 return
515 }
516 switch string(attrName) {
517 case "src", "href", "action":
518 if uri, err := rc.ProxifyURI(string(attrValue)); err == nil {
519 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
520 } else {
521 log.Println("cannot proxify uri:", attrValue)
522 }
523 case "style":
524 cssAttr := bytes.NewBuffer(nil)
525 sanitizeCSS(rc, cssAttr, attrValue)
526 fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
527 }
528}
529
// mergeURIs resolves the reference u2 against the base u1 and stores the
// result in *u2; u1 is not modified.
//
// Resolution is delegated to (*url.URL).ResolveReference, so scheme-relative,
// host-relative, path-relative and query-only references are all handled, a
// base without a path still contributes its host, and a base path without
// any '/' no longer causes a panic. If either argument is nil the call is a
// no-op.
func mergeURIs(u1, u2 *url.URL) {
	if u1 == nil || u2 == nil {
		return
	}
	*u2 = *u1.ResolveReference(u2)
}
541
542func (rc *RequestConfig) ProxifyURI(uri string) (string, error) {
543 // TODO check malicious data: - e.g. data:script
544 if strings.HasPrefix(uri, "data:") {
545 return uri, nil
546 }
547
548 if len(uri) > 0 && uri[0] == '#' {
549 return uri, nil
550 }
551
552 u, err := url.Parse(uri)
553 if err != nil {
554 return "", err
555 }
556 mergeURIs(rc.BaseURL, u)
557
558 uri = u.String()
559
560 if rc.Key == nil {
561 return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
562 }
563 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
564}
565
// inArray reports whether a contains an element whose bytes equal b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
574
// hash returns the lowercase hexadecimal HMAC-SHA256 of msg under key.
func hash(msg string, key []byte) string {
	h := hmac.New(sha256.New, key)
	h.Write([]byte(msg))
	digest := h.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
580
// verifyRequestURI reports whether hashMsg is the hexadecimal HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hexadecimal is logged and
// rejected.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	claimed, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	expected := hmac.New(sha256.New, key)
	expected.Write(uri)
	return hmac.Equal(claimed, expected.Sum(nil))
}
592
593func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, err error) {
594 ctx.SetContentType("text/html")
595 ctx.Write([]byte(`<!doctype html>
596<head>
597<title>MortyProxy</title>
598<style>
599body { font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
600input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
601a { text-decoration: none; #2980b9; }
602h1, h2 { font-weight: 200; margin-bottom: 2rem; }
603h1 { font-size: 3em; }
604.footer { position: absolute; bottom: 2em; width: 100%; }
605.footer p { font-size: 0.8em; }
606
607</style>
608</head>
609<body>
610 <h1>MortyProxy</h1>`))
611 if err != nil {
612 ctx.SetStatusCode(404)
613 log.Println("error:", err)
614 ctx.Write([]byte("<h2>Error: "))
615 ctx.Write([]byte(html.EscapeString(err.Error())))
616 ctx.Write([]byte("</h2>"))
617 } else {
618 ctx.SetStatusCode(200)
619 }
620 if p.Key == nil {
621 ctx.Write([]byte(`
622<form action="post">
623 Visit url: <input placeholder="https://url.." name="mortyurl" />
624 <input type="submit" value="go" />
625</form>`))
626 } else {
627 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
628 }
629 ctx.Write([]byte(`
630<div class="footer">
631 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
632 <a href="https://github.com/asciimoo/morty">view on github</a>
633 </p>
634</div>
635</body>
636</html>`))
637}
638
639func main() {
640
641 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
642 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
643 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
644 flag.Parse()
645
646 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
647
648 if *key != "" {
649 p.Key = []byte(*key)
650 }
651
652 log.Println("listening on", *listen)
653
654 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
655 log.Fatal("Error in ListenAndServe:", err)
656 }
657}
Note: See TracBrowser for help on using the repository browser.