source: code/trunk/morty.go@ 17

Last change on this file since 17 was 17, checked in by asciimoo, 9 years ago

[enh] do not serve javascript files

File size: 15.4 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "path"
15 "regexp"
16 "strings"
17 "time"
18
19 "github.com/valyala/fasthttp"
20 "golang.org/x/net/html"
21 "golang.org/x/text/encoding/charmap"
22)
23
// Tokenizer states used by sanitizeHTML to decide how a text token is handled.
const (
	// STATE_DEFAULT: text is copied to the output unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is the body of a <style> element and goes through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is the body of a <noscript> element and is sanitized as HTML.
	STATE_IN_NOSCRIPT
)
29
30var CLIENT *fasthttp.Client = &fasthttp.Client{
31 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
32}
33
// CSS_URL_REGEXP matches CSS url(...) references. Submatch 1 is the optional
// opening quote, submatch 2 the URL itself (no spaces, quotes or closing
// parenthesis) and submatch 3 the optional closing quote.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
35
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML removes together with
// everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("script"),
}
43
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output without rewriting their value.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("as"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
79
// SELF_CLOSING_ELEMENTS lists the void elements: tags that never have a
// matching end tag, so sanitizeHTML must not wait for one.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
97
// Proxy holds the server-wide settings of the sanitizing proxy.
type Proxy struct {
	// Key is the HMAC key used to verify the "mortyhash" request parameter;
	// when nil, requests are accepted without verification.
	Key []byte
	// RequestTimeout is the timeout passed to the upstream HTTP request.
	RequestTimeout time.Duration
}
102
// RequestConfig carries the per-request data needed while rewriting a document.
type RequestConfig struct {
	// Key is the HMAC key used to sign proxified URLs; nil disables signing.
	Key []byte
	// baseURL is the URL of the fetched document; relative references are
	// merged against it.
	baseURL *url.URL
}
107
// HTML_FORM_EXTENSION is the format string for the hidden inputs appended to
// every <form>: the first verb receives the form's target URL, the second its
// HMAC (empty when no key is configured).
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
109
// HTML_BODY_EXTENSION is the format string for the banner that sanitizeHTML
// inserts before </body>. Its single %s verb receives the URL of the original
// page; literal percent signs in the CSS are written as %%.
var HTML_BODY_EXTENSION string = `
<div id="mortyheader">
 <input type="checkbox" id="mortytoggle" autocomplete="off" />
 <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s">original site</a>.</p><div><p><label for="mortytoggle">hide</label></p></div></div>
</div>
<style>
#mortyheader { position: fixed; top: 15%%; left: 0; max-width: 10em; color: #444; overflow: hidden; z-index: 110000; font-size: 0.9em; padding: 1em 1em 1em 0; margin: 0; }
#mortyheader a { color: #3498db; }
#mortyheader p { padding: 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 0.9em; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
125
126func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
127
128 if appRequestHandler(ctx) {
129 return
130 }
131
132 requestHash := popRequestParam(ctx, []byte("mortyhash"))
133
134 requestURI := popRequestParam(ctx, []byte("mortyurl"))
135
136 if requestURI == nil {
137 p.serveMainPage(ctx, nil)
138 return
139 }
140
141 if p.Key != nil {
142 if !verifyRequestURI(requestURI, requestHash, p.Key) {
143 p.serveMainPage(ctx, errors.New(`invalid "mortyhash" parameter`))
144 return
145 }
146 }
147
148 parsedURI, err := url.Parse(string(requestURI))
149
150 if err != nil {
151 p.serveMainPage(ctx, err)
152 return
153 }
154
155 req := fasthttp.AcquireRequest()
156 defer fasthttp.ReleaseRequest(req)
157 req.SetConnectionClose()
158
159 reqQuery := parsedURI.Query()
160 ctx.QueryArgs().VisitAll(func(key, value []byte) {
161 reqQuery.Add(string(key), string(value))
162 })
163
164 parsedURI.RawQuery = reqQuery.Encode()
165
166 uriStr := parsedURI.String()
167
168 log.Println("getting", uriStr)
169
170 req.SetRequestURI(uriStr)
171 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
172
173 resp := fasthttp.AcquireResponse()
174 defer fasthttp.ReleaseResponse(resp)
175
176 req.Header.SetMethodBytes(ctx.Method())
177 if ctx.IsPost() || ctx.IsPut() {
178 req.SetBody(ctx.PostBody())
179 }
180
181 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
182
183 if err != nil {
184 p.serveMainPage(ctx, err)
185 return
186 }
187
188 if resp.StatusCode() != 200 {
189 switch resp.StatusCode() {
190 case 301, 302, 303, 307, 308:
191 loc := resp.Header.Peek("Location")
192 if loc != nil {
193 url, err := proxifyURI(&RequestConfig{p.Key, parsedURI}, string(loc))
194 if err == nil {
195 ctx.SetStatusCode(resp.StatusCode())
196 ctx.Response.Header.Add("Location", url)
197 log.Println("redirect to", string(loc))
198 return
199 }
200 }
201 }
202 log.Println("invalid request:", resp.StatusCode())
203 return
204 }
205
206 contentType := resp.Header.Peek("Content-Type")
207
208 if contentType == nil {
209 p.serveMainPage(ctx, errors.New("invalid content type"))
210 return
211 }
212
213 if bytes.Contains(bytes.ToLower(contentType), []byte("javascript")) {
214 p.serveMainPage(ctx, errors.New("forbidden content type"))
215 return
216 }
217
218 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
219
220 var responseBody []byte
221
222 if len(contentInfo) == 2 && bytes.Contains(contentInfo[1], []byte("ISO-8859-2")) && bytes.Contains(contentInfo[0], []byte("text")) {
223 var err error
224 responseBody, err = charmap.ISO8859_2.NewDecoder().Bytes(resp.Body())
225 if err != nil {
226 p.serveMainPage(ctx, err)
227 return
228 }
229 } else {
230 responseBody = resp.Body()
231 }
232
233 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
234
235 switch {
236 case bytes.Contains(contentType, []byte("css")):
237 sanitizeCSS(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
238 case bytes.Contains(contentType, []byte("html")):
239 sanitizeHTML(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
240 default:
241 ctx.Write(responseBody)
242 }
243}
244
245func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
246 // serve robots.txt
247 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
248 ctx.SetContentType("text/plain")
249 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
250 return true
251 }
252
253 return false
254}
255
256func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
257 param := ctx.QueryArgs().PeekBytes(paramName)
258
259 if param == nil {
260 param = ctx.PostArgs().PeekBytes(paramName)
261 if param != nil {
262 ctx.PostArgs().DelBytes(paramName)
263 }
264 } else {
265 ctx.QueryArgs().DelBytes(paramName)
266 }
267
268 return param
269}
270
271func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
272 // TODO
273
274 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
275
276 if urlSlices == nil {
277 out.Write(css)
278 return
279 }
280
281 startIndex := 0
282
283 for _, s := range urlSlices {
284 urlStart := s[4]
285 urlEnd := s[5]
286
287 if uri, err := proxifyURI(rc, string(css[urlStart:urlEnd])); err == nil {
288 out.Write(css[startIndex:urlStart])
289 out.Write([]byte(uri))
290 startIndex = urlEnd
291 } else {
292 log.Println("cannot proxify css uri:", css[urlStart:urlEnd])
293 }
294 }
295 if startIndex < len(css) {
296 out.Write(css[startIndex:len(css)])
297 }
298}
299
300func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
301 r := bytes.NewReader(htmlDoc)
302 decoder := html.NewTokenizer(r)
303 decoder.AllowCDATA(true)
304
305 unsafeElements := make([][]byte, 0, 8)
306 state := STATE_DEFAULT
307
308 for {
309 token := decoder.Next()
310 if token == html.ErrorToken {
311 err := decoder.Err()
312 if err != io.EOF {
313 log.Println("failed to parse HTML:")
314 }
315 break
316 }
317
318 if len(unsafeElements) == 0 {
319
320 switch token {
321 case html.StartTagToken, html.SelfClosingTagToken:
322 tag, hasAttrs := decoder.TagName()
323 safe := !inArray(tag, UNSAFE_ELEMENTS)
324 if !safe {
325 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
326 var unsafeTag []byte = make([]byte, len(tag))
327 copy(unsafeTag, tag)
328 unsafeElements = append(unsafeElements, unsafeTag)
329 }
330 break
331 }
332 if bytes.Equal(tag, []byte("noscript")) {
333 state = STATE_IN_NOSCRIPT
334 break
335 }
336 var attrs [][][]byte
337 if hasAttrs {
338 for {
339 attrName, attrValue, moreAttr := decoder.TagAttr()
340 attrs = append(attrs, [][]byte{attrName, attrValue})
341 if !moreAttr {
342 break
343 }
344 }
345 }
346 if bytes.Equal(tag, []byte("link")) {
347 sanitizeLinkTag(rc, out, attrs)
348 break
349 }
350
351 fmt.Fprintf(out, "<%s", tag)
352
353 if hasAttrs {
354 if bytes.Equal(tag, []byte("meta")) {
355 sanitizeMetaAttrs(rc, out, attrs)
356 } else {
357 sanitizeAttrs(rc, out, attrs)
358 }
359 }
360
361 if token == html.SelfClosingTagToken {
362 fmt.Fprintf(out, " />")
363 } else {
364 fmt.Fprintf(out, ">")
365 if bytes.Equal(tag, []byte("style")) {
366 state = STATE_IN_STYLE
367 }
368 }
369
370 if bytes.Equal(tag, []byte("form")) {
371 var formURL *url.URL
372 for _, attr := range attrs {
373 if bytes.Equal(attr[0], []byte("action")) {
374 formURL, _ = url.Parse(string(attr[1]))
375 mergeURIs(rc.baseURL, formURL)
376 break
377 }
378 }
379 if formURL == nil {
380 formURL = rc.baseURL
381 }
382 urlStr := formURL.String()
383 var key string
384 if rc.Key != nil {
385 key = hash(urlStr, rc.Key)
386 }
387 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
388
389 }
390
391 case html.EndTagToken:
392 tag, _ := decoder.TagName()
393 writeEndTag := true
394 switch string(tag) {
395 case "body":
396 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.baseURL.String())
397 case "style":
398 state = STATE_DEFAULT
399 case "noscript":
400 state = STATE_DEFAULT
401 writeEndTag = false
402 }
403 // skip noscript tags - only the tag, not the content, because javascript is sanitized
404 if writeEndTag {
405 fmt.Fprintf(out, "</%s>", tag)
406 }
407
408 case html.TextToken:
409 switch state {
410 case STATE_DEFAULT:
411 fmt.Fprintf(out, "%s", decoder.Raw())
412 case STATE_IN_STYLE:
413 sanitizeCSS(rc, out, decoder.Raw())
414 case STATE_IN_NOSCRIPT:
415 sanitizeHTML(rc, out, decoder.Raw())
416 }
417
418 case html.DoctypeToken, html.CommentToken:
419 out.Write(decoder.Raw())
420 }
421 } else {
422 switch token {
423 case html.StartTagToken:
424 tag, _ := decoder.TagName()
425 if inArray(tag, UNSAFE_ELEMENTS) {
426 unsafeElements = append(unsafeElements, tag)
427 }
428
429 case html.EndTagToken:
430 tag, _ := decoder.TagName()
431 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
432 unsafeElements = unsafeElements[:len(unsafeElements)-1]
433 }
434 }
435 }
436 }
437}
438
439func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
440 exclude := false
441 for _, attr := range attrs {
442 attrName := attr[0]
443 attrValue := attr[1]
444 if bytes.Equal(attrName, []byte("rel")) {
445 if bytes.Equal(attrValue, []byte("dns-prefetch")) {
446 exclude = true
447 break
448 }
449 }
450 if bytes.Equal(attrName, []byte("as")) {
451 if bytes.Equal(attrValue, []byte("script")) {
452 exclude = true
453 break
454 }
455 }
456 }
457
458 if !exclude {
459 out.Write([]byte("<link"))
460 for _, attr := range attrs {
461 sanitizeAttr(rc, out, attr[0], attr[1])
462 }
463 out.Write([]byte(">"))
464 }
465}
466
467func sanitizeMetaAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
468 var http_equiv []byte
469 var content []byte
470
471 for _, attr := range attrs {
472 attrName := attr[0]
473 attrValue := attr[1]
474 if bytes.Equal(attrName, []byte("http-equiv")) {
475 http_equiv = bytes.ToLower(attrValue)
476 }
477 if bytes.Equal(attrName, []byte("content")) {
478 content = attrValue
479 }
480 }
481
482 urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
483 if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
484 contentUrl := content[urlIndex+4:]
485 if uri, err := proxifyURI(rc, string(contentUrl)); err == nil {
486 fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
487 }
488 } else {
489 sanitizeAttrs(rc, out, attrs)
490 }
491
492}
493
494func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
495 for _, attr := range attrs {
496 sanitizeAttr(rc, out, attr[0], attr[1])
497 }
498}
499
500func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue []byte) {
501 if inArray(attrName, SAFE_ATTRIBUTES) {
502 fmt.Fprintf(out, " %s=\"%s\"", attrName, attrValue)
503 return
504 }
505 switch string(attrName) {
506 case "src", "href", "action":
507 if uri, err := proxifyURI(rc, string(attrValue)); err == nil {
508 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
509 } else {
510 log.Println("cannot proxify uri:", attrValue)
511 }
512 case "style":
513 fmt.Fprintf(out, " %s=\"", attrName)
514 sanitizeCSS(rc, out, attrValue)
515 out.Write([]byte("\""))
516 }
517}
518
// mergeURIs completes the reference u2 in place using the base URL u1.
//
// A missing scheme is taken from u1. When u2 has no host it inherits u1's
// host; an empty path then resolves to u1's path and a relative path is joined
// onto the directory of u1's path. A u2 that already has a host keeps its own
// host and path. Nil arguments are ignored.
func mergeURIs(u1, u2 *url.URL) {
	if u1 == nil || u2 == nil {
		return
	}
	if u2.Scheme == "" || u2.Scheme == "//" {
		u2.Scheme = u1.Scheme
	}
	if u2.Host != "" {
		return
	}

	// Inherit the host even when the base has an empty path
	// (e.g. "http://example.com"), otherwise the result has no host at all.
	u2.Host = u1.Host

	switch {
	case u2.Path == "":
		// Query- or fragment-only reference: same document as the base.
		u2.Path = u1.Path
	case u2.Path[0] != '/':
		// Directory of the base path; empty when the base path contains no
		// slash or only the leading one.
		dir := ""
		if i := strings.LastIndexByte(u1.Path, '/'); i > 0 {
			dir = u1.Path[:i]
		}
		u2.Path = path.Join(dir, u2.Path)
	}
}
530
531func proxifyURI(rc *RequestConfig, uri string) (string, error) {
532 // TODO check malicious data: - e.g. data:script
533 if strings.HasPrefix(uri, "data:") {
534 return uri, nil
535 }
536
537 if len(uri) > 0 && uri[0] == '#' {
538 return uri, nil
539 }
540
541 u, err := url.Parse(uri)
542 if err != nil {
543 return "", err
544 }
545 mergeURIs(rc.baseURL, u)
546
547 uri = u.String()
548
549 if rc.Key == nil {
550 return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
551 }
552 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
553}
554
// inArray reports whether a contains an element whose bytes equal b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
563
// hash returns the lower-case hexadecimal HMAC-SHA256 of msg under key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
569
// verifyRequestURI reports whether hashMsg is the hexadecimal HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hexadecimal is logged and
// rejected. The digests are compared with hmac.Equal.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	claimed, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	signer := hmac.New(sha256.New, key)
	signer.Write(uri)
	expected := signer.Sum(nil)

	return hmac.Equal(claimed, expected)
}
581
582func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, err error) {
583 ctx.SetContentType("text/html")
584 ctx.Write([]byte(`<!doctype html>
585<head>
586<title>MortyProxy</title>
587<style>
588body { font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
589input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
590a { text-decoration: none; #2980b9; }
591h1, h2 { font-weight: 200; margin-bottom: 2rem; }
592h1 { font-size: 3em; }
593.footer { position: absolute; bottom: 2em; width: 100%; }
594.footer p { font-size: 0.8em; }
595
596</style>
597</head>
598<body>
599 <h1>MortyProxy</h1>`))
600 if err != nil {
601 ctx.SetStatusCode(404)
602 log.Println("error:", err)
603 ctx.Write([]byte("<h2>Error: "))
604 ctx.Write([]byte(html.EscapeString(err.Error())))
605 ctx.Write([]byte("</h2>"))
606 } else {
607 ctx.SetStatusCode(200)
608 }
609 if p.Key == nil {
610 ctx.Write([]byte(`
611<form action="post">
612 Visit url: <input placeholder="https://url.." name="mortyurl" />
613 <input type="submit" value="go" />
614</form>`))
615 } else {
616 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
617 }
618 ctx.Write([]byte(`
619<div class="footer">
620 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
621 <a href="https://github.com/asciimoo/morty">view on github</a>
622 </p>
623</div>
624</body>
625</html>`))
626}
627
628func main() {
629
630 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
631 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
632 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
633 flag.Parse()
634
635 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
636
637 if *key != "" {
638 p.Key = []byte(*key)
639 }
640
641 log.Println("listening on", *listen)
642
643 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
644 log.Fatal("Error in ListenAndServe:", err)
645 }
646}
Note: See TracBrowser for help on using the repository browser.