source: code/trunk/morty.go@ 12

Last change on this file since 12 was 12, checked in by asciimoo, 9 years ago

[mod] disable connection keepalive

File size: 14.6 KB
RevLine 
[1]1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "path"
15 "regexp"
16 "strings"
[4]17 "time"
[1]18
19 "github.com/valyala/fasthttp"
20 "golang.org/x/net/html"
21 "golang.org/x/text/encoding/charmap"
22)
23
// Parser states used by sanitizeHTML to decide how a text token is handled.
const (
	STATE_DEFAULT     int = iota // plain markup: text is copied through
	STATE_IN_STYLE               // inside <style>: text is passed to sanitizeCSS
	STATE_IN_NOSCRIPT            // inside <noscript>: text is sanitized as HTML
)
29
30var CLIENT *fasthttp.Client = &fasthttp.Client{
31 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
32}
33
// CSS_URL_REGEXP finds URL references in CSS, introduced either by "url(" or
// by "@import ". Submatch 1 is that prefix, submatches 2 and 4 are the
// optional quotes, and submatch 3 is the URL itself.
var CSS_URL_REGEXP = regexp.MustCompile("(url\\(|@import +)(['\"]?)([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
[1]35
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML suppresses.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("script"),
}
43
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr writes through
// with their value, without URL rewriting.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
78
// SELF_CLOSING_ELEMENTS lists the tag names that have no end tag. sanitizeHTML
// consults it so that an unsafe element from this list is not tracked as an
// open element waiting for its closing tag.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
96
type (
	// Proxy carries the server-wide settings used by RequestHandler.
	Proxy struct {
		// Key is the HMAC key for validating "mortyhash"; nil disables the check.
		Key []byte
		// RequestTimeout is the timeout passed to the upstream client call.
		RequestTimeout time.Duration
	}

	// RequestConfig carries the per-request data needed to rewrite URLs.
	RequestConfig struct {
		// Key is the HMAC key used to sign rewritten URLs; nil means unsigned.
		Key []byte
		// baseURL is the URL of the document being sanitized; relative
		// references are resolved against it.
		baseURL *url.URL
	}
)
106
// HTML_FORM_EXTENSION is the format string for the hidden inputs appended to
// every <form>: the first verb receives the form target URL, the second its
// hash.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
[1]108
// HTML_BODY_EXTENSION is the format string for the banner written just before
// </body>. Its single %s verb receives the URL of the original page; literal
// percent signs in the CSS are doubled.
var HTML_BODY_EXTENSION = `
<div id="mortyheader">
  <input type="checkbox" id="mortytoggle" autocomplete="off" />
  <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s">original site</a>.</p><div><p><label for="mortytoggle">hide</label></p></div></div>
</div>
<style>
#mortyheader { position: fixed; top: 15%%; left: 0; max-width: 10em; color: #444; overflow: hidden; z-index: 110000; font-size: 0.9em; padding: 1em 1em 1em 0; margin: 0; }
#mortyheader a { color: #3498db; }
#mortyheader p { padding: 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 0.9em; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
124
125func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
[10]126
127 if appRequestHandler(ctx) {
128 return
129 }
130
[1]131 requestHash := popRequestParam(ctx, []byte("mortyhash"))
132
133 requestURI := popRequestParam(ctx, []byte("mortyurl"))
134
135 if requestURI == nil {
[11]136 p.serveMainPage(ctx, nil)
[1]137 return
138 }
139
140 if p.Key != nil {
141 if !verifyRequestURI(requestURI, requestHash, p.Key) {
[11]142 p.serveMainPage(ctx, errors.New(`invalid "mortyhash" parameter`))
[1]143 return
144 }
145 }
146
147 parsedURI, err := url.Parse(string(requestURI))
148
[11]149 if err != nil {
150 p.serveMainPage(ctx, err)
[1]151 return
152 }
153
154 req := fasthttp.AcquireRequest()
155 defer fasthttp.ReleaseRequest(req)
[12]156 req.SetConnectionClose()
[1]157
158 reqQuery := parsedURI.Query()
159 ctx.QueryArgs().VisitAll(func(key, value []byte) {
160 reqQuery.Add(string(key), string(value))
161 })
162
163 parsedURI.RawQuery = reqQuery.Encode()
164
165 uriStr := parsedURI.String()
166
167 log.Println("getting", uriStr)
168
169 req.SetRequestURI(uriStr)
170 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
171
172 resp := fasthttp.AcquireResponse()
173 defer fasthttp.ReleaseResponse(resp)
174
175 req.Header.SetMethodBytes(ctx.Method())
176 if ctx.IsPost() || ctx.IsPut() {
177 req.SetBody(ctx.PostBody())
178 }
179
[11]180 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
181
182 if err != nil {
183 p.serveMainPage(ctx, err)
[1]184 return
185 }
186
187 if resp.StatusCode() != 200 {
188 switch resp.StatusCode() {
[7]189 case 301, 302, 303, 307, 308:
[1]190 loc := resp.Header.Peek("Location")
191 if loc != nil {
192 url, err := proxifyURI(&RequestConfig{p.Key, parsedURI}, string(loc))
193 if err == nil {
194 ctx.SetStatusCode(resp.StatusCode())
195 ctx.Response.Header.Add("Location", url)
196 log.Println("redirect to", string(loc))
197 return
198 }
199 }
200 }
201 log.Println("invalid request:", resp.StatusCode())
202 return
203 }
204
205 contentType := resp.Header.Peek("Content-Type")
206
207 if contentType == nil {
[11]208 p.serveMainPage(ctx, errors.New("invalid content type"))
[1]209 return
210 }
211
212 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
213
214 var responseBody []byte
215
216 if len(contentInfo) == 2 && bytes.Contains(contentInfo[1], []byte("ISO-8859-2")) && bytes.Contains(contentInfo[0], []byte("text")) {
217 var err error
218 responseBody, err = charmap.ISO8859_2.NewDecoder().Bytes(resp.Body())
[11]219 if err != nil {
220 p.serveMainPage(ctx, err)
[1]221 return
222 }
223 } else {
224 responseBody = resp.Body()
225 }
226
227 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
228
229 switch {
230 case bytes.Contains(contentType, []byte("css")):
231 sanitizeCSS(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
232 case bytes.Contains(contentType, []byte("html")):
233 sanitizeHTML(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
234 default:
235 ctx.Write(responseBody)
236 }
237}
238
[10]239func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
[11]240 // serve robots.txt
[10]241 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
242 ctx.SetContentType("text/plain")
243 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
244 return true
245 }
[11]246
[10]247 return false
248}
249
[1]250func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
251 param := ctx.QueryArgs().PeekBytes(paramName)
252
253 if param == nil {
254 param = ctx.PostArgs().PeekBytes(paramName)
255 if param != nil {
256 ctx.PostArgs().DelBytes(paramName)
257 }
258 } else {
259 ctx.QueryArgs().DelBytes(paramName)
260 }
261
262 return param
263}
264
[9]265func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
[1]266 // TODO
267
268 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
269
270 if urlSlices == nil {
[9]271 out.Write(css)
[1]272 return
273 }
274
275 startIndex := 0
276
277 for _, s := range urlSlices {
[8]278 urlStart := s[6]
279 urlEnd := s[7]
[1]280
281 if uri, err := proxifyURI(rc, string(css[urlStart:urlEnd])); err == nil {
[9]282 out.Write(css[startIndex:urlStart])
283 out.Write([]byte(uri))
[1]284 startIndex = urlEnd
285 } else {
286 log.Println("cannot proxify css uri:", css[urlStart:urlEnd])
287 }
288 }
289 if startIndex < len(css) {
[9]290 out.Write(css[startIndex:len(css)])
[1]291 }
292}
293
[9]294func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
[1]295 r := bytes.NewReader(htmlDoc)
296 decoder := html.NewTokenizer(r)
297 decoder.AllowCDATA(true)
298
299 unsafeElements := make([][]byte, 0, 8)
300 state := STATE_DEFAULT
301
302 for {
303 token := decoder.Next()
304 if token == html.ErrorToken {
305 err := decoder.Err()
306 if err != io.EOF {
307 log.Println("failed to parse HTML:")
308 }
309 break
310 }
311
312 if len(unsafeElements) == 0 {
313
314 switch token {
315 case html.StartTagToken, html.SelfClosingTagToken:
316 tag, hasAttrs := decoder.TagName()
317 safe := !inArray(tag, UNSAFE_ELEMENTS)
318 if !safe {
319 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
320 var unsafeTag []byte = make([]byte, len(tag))
321 copy(unsafeTag, tag)
322 unsafeElements = append(unsafeElements, unsafeTag)
323 }
324 break
325 }
326 if bytes.Equal(tag, []byte("noscript")) {
327 state = STATE_IN_NOSCRIPT
328 break
329 }
330 var attrs [][][]byte
[9]331 fmt.Fprintf(out, "<%s", tag)
[1]332 if hasAttrs {
333 for {
334 attrName, attrValue, moreAttr := decoder.TagAttr()
335 attrs = append(attrs, [][]byte{attrName, attrValue})
336 if !moreAttr {
337 break
338 }
339 }
340 if bytes.Equal(tag, []byte("meta")) {
[9]341 sanitizeMetaAttrs(rc, out, attrs)
[1]342 } else {
[9]343 sanitizeAttrs(rc, out, attrs)
[1]344 }
345 }
346 if token == html.SelfClosingTagToken {
[9]347 fmt.Fprintf(out, " />")
[1]348 } else {
[9]349 fmt.Fprintf(out, ">")
[1]350 if bytes.Equal(tag, []byte("style")) {
351 state = STATE_IN_STYLE
352 }
353 }
354 if bytes.Equal(tag, []byte("form")) {
355 var formURL *url.URL
356 for _, attr := range attrs {
357 if bytes.Equal(attr[0], []byte("action")) {
358 formURL, _ = url.Parse(string(attr[1]))
359 mergeURIs(rc.baseURL, formURL)
360 break
361 }
362 }
363 if formURL == nil {
364 formURL = rc.baseURL
365 }
[2]366 urlStr := formURL.String()
367 var key string
368 if rc.Key != nil {
369 key = hash(urlStr, rc.Key)
370 }
[9]371 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
[1]372
373 }
374
375 case html.EndTagToken:
376 tag, _ := decoder.TagName()
377 writeEndTag := true
378 switch string(tag) {
379 case "body":
[9]380 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.baseURL.String())
[1]381 case "style":
382 state = STATE_DEFAULT
383 case "noscript":
384 state = STATE_DEFAULT
385 writeEndTag = false
386 }
387 // skip noscript tags - only the tag, not the content, because javascript is sanitized
388 if writeEndTag {
[9]389 fmt.Fprintf(out, "</%s>", tag)
[1]390 }
391
392 case html.TextToken:
393 switch state {
394 case STATE_DEFAULT:
[9]395 fmt.Fprintf(out, "%s", decoder.Raw())
[1]396 case STATE_IN_STYLE:
[9]397 sanitizeCSS(rc, out, decoder.Raw())
[1]398 case STATE_IN_NOSCRIPT:
[9]399 sanitizeHTML(rc, out, decoder.Raw())
[1]400 }
401
402 case html.DoctypeToken, html.CommentToken:
[9]403 out.Write(decoder.Raw())
[1]404 }
405 } else {
406 switch token {
407 case html.StartTagToken:
408 tag, _ := decoder.TagName()
409 if inArray(tag, UNSAFE_ELEMENTS) {
410 unsafeElements = append(unsafeElements, tag)
411 }
412
413 case html.EndTagToken:
414 tag, _ := decoder.TagName()
415 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
416 unsafeElements = unsafeElements[:len(unsafeElements)-1]
417 }
418 }
419 }
420 }
421}
422
[9]423func sanitizeMetaAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]424 var http_equiv []byte
425 var content []byte
426
427 for _, attr := range attrs {
428 attrName := attr[0]
429 attrValue := attr[1]
430 if bytes.Equal(attrName, []byte("http-equiv")) {
431 http_equiv = bytes.ToLower(attrValue)
432 }
433 if bytes.Equal(attrName, []byte("content")) {
434 content = attrValue
435 }
436 }
437
438 if bytes.Equal(http_equiv, []byte("refresh")) && bytes.Index(content, []byte(";url=")) != -1 {
439 parts := bytes.SplitN(content, []byte(";url="), 2)
440 if uri, err := proxifyURI(rc, string(parts[1])); err == nil {
[9]441 fmt.Fprintf(out, ` http-equiv="refresh" content="%s;%s"`, parts[0], uri)
[1]442 }
443 } else {
[9]444 sanitizeAttrs(rc, out, attrs)
[1]445 }
446
447}
448
[9]449func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
[1]450 for _, attr := range attrs {
[9]451 sanitizeAttr(rc, out, attr[0], attr[1])
[1]452 }
453}
454
[9]455func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue []byte) {
[1]456 if inArray(attrName, SAFE_ATTRIBUTES) {
[9]457 fmt.Fprintf(out, " %s=\"%s\"", attrName, attrValue)
[1]458 return
459 }
460 switch string(attrName) {
461 case "src", "href", "action":
462 if uri, err := proxifyURI(rc, string(attrValue)); err == nil {
[9]463 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
[1]464 } else {
465 log.Println("cannot proxify uri:", attrValue)
466 }
467 case "style":
[9]468 fmt.Fprintf(out, " %s=\"", attrName)
469 sanitizeCSS(rc, out, attrValue)
470 out.Write([]byte("\""))
[1]471 }
472}
473
// mergeURIs resolves the reference u2 against the base u1, modifying u2 in
// place.
//
// A missing scheme is taken from u1. When u2 has no host it receives u1's
// host, and its path is resolved relative to u1:
//   - an empty path becomes u1's path (and u1's query, if u2 has none),
//   - a path starting with "/" is kept,
//   - any other path is joined onto the directory of u1's path; a trailing
//     slash on the reference is preserved.
//
// Opaque references (e.g. "mailto:...") and nil arguments are left untouched.
func mergeURIs(u1, u2 *url.URL) {
	if u1 == nil || u2 == nil {
		return
	}
	if u2.Scheme == "" || u2.Scheme == "//" {
		u2.Scheme = u1.Scheme
	}
	if u2.Host != "" || u2.Opaque != "" {
		return
	}
	u2.Host = u1.Host

	switch {
	case u2.Path == "":
		u2.Path = u1.Path
		u2.RawPath = u1.RawPath
		if u2.RawQuery == "" {
			u2.RawQuery = u1.RawQuery
		}
	case u2.Path[0] != '/':
		// Directory of the base path, including its trailing slash; "/" when
		// the base path is empty or contains no slash.
		dir := "/"
		if i := strings.LastIndexByte(u1.Path, '/'); i >= 0 {
			dir = u1.Path[:i+1]
		}
		merged := path.Join(dir, u2.Path)
		// path.Join strips a trailing slash; put it back.
		if strings.HasSuffix(u2.Path, "/") && !strings.HasSuffix(merged, "/") {
			merged += "/"
		}
		u2.Path = merged
		u2.RawPath = ""
	}
}
485
486func proxifyURI(rc *RequestConfig, uri string) (string, error) {
487 // TODO check malicious data: - e.g. data:script
488 if strings.HasPrefix(uri, "data:") {
489 return uri, nil
490 }
491
492 if len(uri) > 0 && uri[0] == '#' {
493 return uri, nil
494 }
495
496 u, err := url.Parse(uri)
497 if err != nil {
498 return "", err
499 }
500 mergeURIs(rc.baseURL, u)
501
502 uri = u.String()
503
504 if rc.Key == nil {
505 return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
506 }
507 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
508}
509
// inArray reports whether a contains an element equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
518
// hash returns the hex-encoded HMAC-SHA256 of msg under key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	io.WriteString(signer, msg)
	digest := signer.Sum(nil)
	return hex.EncodeToString(digest)
}
524
// verifyRequestURI reports whether hashMsg is the hex-encoded HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hex is logged and rejected.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	given := make([]byte, hex.DecodedLen(len(hashMsg)))
	if _, err := hex.Decode(given, hashMsg); err != nil {
		log.Println("hmac error:", err)
		return false
	}

	signer := hmac.New(sha256.New, key)
	signer.Write(uri)
	expected := signer.Sum(nil)

	return hmac.Equal(given, expected)
}
536
[11]537func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, err error) {
[1]538 ctx.SetContentType("text/html")
539 ctx.Write([]byte(`<!doctype html>
540<head>
[11]541<title>MortyProxy</title>
542<style>
543body { font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
544input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
545a { text-decoration: none; #2980b9; }
546h1, h2 { font-weight: 200; margin-bottom: 2rem; }
547h1 { font-size: 3em; }
548.footer { position: absolute; bottom: 2em; width: 100%; }
549.footer p { font-size: 0.8em; }
550
551</style>
[1]552</head>
[11]553<body>
554 <h1>MortyProxy</h1>`))
555 if err != nil {
556 ctx.SetStatusCode(404)
557 log.Println("error:", err)
558 ctx.Write([]byte("<h2>Error: "))
559 ctx.Write([]byte(html.EscapeString(err.Error())))
560 ctx.Write([]byte("</h2>"))
561 } else {
562 ctx.SetStatusCode(200)
563 }
[1]564 if p.Key == nil {
565 ctx.Write([]byte(`
566<form action="post">
567 Visit url: <input placeholder="https://url.." name="mortyurl" />
568 <input type="submit" value="go" />
569</form>`))
[11]570 } else {
571 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
[1]572 }
573 ctx.Write([]byte(`
[11]574<div class="footer">
575 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
576 <a href="https://github.com/asciimoo/morty">view on github</a>
577 </p>
578</div>
[1]579</body>
580</html>`))
581}
582
583func main() {
584
[2]585 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
[1]586 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
[4]587 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
[1]588 flag.Parse()
589
[4]590 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
[1]591
592 if *key != "" {
593 p.Key = []byte(*key)
594 }
595
596 log.Println("listening on", *listen)
597
598 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
599 log.Fatal("Error in ListenAndServe:", err)
600 }
601}
Note: See TracBrowser for help on using the repository browser.