source: code/trunk/morty.go@ 11

Last change on this file since 11 was 11, checked in by asciimoo, 9 years ago

[enh] enhanced main page

File size: 14.5 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "path"
15 "regexp"
16 "strings"
17 "time"
18
19 "github.com/valyala/fasthttp"
20 "golang.org/x/net/html"
21 "golang.org/x/text/encoding/charmap"
22)
23
// Text-handling states used by sanitizeHTML to decide how a text token is
// emitted.
const (
	// STATE_DEFAULT: text is copied to the output as-is.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is the body of a <style> element and is run
	// through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is the body of a <noscript> element and is
	// run through sanitizeHTML again.
	STATE_IN_NOSCRIPT
)
29
30var CLIENT *fasthttp.Client = &fasthttp.Client{
31 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
32}
33
// CSS_URL_REGEXP locates URL references in CSS: either `url(` or `@import`
// followed by an optionally quoted target. Submatch 3 (indexes 6 and 7 in
// the index form) is the URL itself, without quotes.
var CSS_URL_REGEXP = regexp.MustCompile("(url\\(|@import +)(['\"]?)([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
35
// UNSAFE_ELEMENTS lists the tag names that sanitizeHTML removes from the
// output together with everything nested inside them.
var UNSAFE_ELEMENTS = [][]byte{
	[]byte("applet"),
	[]byte("canvas"),
	[]byte("embed"),
	//[]byte("iframe"),
	[]byte("script"),
}
43
// SAFE_ATTRIBUTES lists the attribute names that sanitizeAttr copies to the
// output without rewriting their value. Attributes that are neither listed
// here nor handled explicitly by sanitizeAttr are dropped.
var SAFE_ATTRIBUTES = [][]byte{
	[]byte("abbr"),
	[]byte("accesskey"),
	[]byte("align"),
	[]byte("alt"),
	[]byte("autocomplete"),
	[]byte("charset"),
	[]byte("checked"),
	[]byte("class"),
	[]byte("content"),
	[]byte("contenteditable"),
	[]byte("contextmenu"),
	[]byte("dir"),
	[]byte("for"),
	[]byte("height"),
	[]byte("hidden"),
	[]byte("id"),
	[]byte("lang"),
	[]byte("media"),
	[]byte("method"),
	[]byte("name"),
	[]byte("nowrap"),
	[]byte("placeholder"),
	[]byte("property"),
	[]byte("rel"),
	[]byte("spellcheck"),
	[]byte("tabindex"),
	[]byte("target"),
	[]byte("title"),
	[]byte("translate"),
	[]byte("type"),
	[]byte("value"),
	[]byte("width"),
}
78
// SELF_CLOSING_ELEMENTS lists tag names that have no closing tag. When such
// an element is also unsafe, sanitizeHTML skips it without waiting for a
// matching end tag.
var SELF_CLOSING_ELEMENTS = [][]byte{
	[]byte("area"),
	[]byte("base"),
	[]byte("br"),
	[]byte("col"),
	[]byte("embed"),
	[]byte("hr"),
	[]byte("img"),
	[]byte("input"),
	[]byte("keygen"),
	[]byte("link"),
	[]byte("meta"),
	[]byte("param"),
	[]byte("source"),
	[]byte("track"),
	[]byte("wbr"),
}
96
// Proxy carries the server-wide configuration used by RequestHandler.
type Proxy struct {
	// Key is the HMAC key used to verify the "mortyhash" parameter.
	// When nil, requests are accepted without verification.
	Key []byte
	// RequestTimeout is the timeout passed to the upstream fetch.
	RequestTimeout time.Duration
}
101
// RequestConfig is the per-request context handed to the sanitizers.
// Callers build it positionally, so the field order must not change.
type RequestConfig struct {
	// Key is the HMAC key used to sign rewritten URLs; nil means unsigned.
	Key []byte
	// baseURL is the URL of the document being sanitized; relative
	// references are resolved against it.
	baseURL *url.URL
}
106
// HTML_FORM_EXTENSION is the format string for the hidden inputs appended
// to every <form>. The first verb receives the target URL ("mortyurl"),
// the second its HMAC ("mortyhash").
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
108
// HTML_BODY_EXTENSION is the format string for the banner inserted just
// before </body>. Its single %s verb receives the URL of the original
// page; literal percent signs in the CSS are written as %%.
var HTML_BODY_EXTENSION = `
<div id="mortyheader">
 <input type="checkbox" id="mortytoggle" autocomplete="off" />
 <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s">original site</a>.</p><div><p><label for="mortytoggle">hide</label></p></div></div>
</div>
<style>
#mortyheader { position: fixed; top: 15%%; left: 0; max-width: 10em; color: #444; overflow: hidden; z-index: 110000; font-size: 0.9em; padding: 1em 1em 1em 0; margin: 0; }
#mortyheader a { color: #3498db; }
#mortyheader p { padding: 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 0.9em; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
124
125func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
126
127 if appRequestHandler(ctx) {
128 return
129 }
130
131 requestHash := popRequestParam(ctx, []byte("mortyhash"))
132
133 requestURI := popRequestParam(ctx, []byte("mortyurl"))
134
135 if requestURI == nil {
136 p.serveMainPage(ctx, nil)
137 return
138 }
139
140 if p.Key != nil {
141 if !verifyRequestURI(requestURI, requestHash, p.Key) {
142 p.serveMainPage(ctx, errors.New(`invalid "mortyhash" parameter`))
143 return
144 }
145 }
146
147 parsedURI, err := url.Parse(string(requestURI))
148
149 if err != nil {
150 p.serveMainPage(ctx, err)
151 return
152 }
153
154 req := fasthttp.AcquireRequest()
155 defer fasthttp.ReleaseRequest(req)
156
157 reqQuery := parsedURI.Query()
158 ctx.QueryArgs().VisitAll(func(key, value []byte) {
159 reqQuery.Add(string(key), string(value))
160 })
161
162 parsedURI.RawQuery = reqQuery.Encode()
163
164 uriStr := parsedURI.String()
165
166 log.Println("getting", uriStr)
167
168 req.SetRequestURI(uriStr)
169 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
170
171 resp := fasthttp.AcquireResponse()
172 defer fasthttp.ReleaseResponse(resp)
173
174 req.Header.SetMethodBytes(ctx.Method())
175 if ctx.IsPost() || ctx.IsPut() {
176 req.SetBody(ctx.PostBody())
177 }
178
179 err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
180
181 if err != nil {
182 p.serveMainPage(ctx, err)
183 return
184 }
185
186 if resp.StatusCode() != 200 {
187 switch resp.StatusCode() {
188 case 301, 302, 303, 307, 308:
189 loc := resp.Header.Peek("Location")
190 if loc != nil {
191 url, err := proxifyURI(&RequestConfig{p.Key, parsedURI}, string(loc))
192 if err == nil {
193 ctx.SetStatusCode(resp.StatusCode())
194 ctx.Response.Header.Add("Location", url)
195 log.Println("redirect to", string(loc))
196 return
197 }
198 }
199 }
200 log.Println("invalid request:", resp.StatusCode())
201 return
202 }
203
204 contentType := resp.Header.Peek("Content-Type")
205
206 if contentType == nil {
207 p.serveMainPage(ctx, errors.New("invalid content type"))
208 return
209 }
210
211 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
212
213 var responseBody []byte
214
215 if len(contentInfo) == 2 && bytes.Contains(contentInfo[1], []byte("ISO-8859-2")) && bytes.Contains(contentInfo[0], []byte("text")) {
216 var err error
217 responseBody, err = charmap.ISO8859_2.NewDecoder().Bytes(resp.Body())
218 if err != nil {
219 p.serveMainPage(ctx, err)
220 return
221 }
222 } else {
223 responseBody = resp.Body()
224 }
225
226 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
227
228 switch {
229 case bytes.Contains(contentType, []byte("css")):
230 sanitizeCSS(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
231 case bytes.Contains(contentType, []byte("html")):
232 sanitizeHTML(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
233 default:
234 ctx.Write(responseBody)
235 }
236}
237
238func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
239 // serve robots.txt
240 if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
241 ctx.SetContentType("text/plain")
242 ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
243 return true
244 }
245
246 return false
247}
248
249func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
250 param := ctx.QueryArgs().PeekBytes(paramName)
251
252 if param == nil {
253 param = ctx.PostArgs().PeekBytes(paramName)
254 if param != nil {
255 ctx.PostArgs().DelBytes(paramName)
256 }
257 } else {
258 ctx.QueryArgs().DelBytes(paramName)
259 }
260
261 return param
262}
263
264func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
265 // TODO
266
267 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
268
269 if urlSlices == nil {
270 out.Write(css)
271 return
272 }
273
274 startIndex := 0
275
276 for _, s := range urlSlices {
277 urlStart := s[6]
278 urlEnd := s[7]
279
280 if uri, err := proxifyURI(rc, string(css[urlStart:urlEnd])); err == nil {
281 out.Write(css[startIndex:urlStart])
282 out.Write([]byte(uri))
283 startIndex = urlEnd
284 } else {
285 log.Println("cannot proxify css uri:", css[urlStart:urlEnd])
286 }
287 }
288 if startIndex < len(css) {
289 out.Write(css[startIndex:len(css)])
290 }
291}
292
293func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
294 r := bytes.NewReader(htmlDoc)
295 decoder := html.NewTokenizer(r)
296 decoder.AllowCDATA(true)
297
298 unsafeElements := make([][]byte, 0, 8)
299 state := STATE_DEFAULT
300
301 for {
302 token := decoder.Next()
303 if token == html.ErrorToken {
304 err := decoder.Err()
305 if err != io.EOF {
306 log.Println("failed to parse HTML:")
307 }
308 break
309 }
310
311 if len(unsafeElements) == 0 {
312
313 switch token {
314 case html.StartTagToken, html.SelfClosingTagToken:
315 tag, hasAttrs := decoder.TagName()
316 safe := !inArray(tag, UNSAFE_ELEMENTS)
317 if !safe {
318 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
319 var unsafeTag []byte = make([]byte, len(tag))
320 copy(unsafeTag, tag)
321 unsafeElements = append(unsafeElements, unsafeTag)
322 }
323 break
324 }
325 if bytes.Equal(tag, []byte("noscript")) {
326 state = STATE_IN_NOSCRIPT
327 break
328 }
329 var attrs [][][]byte
330 fmt.Fprintf(out, "<%s", tag)
331 if hasAttrs {
332 for {
333 attrName, attrValue, moreAttr := decoder.TagAttr()
334 attrs = append(attrs, [][]byte{attrName, attrValue})
335 if !moreAttr {
336 break
337 }
338 }
339 if bytes.Equal(tag, []byte("meta")) {
340 sanitizeMetaAttrs(rc, out, attrs)
341 } else {
342 sanitizeAttrs(rc, out, attrs)
343 }
344 }
345 if token == html.SelfClosingTagToken {
346 fmt.Fprintf(out, " />")
347 } else {
348 fmt.Fprintf(out, ">")
349 if bytes.Equal(tag, []byte("style")) {
350 state = STATE_IN_STYLE
351 }
352 }
353 if bytes.Equal(tag, []byte("form")) {
354 var formURL *url.URL
355 for _, attr := range attrs {
356 if bytes.Equal(attr[0], []byte("action")) {
357 formURL, _ = url.Parse(string(attr[1]))
358 mergeURIs(rc.baseURL, formURL)
359 break
360 }
361 }
362 if formURL == nil {
363 formURL = rc.baseURL
364 }
365 urlStr := formURL.String()
366 var key string
367 if rc.Key != nil {
368 key = hash(urlStr, rc.Key)
369 }
370 fmt.Fprintf(out, HTML_FORM_EXTENSION, urlStr, key)
371
372 }
373
374 case html.EndTagToken:
375 tag, _ := decoder.TagName()
376 writeEndTag := true
377 switch string(tag) {
378 case "body":
379 fmt.Fprintf(out, HTML_BODY_EXTENSION, rc.baseURL.String())
380 case "style":
381 state = STATE_DEFAULT
382 case "noscript":
383 state = STATE_DEFAULT
384 writeEndTag = false
385 }
386 // skip noscript tags - only the tag, not the content, because javascript is sanitized
387 if writeEndTag {
388 fmt.Fprintf(out, "</%s>", tag)
389 }
390
391 case html.TextToken:
392 switch state {
393 case STATE_DEFAULT:
394 fmt.Fprintf(out, "%s", decoder.Raw())
395 case STATE_IN_STYLE:
396 sanitizeCSS(rc, out, decoder.Raw())
397 case STATE_IN_NOSCRIPT:
398 sanitizeHTML(rc, out, decoder.Raw())
399 }
400
401 case html.DoctypeToken, html.CommentToken:
402 out.Write(decoder.Raw())
403 }
404 } else {
405 switch token {
406 case html.StartTagToken:
407 tag, _ := decoder.TagName()
408 if inArray(tag, UNSAFE_ELEMENTS) {
409 unsafeElements = append(unsafeElements, tag)
410 }
411
412 case html.EndTagToken:
413 tag, _ := decoder.TagName()
414 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
415 unsafeElements = unsafeElements[:len(unsafeElements)-1]
416 }
417 }
418 }
419 }
420}
421
422func sanitizeMetaAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
423 var http_equiv []byte
424 var content []byte
425
426 for _, attr := range attrs {
427 attrName := attr[0]
428 attrValue := attr[1]
429 if bytes.Equal(attrName, []byte("http-equiv")) {
430 http_equiv = bytes.ToLower(attrValue)
431 }
432 if bytes.Equal(attrName, []byte("content")) {
433 content = attrValue
434 }
435 }
436
437 if bytes.Equal(http_equiv, []byte("refresh")) && bytes.Index(content, []byte(";url=")) != -1 {
438 parts := bytes.SplitN(content, []byte(";url="), 2)
439 if uri, err := proxifyURI(rc, string(parts[1])); err == nil {
440 fmt.Fprintf(out, ` http-equiv="refresh" content="%s;%s"`, parts[0], uri)
441 }
442 } else {
443 sanitizeAttrs(rc, out, attrs)
444 }
445
446}
447
448func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
449 for _, attr := range attrs {
450 sanitizeAttr(rc, out, attr[0], attr[1])
451 }
452}
453
454func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue []byte) {
455 if inArray(attrName, SAFE_ATTRIBUTES) {
456 fmt.Fprintf(out, " %s=\"%s\"", attrName, attrValue)
457 return
458 }
459 switch string(attrName) {
460 case "src", "href", "action":
461 if uri, err := proxifyURI(rc, string(attrValue)); err == nil {
462 fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
463 } else {
464 log.Println("cannot proxify uri:", attrValue)
465 }
466 case "style":
467 fmt.Fprintf(out, " %s=\"", attrName)
468 sanitizeCSS(rc, out, attrValue)
469 out.Write([]byte("\""))
470 }
471}
472
// mergeURIs resolves the reference u2 against the base u1, modifying u2 in
// place.
//
//   - A missing scheme is taken from u1.
//   - If u2 has its own host it is otherwise left untouched.
//   - If u2 has no host, the host is taken from u1 and:
//       - an empty path means "the base document": u1's path is used, and
//         u1's query too unless u2 carries its own;
//       - a relative path is joined onto the directory of u1's path (the
//         root when u1 has no path), keeping a trailing slash;
//       - an absolute path is kept.
//
// Nil arguments are ignored.
func mergeURIs(u1, u2 *url.URL) {
	if u1 == nil || u2 == nil {
		return
	}

	if u2.Scheme == "" {
		u2.Scheme = u1.Scheme
	}
	if u2.Host != "" {
		return
	}
	u2.Host = u1.Host

	switch {
	case u2.Path == "":
		u2.Path = u1.Path
		u2.RawPath = u1.RawPath
		if u2.RawQuery == "" && !u2.ForceQuery {
			u2.RawQuery = u1.RawQuery
		}

	case u2.Path[0] != '/':
		// LastIndexByte yields -1 when the base path has no slash, which
		// makes baseDir empty and resolves against the root.
		baseDir := u1.Path[:strings.LastIndexByte(u1.Path, '/')+1]
		merged := path.Join("/", baseDir, u2.Path)
		// path.Join cleans away a trailing slash; put it back so that
		// directory references stay directory references.
		if strings.HasSuffix(u2.Path, "/") && !strings.HasSuffix(merged, "/") {
			merged += "/"
		}
		u2.Path = merged
		// The escaped form no longer corresponds to the new path.
		u2.RawPath = ""
	}
}
484
485func proxifyURI(rc *RequestConfig, uri string) (string, error) {
486 // TODO check malicious data: - e.g. data:script
487 if strings.HasPrefix(uri, "data:") {
488 return uri, nil
489 }
490
491 if len(uri) > 0 && uri[0] == '#' {
492 return uri, nil
493 }
494
495 u, err := url.Parse(uri)
496 if err != nil {
497 return "", err
498 }
499 mergeURIs(rc.baseURL, u)
500
501 uri = u.String()
502
503 if rc.Key == nil {
504 return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
505 }
506 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
507}
508
// inArray reports whether some element of a is byte-for-byte equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
517
// hash returns the HMAC-SHA256 of msg under key, encoded as lowercase
// hexadecimal.
func hash(msg string, key []byte) string {
	h := hmac.New(sha256.New, key)
	h.Write([]byte(msg))
	digest := h.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
523
// verifyRequestURI reports whether hashMsg is the hex-encoded HMAC-SHA256
// of uri under key. A hashMsg that is not valid hexadecimal is logged and
// rejected. The digests are compared with hmac.Equal.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	claimed := make([]byte, hex.DecodedLen(len(hashMsg)))
	if _, err := hex.Decode(claimed, hashMsg); err != nil {
		log.Println("hmac error:", err)
		return false
	}

	h := hmac.New(sha256.New, key)
	h.Write(uri)
	expected := h.Sum(nil)

	return hmac.Equal(claimed, expected)
}
535
536func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, err error) {
537 ctx.SetContentType("text/html")
538 ctx.Write([]byte(`<!doctype html>
539<head>
540<title>MortyProxy</title>
541<style>
542body { font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #444; background: #FAFAFA; margin: 0; padding: 0; font-size: 1.1em; }
543input { border: 1px solid #888; padding: 0.3em; color: #444; background: #FFF; font-size: 1.1em; }
544a { text-decoration: none; #2980b9; }
545h1, h2 { font-weight: 200; margin-bottom: 2rem; }
546h1 { font-size: 3em; }
547.footer { position: absolute; bottom: 2em; width: 100%; }
548.footer p { font-size: 0.8em; }
549
550</style>
551</head>
552<body>
553 <h1>MortyProxy</h1>`))
554 if err != nil {
555 ctx.SetStatusCode(404)
556 log.Println("error:", err)
557 ctx.Write([]byte("<h2>Error: "))
558 ctx.Write([]byte(html.EscapeString(err.Error())))
559 ctx.Write([]byte("</h2>"))
560 } else {
561 ctx.SetStatusCode(200)
562 }
563 if p.Key == nil {
564 ctx.Write([]byte(`
565<form action="post">
566 Visit url: <input placeholder="https://url.." name="mortyurl" />
567 <input type="submit" value="go" />
568</form>`))
569 } else {
570 ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
571 }
572 ctx.Write([]byte(`
573<div class="footer">
574 <p>Morty rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
575 <a href="https://github.com/asciimoo/morty">view on github</a>
576 </p>
577</div>
578</body>
579</html>`))
580}
581
582func main() {
583
584 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
585 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
586 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
587 flag.Parse()
588
589 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
590
591 if *key != "" {
592 p.Key = []byte(*key)
593 }
594
595 log.Println("listening on", *listen)
596
597 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
598 log.Fatal("Error in ListenAndServe:", err)
599 }
600}
Note: See TracBrowser for help on using the repository browser.