source: code/trunk/morty.go@ 7

Last change on this file since 7 was 7, checked in by asciimoo, 9 years ago

[fix] support all kind of http redirections

File size: 13.4 KB
Line 
1package main
2
3import (
4 "bytes"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "errors"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "net/url"
14 "path"
15 "regexp"
16 "strings"
17 "time"
18
19 "github.com/valyala/fasthttp"
20 "golang.org/x/net/html"
21 "golang.org/x/text/encoding/charmap"
22)
23
// Text-handling states tracked by sanitizeHTML while it walks the token stream.
const (
	// STATE_DEFAULT: text tokens are copied to the output unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text tokens are CSS and are passed through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text tokens are markup and are passed through sanitizeHTML again.
	STATE_IN_NOSCRIPT
)
29
30var CLIENT *fasthttp.Client = &fasthttp.Client{
31 MaxResponseBodySize: 10 * 1024 * 1024, // 10M
32}
33
// CSS_URL_REGEXP matches CSS url(...) references. Submatch 1 and 3 are the
// optional opening/closing quote, submatch 2 is the URL itself (a run of
// tab and printable ASCII characters excluding space, quotes and ')').
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)")
35
// UNSAFE_ELEMENTS names the tags that sanitizeHTML drops from the output.
// "iframe" is intentionally not part of the list (it was commented out).
var UNSAFE_ELEMENTS = bytes.Fields([]byte("applet canvas embed script"))
43
// SAFE_ATTRIBUTES names the attributes that sanitizeAttr copies to the output
// without rewriting their value.
var SAFE_ATTRIBUTES = bytes.Fields([]byte(`
	abbr accesskey align alt autocomplete
	charset checked class content contenteditable contextmenu
	dir for height hidden id lang
	media method name nowrap
	placeholder property rel spellcheck
	tabindex target title translate type
	value width
`))
78
// SELF_CLOSING_ELEMENTS names the tags that never have a closing tag.
// sanitizeHTML consults it so that an unsafe tag from this list does not
// open an "unsafe" section waiting for an end tag.
var SELF_CLOSING_ELEMENTS = bytes.Fields([]byte(`
	area base br col embed
	hr img input keygen link
	meta param source track wbr
`))
96
// Proxy holds the settings shared by all requests served by RequestHandler.
type Proxy struct {
	// Key is the HMAC key used to verify the "mortyhash" of a requested URL.
	// When nil, hash verification is skipped.
	Key []byte
	// RequestTimeout is the timeout passed to CLIENT.DoTimeout for the upstream fetch.
	RequestTimeout time.Duration
}
101
// RequestConfig carries the per-request data the sanitizers need to rewrite URLs.
// It is built with positional literals, so the field order must not change.
type RequestConfig struct {
	// Key is the HMAC key used to sign proxified URLs; nil means URLs are emitted unsigned.
	Key []byte
	// baseURL is the URL of the document being sanitized; relative references are resolved against it.
	baseURL *url.URL
}
106
// HTML_FORM_EXTENSION is the fmt template for the two hidden inputs that
// sanitizeHTML writes right after an opening <form> tag. The verbs take the
// target URL and its hash, in that order.
var HTML_FORM_EXTENSION = `<input type="hidden" name="mortyurl" value="%s" /><input type="hidden" name="mortyhash" value="%s" />`
108
// HTML_BODY_EXTENSION is the fmt template for the banner that sanitizeHTML
// writes just before a closing </body> tag. Its single verb takes the URL of
// the original page; the literal percent sign in the CSS is escaped as "%%".
var HTML_BODY_EXTENSION = `
<div id="mortyheader">
 <input type="checkbox" id="mortytoggle" autocomplete="off" />
 <div><p>This is a proxified and sanitized view of the page,<br />visit <a href="%s">original site</a>.</p><div><p><label for="mortytoggle">hide</label></p></div></div>
</div>
<style>
#mortyheader { position: fixed; top: 15%%; left: 0; max-width: 10em; color: #444; overflow: hidden; z-index: 110000; font-size: 0.9em; padding: 1em 1em 1em 0; margin: 0; }
#mortyheader a { color: #3498db; }
#mortyheader p { padding: 0; margin: 0; }
#mortyheader > div { padding: 8px; font-size: 0.9em; border-width: 4px 4px 4px 0; border-style: solid; border-color: #1abc9c; background: #FFF; line-height: 1em; }
#mortyheader label { text-align: right; cursor: pointer; display: block; color: #444; padding: 0; margin: 0; }
input[type=checkbox]#mortytoggle { display: none; }
input[type=checkbox]#mortytoggle:checked ~ div { display: none; }
</style>
`
124
125func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
126 requestHash := popRequestParam(ctx, []byte("mortyhash"))
127
128 requestURI := popRequestParam(ctx, []byte("mortyurl"))
129
130 if requestURI == nil {
131 p.breakOnError(ctx, errors.New(`missing "mortyurl" URL parameter`))
132 return
133 }
134
135 if p.Key != nil {
136 if !verifyRequestURI(requestURI, requestHash, p.Key) {
137 p.breakOnError(ctx, errors.New("invalid hash"))
138 return
139 }
140 }
141
142 parsedURI, err := url.Parse(string(requestURI))
143
144 if p.breakOnError(ctx, err) {
145 return
146 }
147
148 req := fasthttp.AcquireRequest()
149 defer fasthttp.ReleaseRequest(req)
150
151 reqQuery := parsedURI.Query()
152 ctx.QueryArgs().VisitAll(func(key, value []byte) {
153 reqQuery.Add(string(key), string(value))
154 })
155
156 parsedURI.RawQuery = reqQuery.Encode()
157
158 uriStr := parsedURI.String()
159
160 log.Println("getting", uriStr)
161
162 req.SetRequestURI(uriStr)
163 req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36"))
164
165 resp := fasthttp.AcquireResponse()
166 defer fasthttp.ReleaseResponse(resp)
167
168 req.Header.SetMethodBytes(ctx.Method())
169 if ctx.IsPost() || ctx.IsPut() {
170 req.SetBody(ctx.PostBody())
171 }
172
173 if p.breakOnError(ctx, CLIENT.DoTimeout(req, resp, p.RequestTimeout)) {
174 return
175 }
176
177 if resp.StatusCode() != 200 {
178 switch resp.StatusCode() {
179 case 301, 302, 303, 307, 308:
180 loc := resp.Header.Peek("Location")
181 if loc != nil {
182 url, err := proxifyURI(&RequestConfig{p.Key, parsedURI}, string(loc))
183 if err == nil {
184 ctx.SetStatusCode(resp.StatusCode())
185 ctx.Response.Header.Add("Location", url)
186 log.Println("redirect to", string(loc))
187 return
188 }
189 }
190 }
191 log.Println("invalid request:", resp.StatusCode())
192 return
193 }
194
195 contentType := resp.Header.Peek("Content-Type")
196
197 if contentType == nil {
198 p.breakOnError(ctx, errors.New("invalid content type"))
199 return
200 }
201
202 contentInfo := bytes.SplitN(contentType, []byte(";"), 2)
203
204 var responseBody []byte
205
206 if len(contentInfo) == 2 && bytes.Contains(contentInfo[1], []byte("ISO-8859-2")) && bytes.Contains(contentInfo[0], []byte("text")) {
207 var err error
208 responseBody, err = charmap.ISO8859_2.NewDecoder().Bytes(resp.Body())
209 if p.breakOnError(ctx, err) {
210 return
211 }
212 } else {
213 responseBody = resp.Body()
214 }
215
216 ctx.SetContentType(fmt.Sprintf("%s; charset=UTF-8", contentInfo[0]))
217
218 switch {
219 case bytes.Contains(contentType, []byte("css")):
220 sanitizeCSS(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
221 case bytes.Contains(contentType, []byte("html")):
222 sanitizeHTML(&RequestConfig{p.Key, parsedURI}, ctx, responseBody)
223 default:
224 ctx.Write(responseBody)
225 }
226
227}
228
229func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
230 param := ctx.QueryArgs().PeekBytes(paramName)
231
232 if param == nil {
233 param = ctx.PostArgs().PeekBytes(paramName)
234 if param != nil {
235 ctx.PostArgs().DelBytes(paramName)
236 }
237 } else {
238 ctx.QueryArgs().DelBytes(paramName)
239 }
240
241 return param
242}
243
244func sanitizeCSS(rc *RequestConfig, ctx *fasthttp.RequestCtx, css []byte) {
245 // TODO
246
247 urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
248
249 if urlSlices == nil {
250 ctx.Write(css)
251 return
252 }
253
254 startIndex := 0
255
256 for _, s := range urlSlices {
257 urlStart := s[4]
258 urlEnd := s[5]
259
260 if uri, err := proxifyURI(rc, string(css[urlStart:urlEnd])); err == nil {
261 ctx.Write(css[startIndex:urlStart])
262 ctx.Write([]byte(uri))
263 startIndex = urlEnd
264 } else {
265 log.Println("cannot proxify css uri:", css[urlStart:urlEnd])
266 }
267 }
268 if startIndex < len(css) {
269 ctx.Write(css[startIndex:len(css)])
270 }
271}
272
273func sanitizeHTML(rc *RequestConfig, ctx *fasthttp.RequestCtx, htmlDoc []byte) {
274 r := bytes.NewReader(htmlDoc)
275 decoder := html.NewTokenizer(r)
276 decoder.AllowCDATA(true)
277
278 unsafeElements := make([][]byte, 0, 8)
279 state := STATE_DEFAULT
280
281 for {
282 token := decoder.Next()
283 if token == html.ErrorToken {
284 err := decoder.Err()
285 if err != io.EOF {
286 log.Println("failed to parse HTML:")
287 }
288 break
289 }
290
291 if len(unsafeElements) == 0 {
292
293 switch token {
294 case html.StartTagToken, html.SelfClosingTagToken:
295 tag, hasAttrs := decoder.TagName()
296 safe := !inArray(tag, UNSAFE_ELEMENTS)
297 if !safe {
298 if !inArray(tag, SELF_CLOSING_ELEMENTS) {
299 var unsafeTag []byte = make([]byte, len(tag))
300 copy(unsafeTag, tag)
301 unsafeElements = append(unsafeElements, unsafeTag)
302 }
303 break
304 }
305 if bytes.Equal(tag, []byte("noscript")) {
306 state = STATE_IN_NOSCRIPT
307 break
308 }
309 var attrs [][][]byte
310 fmt.Fprintf(ctx, "<%s", tag)
311 if hasAttrs {
312 for {
313 attrName, attrValue, moreAttr := decoder.TagAttr()
314 attrs = append(attrs, [][]byte{attrName, attrValue})
315 if !moreAttr {
316 break
317 }
318 }
319 if bytes.Equal(tag, []byte("meta")) {
320 sanitizeMetaAttrs(rc, ctx, attrs)
321 } else {
322 sanitizeAttrs(rc, ctx, attrs)
323 }
324 }
325 if token == html.SelfClosingTagToken {
326 fmt.Fprintf(ctx, " />")
327 } else {
328 fmt.Fprintf(ctx, ">")
329 if bytes.Equal(tag, []byte("style")) {
330 state = STATE_IN_STYLE
331 }
332 }
333 if bytes.Equal(tag, []byte("form")) {
334 var formURL *url.URL
335 for _, attr := range attrs {
336 if bytes.Equal(attr[0], []byte("action")) {
337 formURL, _ = url.Parse(string(attr[1]))
338 mergeURIs(rc.baseURL, formURL)
339 break
340 }
341 }
342 if formURL == nil {
343 formURL = rc.baseURL
344 }
345 urlStr := formURL.String()
346 var key string
347 if rc.Key != nil {
348 key = hash(urlStr, rc.Key)
349 }
350 fmt.Fprintf(ctx, HTML_FORM_EXTENSION, urlStr, key)
351
352 }
353
354 case html.EndTagToken:
355 tag, _ := decoder.TagName()
356 writeEndTag := true
357 switch string(tag) {
358 case "body":
359 fmt.Fprintf(ctx, HTML_BODY_EXTENSION, rc.baseURL.String())
360 case "style":
361 state = STATE_DEFAULT
362 case "noscript":
363 state = STATE_DEFAULT
364 writeEndTag = false
365 }
366 // skip noscript tags - only the tag, not the content, because javascript is sanitized
367 if writeEndTag {
368 fmt.Fprintf(ctx, "</%s>", tag)
369 }
370
371 case html.TextToken:
372 switch state {
373 case STATE_DEFAULT:
374 fmt.Fprintf(ctx, "%s", decoder.Raw())
375 case STATE_IN_STYLE:
376 sanitizeCSS(rc, ctx, decoder.Raw())
377 case STATE_IN_NOSCRIPT:
378 sanitizeHTML(rc, ctx, decoder.Raw())
379 }
380
381 case html.DoctypeToken, html.CommentToken:
382 ctx.Write(decoder.Raw())
383 }
384 } else {
385 switch token {
386 case html.StartTagToken:
387 tag, _ := decoder.TagName()
388 if inArray(tag, UNSAFE_ELEMENTS) {
389 unsafeElements = append(unsafeElements, tag)
390 }
391
392 case html.EndTagToken:
393 tag, _ := decoder.TagName()
394 if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
395 unsafeElements = unsafeElements[:len(unsafeElements)-1]
396 }
397 }
398 }
399 }
400}
401
402func sanitizeMetaAttrs(rc *RequestConfig, ctx *fasthttp.RequestCtx, attrs [][][]byte) {
403 var http_equiv []byte
404 var content []byte
405
406 for _, attr := range attrs {
407 attrName := attr[0]
408 attrValue := attr[1]
409 if bytes.Equal(attrName, []byte("http-equiv")) {
410 http_equiv = bytes.ToLower(attrValue)
411 }
412 if bytes.Equal(attrName, []byte("content")) {
413 content = attrValue
414 }
415 }
416
417 if bytes.Equal(http_equiv, []byte("refresh")) && bytes.Index(content, []byte(";url=")) != -1 {
418 parts := bytes.SplitN(content, []byte(";url="), 2)
419 if uri, err := proxifyURI(rc, string(parts[1])); err == nil {
420 fmt.Fprintf(ctx, ` http-equiv="refresh" content="%s;%s"`, parts[0], uri)
421 }
422 } else {
423 sanitizeAttrs(rc, ctx, attrs)
424 }
425
426}
427
428func sanitizeAttrs(rc *RequestConfig, ctx *fasthttp.RequestCtx, attrs [][][]byte) {
429 for _, attr := range attrs {
430 sanitizeAttr(rc, ctx, attr[0], attr[1])
431 }
432}
433
434func sanitizeAttr(rc *RequestConfig, ctx *fasthttp.RequestCtx, attrName, attrValue []byte) {
435 if inArray(attrName, SAFE_ATTRIBUTES) {
436 fmt.Fprintf(ctx, " %s=\"%s\"", attrName, attrValue)
437 return
438 }
439 switch string(attrName) {
440 case "src", "href", "action":
441 if uri, err := proxifyURI(rc, string(attrValue)); err == nil {
442 fmt.Fprintf(ctx, " %s=\"%s\"", attrName, uri)
443 } else {
444 log.Println("cannot proxify uri:", attrValue)
445 }
446 case "style":
447 fmt.Fprintf(ctx, " %s=\"", attrName)
448 sanitizeCSS(rc, ctx, attrValue)
449 ctx.Write([]byte("\""))
450 }
451}
452
// mergeURIs resolves the reference u2 against the base u1, modifying u2 in place.
//
//   - A missing scheme is taken from u1.
//   - Opaque URLs (e.g. "mailto:x@y") and URLs that already name a host are
//     otherwise left alone.
//   - A host-less reference inherits u1's host, also when u1 has an empty path.
//   - An empty reference path means "the same document": u1's path is used,
//     and u1's query too when the reference has none.
//   - A relative path is joined onto the directory of u1's path; a trailing
//     slash on the reference is preserved.
//
// Nil arguments are ignored. A base path without any '/' is treated as having
// an empty directory.
func mergeURIs(u1, u2 *url.URL) {
	if u1 == nil || u2 == nil {
		return
	}
	if u2.Scheme == "" || u2.Scheme == "//" {
		u2.Scheme = u1.Scheme
	}
	if u2.Opaque != "" || u2.Host != "" {
		return
	}

	u2.Host = u1.Host
	switch {
	case u2.Path == "":
		u2.Path, u2.RawPath = u1.Path, u1.RawPath
		if u2.RawQuery == "" {
			u2.RawQuery = u1.RawQuery
		}
	case u2.Path[0] != '/':
		// LastIndexByte returns -1 when there is no slash, which makes dir empty.
		dir := u1.Path[:strings.LastIndexByte(u1.Path, '/')+1]
		joined := path.Join(dir, u2.Path)
		// path.Join cleans the result and thereby strips a trailing slash.
		if strings.HasSuffix(u2.Path, "/") && !strings.HasSuffix(joined, "/") {
			joined += "/"
		}
		u2.Path = joined
	}
}
464
465func proxifyURI(rc *RequestConfig, uri string) (string, error) {
466 // TODO check malicious data: - e.g. data:script
467 if strings.HasPrefix(uri, "data:") {
468 return uri, nil
469 }
470
471 if len(uri) > 0 && uri[0] == '#' {
472 return uri, nil
473 }
474
475 u, err := url.Parse(uri)
476 if err != nil {
477 return "", err
478 }
479 mergeURIs(rc.baseURL, u)
480
481 uri = u.String()
482
483 if rc.Key == nil {
484 return fmt.Sprintf("./?mortyurl=%s", url.QueryEscape(uri)), nil
485 }
486 return fmt.Sprintf("./?mortyhash=%s&mortyurl=%s", hash(uri, rc.Key), url.QueryEscape(uri)), nil
487}
488
// inArray reports whether a contains an element byte-for-byte equal to b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if bytes.Equal(a[i], b) {
			return true
		}
	}
	return false
}
497
// hash returns the hex-encoded HMAC-SHA256 of msg computed with key.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))

	digest := signer.Sum(nil)
	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
503
// verifyRequestURI reports whether hashMsg is the hex-encoded HMAC-SHA256 of
// uri under key. A hashMsg that is not valid hex is logged and rejected. The
// digests are compared with hmac.Equal.
func verifyRequestURI(uri, hashMsg, key []byte) bool {
	given, err := hex.DecodeString(string(hashMsg))
	if err != nil {
		log.Println("hmac error:", err)
		return false
	}

	expected := hmac.New(sha256.New, key)
	expected.Write(uri)
	return hmac.Equal(given, expected.Sum(nil))
}
515
516func (p *Proxy) breakOnError(ctx *fasthttp.RequestCtx, err error) bool {
517 if err == nil {
518 return false
519 }
520 log.Println("error:", err)
521 ctx.SetStatusCode(404)
522 ctx.SetContentType("text/html")
523 ctx.Write([]byte(`<!doctype html>
524<head>
525<title>MortyError</title>
526</head>
527<body><h2>Error!</h2>`))
528 ctx.Write([]byte("<h3>"))
529 ctx.Write([]byte(html.EscapeString(err.Error())))
530 ctx.Write([]byte("</h3>"))
531 if p.Key == nil {
532 ctx.Write([]byte(`
533<form action="post">
534 Visit url: <input placeholder="https://url.." name="mortyurl" />
535 <input type="submit" value="go" />
536</form>`))
537 }
538 ctx.Write([]byte(`
539</body>
540</html>`))
541 return true
542}
543
544func main() {
545
546 listen := flag.String("listen", "127.0.0.1:3000", "Listen address")
547 key := flag.String("key", "", "HMAC url validation key (hexadecimal encoded) - leave blank to disable")
548 requestTimeout := flag.Uint("timeout", 2, "Request timeout")
549 flag.Parse()
550
551 p := &Proxy{RequestTimeout: time.Duration(*requestTimeout) * time.Second}
552
553 if *key != "" {
554 p.Key = []byte(*key)
555 }
556
557 log.Println("listening on", *listen)
558
559 if err := fasthttp.ListenAndServe(*listen, p.RequestHandler); err != nil {
560 log.Fatal("Error in ListenAndServe:", err)
561 }
562}
Note: See TracBrowser for help on using the repository browser.