[142] | 1 | package main
|
---|
| 2 |
|
---|
| 3 | import (
|
---|
| 4 | "bytes"
|
---|
| 5 | "crypto/hmac"
|
---|
| 6 | "crypto/sha256"
|
---|
| 7 | "encoding/base64"
|
---|
| 8 | "encoding/hex"
|
---|
| 9 | "errors"
|
---|
| 10 | "flag"
|
---|
| 11 | "fmt"
|
---|
| 12 | "html/template"
|
---|
| 13 | "io"
|
---|
| 14 | "log"
|
---|
| 15 | "mime"
|
---|
| 16 | "net/url"
|
---|
| 17 | "os"
|
---|
| 18 | "path/filepath"
|
---|
| 19 | "regexp"
|
---|
| 20 | "strings"
|
---|
| 21 | "time"
|
---|
| 22 | "unicode/utf8"
|
---|
| 23 |
|
---|
| 24 | "github.com/valyala/fasthttp"
|
---|
| 25 | "github.com/valyala/fasthttp/fasthttpproxy"
|
---|
| 26 | "golang.org/x/net/html"
|
---|
| 27 | "golang.org/x/net/html/charset"
|
---|
| 28 | "golang.org/x/text/encoding"
|
---|
| 29 |
|
---|
| 30 | "marisa.chaotic.ninja/yukari/config"
|
---|
| 31 | "marisa.chaotic.ninja/yukari/contenttype"
|
---|
| 32 | )
|
---|
| 33 |
|
---|
// Tokenizer states used by sanitizeHTML to decide how text tokens are handled.
const (
	// STATE_DEFAULT: text is copied to the output unchanged.
	STATE_DEFAULT int = iota
	// STATE_IN_STYLE: text is the body of a <style> element and goes through sanitizeCSS.
	STATE_IN_STYLE
	// STATE_IN_NOSCRIPT: text is the body of a <noscript> element and is sanitized as HTML.
	STATE_IN_NOSCRIPT
)

const (
	// VERSION is the version string of this program.
	VERSION = "v0.2.1"

	// MAX_REDIRECT_COUNT bounds how many redirects ProcessUri follows for one request.
	MAX_REDIRECT_COUNT = 5
)
|
---|
| 43 |
|
---|
| 44 | var CLIENT *fasthttp.Client = &fasthttp.Client{
|
---|
| 45 | MaxResponseBodySize: 10 * 1024 * 1024, // 10M
|
---|
| 46 | ReadBufferSize: 16 * 1024, // 16K
|
---|
| 47 | }
|
---|
| 48 |
|
---|
| 49 | var cfg *config.Config = config.DefaultConfig
|
---|
| 50 |
|
---|
| 51 | var ALLOWED_CONTENTTYPE_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
|
---|
| 52 | // html
|
---|
| 53 | contenttype.NewFilterEquals("text", "html", ""),
|
---|
| 54 | contenttype.NewFilterEquals("application", "xhtml", "xml"),
|
---|
| 55 | // css
|
---|
| 56 | contenttype.NewFilterEquals("text", "css", ""),
|
---|
| 57 | // images
|
---|
| 58 | contenttype.NewFilterEquals("image", "gif", ""),
|
---|
| 59 | contenttype.NewFilterEquals("image", "png", ""),
|
---|
| 60 | contenttype.NewFilterEquals("image", "jpeg", ""),
|
---|
| 61 | contenttype.NewFilterEquals("image", "pjpeg", ""),
|
---|
| 62 | contenttype.NewFilterEquals("image", "webp", ""),
|
---|
| 63 | contenttype.NewFilterEquals("image", "tiff", ""),
|
---|
| 64 | contenttype.NewFilterEquals("image", "vnd.microsoft.icon", ""),
|
---|
| 65 | contenttype.NewFilterEquals("image", "bmp", ""),
|
---|
| 66 | contenttype.NewFilterEquals("image", "x-ms-bmp", ""),
|
---|
| 67 | contenttype.NewFilterEquals("image", "x-icon", ""),
|
---|
| 68 | contenttype.NewFilterEquals("image", "svg", "xml"),
|
---|
| 69 | // fonts
|
---|
| 70 | contenttype.NewFilterEquals("application", "font-otf", ""),
|
---|
| 71 | contenttype.NewFilterEquals("application", "font-ttf", ""),
|
---|
| 72 | contenttype.NewFilterEquals("application", "font-woff", ""),
|
---|
| 73 | contenttype.NewFilterEquals("application", "vnd.ms-fontobject", ""),
|
---|
| 74 | })
|
---|
| 75 |
|
---|
| 76 | var ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER contenttype.Filter = contenttype.NewFilterOr([]contenttype.Filter{
|
---|
| 77 | // texts
|
---|
| 78 | contenttype.NewFilterEquals("text", "csv", ""),
|
---|
| 79 | contenttype.NewFilterEquals("text", "tab-separated-values", ""),
|
---|
| 80 | contenttype.NewFilterEquals("text", "plain", ""),
|
---|
| 81 | // API
|
---|
| 82 | contenttype.NewFilterEquals("application", "json", ""),
|
---|
| 83 | // Documents
|
---|
| 84 | contenttype.NewFilterEquals("application", "x-latex", ""),
|
---|
| 85 | contenttype.NewFilterEquals("application", "pdf", ""),
|
---|
| 86 | contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.text", ""),
|
---|
| 87 | contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.spreadsheet", ""),
|
---|
| 88 | contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.presentation", ""),
|
---|
| 89 | contenttype.NewFilterEquals("application", "vnd.oasis.opendocument.graphics", ""),
|
---|
| 90 | // Compressed archives
|
---|
| 91 | contenttype.NewFilterEquals("application", "zip", ""),
|
---|
| 92 | contenttype.NewFilterEquals("application", "gzip", ""),
|
---|
| 93 | contenttype.NewFilterEquals("application", "x-compressed", ""),
|
---|
| 94 | contenttype.NewFilterEquals("application", "x-gtar", ""),
|
---|
| 95 | contenttype.NewFilterEquals("application", "x-compress", ""),
|
---|
| 96 | // Generic binary
|
---|
| 97 | contenttype.NewFilterEquals("application", "octet-stream", ""),
|
---|
| 98 | })
|
---|
| 99 |
|
---|
| 100 | var ALLOWED_CONTENTTYPE_PARAMETERS map[string]bool = map[string]bool{
|
---|
| 101 | "charset": true,
|
---|
| 102 | }
|
---|
| 103 |
|
---|
// byteSlices converts each word into its own []byte, preserving order.
func byteSlices(words ...string) [][]byte {
	list := make([][]byte, len(words))
	for i, w := range words {
		list[i] = []byte(w)
	}
	return list
}

// UNSAFE_ELEMENTS lists the elements that sanitizeHTML drops together with
// their whole content. "iframe" is intentionally not part of the list.
var UNSAFE_ELEMENTS = byteSlices(
	"applet",
	"canvas",
	"embed",
	"math",
	"script",
	"svg",
)

// SAFE_ATTRIBUTES lists attribute names considered harmless.
var SAFE_ATTRIBUTES = byteSlices(
	"abbr",
	"accesskey",
	"align",
	"alt",
	"as",
	"autocomplete",
	"charset",
	"checked",
	"class",
	"content",
	"contenteditable",
	"contextmenu",
	"dir",
	"for",
	"height",
	"hidden",
	"hreflang",
	"id",
	"lang",
	"media",
	"method",
	"name",
	"nowrap",
	"placeholder",
	"property",
	"rel",
	"spellcheck",
	"tabindex",
	"target",
	"title",
	"translate",
	"type",
	"value",
	"width",
)

// LINK_REL_SAFE_VALUES lists the rel values a <link> may carry; sanitizeLinkTag
// drops any <link> whose rel attribute is not exactly one of them.
var LINK_REL_SAFE_VALUES = byteSlices(
	"alternate",
	"archives",
	"author",
	"copyright",
	"first",
	"help",
	"icon",
	"index",
	"last",
	"license",
	"manifest",
	"next",
	"pingback",
	"prev",
	"publisher",
	"search",
	"shortcut icon",
	"stylesheet",
	"up",
)

// LINK_HTTP_EQUIV_SAFE_VALUES lists the (lower-cased) http-equiv values a
// <meta> element may carry.
// X-UA-Compatible is added automatically, so it can be skipped here.
// "refresh" relies on URL rewriting; "location" is not supported yet
// (TODO URL rewrite).
var LINK_HTTP_EQUIV_SAFE_VALUES = byteSlices(
	"date",
	"last-modified",
	"refresh",
	"content-language",
)

// CSS_URL_REGEXP locates url(...) references in CSS. Submatch 2 is the URL
// itself, without the optional surrounding quotes.
var CSS_URL_REGEXP = regexp.MustCompile("url\\((['\"]?)[ \\t\\f]*([\u0009\u0021\u0023-\u0026\u0028\u002a-\u007E]+)(['\"]?)\\)?")
|
---|
| 183 |
|
---|
type (
	// Proxy holds the per-server settings used by the request handlers.
	Proxy struct {
		Key            []byte        // when non-nil, requests must carry a hash matching this key
		RequestTimeout time.Duration // timeout applied to each upstream request
		FollowRedirect bool          // follow upstream redirects for GET requests
	}

	// RequestConfig carries the state needed while sanitizing one response.
	RequestConfig struct {
		Key          []byte   // key used to hash rewritten URLs
		BaseURL      *url.URL // URL that relative references are resolved against
		BodyInjected bool     // set once the body extension has been written at </body>
	}

	// HTMLBodyExtParam is the data passed to HTML_BODY_EXTENSION.
	HTMLBodyExtParam struct {
		BaseURL      string
		HasYukariKey bool
		URLParamName string
	}

	// HTMLFormExtParam is the data passed to HTML_FORM_EXTENSION.
	HTMLFormExtParam struct {
		BaseURL       string
		YukariHash    string
		URLParamName  string
		HashParamName string
	}

	// HTMLMainPageFormParam is the data passed to HTML_MAIN_PAGE_FORM.
	HTMLMainPageFormParam struct {
		URLParamName string
	}
)
|
---|
| 211 |
|
---|
// Templates parsed once by init().
var (
	// HTML_FORM_EXTENSION renders the hidden inputs injected into every <form>.
	HTML_FORM_EXTENSION *template.Template
	// HTML_BODY_EXTENSION renders the proxy header injected into proxied pages.
	HTML_BODY_EXTENSION *template.Template
	// HTML_MAIN_PAGE_FORM renders the URL entry form.
	HTML_MAIN_PAGE_FORM *template.Template
)

// HTML_HEAD_CONTENT_TYPE is written right after every <head> start tag by
// sanitizeHTML: it declares UTF-8 and disables the referrer.
var HTML_HEAD_CONTENT_TYPE string = `<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="referrer" content="no-referrer">
`

// YUKARI_HTML_PAGE_START is the opening part of the proxy's own HTML page
// (document head, styles and title heading).
var YUKARI_HTML_PAGE_START string = `<!doctype html>
<html>
<head>
<title>Yukari's Gap</title>
<meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
<style>
html { height: 100%; }
body { min-height : 100%; display: flex; flex-direction:column; font-family: 'Garamond', 'Georgia', serif; text-align: center; color: #BC4BFC; background: #FAFAFA; margin: 0;
padding: 0; font-size: 1.1em; }
input { border: 1px solid #888; padding: 0.3em; color: #BC4BFC; background: #FFF; font-size: 1.1em; }
input[placeholder] { width:80%; }
a { text-decoration: none; color: #2980b9; }
h1, h2 { font-weight: 200; margin-bottom: 2rem; }
h1 { font-size: 3em; }
.container { flex:1; min-height: 100%; margin-bottom: 1em; }
.footer { margin: 1em; }
.footer p { font-size: 0.8em; }
</style>
</head>
<body>
<div class="container">
<h1>Yukari's Gap</h1>
`

// YUKARI_HTML_PAGE_END closes the container opened by YUKARI_HTML_PAGE_START
// and appends the footer.
var YUKARI_HTML_PAGE_END string = `
</div>
<div class="footer">
<p>Yukari rewrites web pages to exclude malicious HTML tags and CSS/HTML attributes. It also replaces external resource references to prevent third-party information leaks.<br />
<a href="https://git.chaotic.ninja/yakumo.izuru/yukari">view on 「混沌とした 忍者」Git</a>
</p>
</div>
</body>
</html>`

// FAVICON_BYTES holds the decoded favicon image; it is filled in by init()
// and served by appRequestHandler.
var FAVICON_BYTES []byte
|
---|
| 255 |
|
---|
| 256 | func init() {
|
---|
| 257 | FaviconBase64 := "iVBORw0KGgoAAAANSUhEUgAAABAAAAAQEAYAAABPYyMiAAAABmJLR0T///////8JWPfcAAAACXBIWXMAAABIAAAASABGyWs+AAAAF0lEQVRIx2NgGAWjYBSMglEwCkbBSAcACBAAAeaR9cIAAAAASUVORK5CYII"
|
---|
| 258 |
|
---|
| 259 | FAVICON_BYTES, _ = base64.StdEncoding.DecodeString(FaviconBase64)
|
---|
| 260 | var err error
|
---|
| 261 | HTML_FORM_EXTENSION, err = template.New("html_form_extension").Parse(
|
---|
| 262 | `<input type="hidden" name="yukariurl" value="{{.BaseURL}}" />{{if .YukariHash}}<input type="hidden" name="yukarihash" value="{{.YukariHash}}" />{{end}}`)
|
---|
| 263 | if err != nil {
|
---|
| 264 | panic(err)
|
---|
| 265 | }
|
---|
| 266 | HTML_BODY_EXTENSION, err = template.New("html_body_extension").Parse(`
|
---|
| 267 | <input type="checkbox" id="yukaritoggle" autocomplete="off" />
|
---|
| 268 | <div id="yukariheader">
|
---|
| 269 | <form method="get">
|
---|
| 270 | <label for="yukaritoggle">hide</label>
|
---|
| 271 | <span><a href="/">Yukari's Gap</a></span>
|
---|
| 272 | <input type="url" value="{{.BaseURL}}" name="{{.URLParamName}}" {{if .HasYukariKey }}readonly="true"{{end}} />
|
---|
[143] | 273 | This is a <a href="https://git.chaotic.ninja/yakumo.izuru/yukari">proxified and sanitized</a> view of the page, visit <a href="{{.BaseURL}}" rel="noreferrer">original site</a>.
|
---|
[142] | 274 | </form>
|
---|
| 275 | </div>
|
---|
| 276 | <style>
|
---|
| 277 | body{ position: absolute !important; top: 42px !important; left: 0 !important; right: 0 !important; bottom: 0 !important; }
|
---|
| 278 | #yukariheader { position: fixed; margin: 0; box-sizing: border-box; -webkit-box-sizing: border-box; top: 0; left: 0; right: 0; z-index: 2147483647 !important; font-size: 12px; line-height: normal; border-width: 0px 0px 2px 0; border-style: solid; border-color: #AAAAAA; background: #FFF; padding: 4px; color: #444; height: 42px; }
|
---|
| 279 | #yukariheader * { padding: 0; margin: 0; }
|
---|
| 280 | #yukariheader p { padding: 0 0 0.7em 0; display: block; }
|
---|
[143] | 281 | #yukariheader a { color: #8934DB; font-weight: bold; display: inline; }
|
---|
[142] | 282 | #yukariheader label { text-align: right; cursor: pointer; position: fixed; right: 4px; top: 4px; display: block; color: #444; }
|
---|
| 283 | #yukariheader > form > span { font-size: 24px; font-weight: bold; margin-right: 20px; margin-left: 20px; }
|
---|
| 284 | input[type=checkbox]#yukaritoggle { display: none; }
|
---|
| 285 | input[type=checkbox]#yukaritoggle:checked ~ div { display: none; visibility: hidden; }
|
---|
| 286 | #yukariheader input[type=url] { width: 50%; padding: 4px; font-size: 16px; }
|
---|
| 287 | </style>
|
---|
| 288 | `)
|
---|
| 289 | if err != nil {
|
---|
| 290 | panic(err)
|
---|
| 291 | }
|
---|
| 292 | HTML_MAIN_PAGE_FORM, err = template.New("html_main_page_form").Parse(`
|
---|
| 293 | <form action="post">
|
---|
| 294 | Visit url: <input placeholder="https://url.." name="{{.URLParamName}}" autofocus />
|
---|
| 295 | <input type="submit" value="go" />
|
---|
| 296 | </form>`)
|
---|
| 297 | if err != nil {
|
---|
| 298 | panic(err)
|
---|
| 299 | }
|
---|
| 300 | }
|
---|
| 301 |
|
---|
| 302 | func (p *Proxy) RequestHandler(ctx *fasthttp.RequestCtx) {
|
---|
| 303 |
|
---|
| 304 | if appRequestHandler(ctx) {
|
---|
| 305 | return
|
---|
| 306 | }
|
---|
| 307 |
|
---|
| 308 | requestHash := popRequestParam(ctx, []byte(cfg.HashParameter))
|
---|
| 309 |
|
---|
| 310 | requestURI := popRequestParam(ctx, []byte(cfg.UrlParameter))
|
---|
| 311 |
|
---|
| 312 | if requestURI == nil {
|
---|
| 313 | p.serveMainPage(ctx, 200, nil)
|
---|
| 314 | return
|
---|
| 315 | }
|
---|
| 316 |
|
---|
| 317 | if p.Key != nil {
|
---|
| 318 | if !verifyRequestURI(requestURI, requestHash, p.Key) {
|
---|
| 319 | // HTTP status code 403 : Forbidden
|
---|
| 320 | error_message := fmt.Sprintf(`invalid "%s" parameter. hint: Hash URL Parameter`, cfg.HashParameter)
|
---|
| 321 | p.serveMainPage(ctx, 403, errors.New(error_message))
|
---|
| 322 | return
|
---|
| 323 | }
|
---|
| 324 | }
|
---|
| 325 |
|
---|
| 326 | requestURIQuery := ctx.QueryArgs().QueryString()
|
---|
| 327 | if len(requestURIQuery) > 0 {
|
---|
| 328 | if bytes.ContainsRune(requestURI, '?') {
|
---|
| 329 | requestURI = append(requestURI, '&')
|
---|
| 330 | } else {
|
---|
| 331 | requestURI = append(requestURI, '?')
|
---|
| 332 | }
|
---|
| 333 | requestURI = append(requestURI, requestURIQuery...)
|
---|
| 334 | }
|
---|
| 335 |
|
---|
| 336 | p.ProcessUri(ctx, string(requestURI), 0)
|
---|
| 337 | }
|
---|
| 338 |
|
---|
| 339 | func (p *Proxy) ProcessUri(ctx *fasthttp.RequestCtx, requestURIStr string, redirectCount int) {
|
---|
| 340 | parsedURI, err := url.Parse(requestURIStr)
|
---|
| 341 |
|
---|
| 342 | if err != nil {
|
---|
| 343 | // HTTP status code 500 : Internal Server Error
|
---|
| 344 | p.serveMainPage(ctx, 500, err)
|
---|
| 345 | return
|
---|
| 346 | }
|
---|
| 347 |
|
---|
| 348 | if parsedURI.Scheme == "" {
|
---|
| 349 | requestURIStr = "https://" + requestURIStr
|
---|
| 350 | parsedURI, err = url.Parse(requestURIStr)
|
---|
| 351 | if err != nil {
|
---|
| 352 | p.serveMainPage(ctx, 500, err)
|
---|
| 353 | return
|
---|
| 354 | }
|
---|
| 355 | }
|
---|
| 356 |
|
---|
| 357 | // Serve an intermediate page for protocols other than HTTP(S)
|
---|
| 358 | if (parsedURI.Scheme != "http" && parsedURI.Scheme != "https") || strings.HasSuffix(parsedURI.Host, ".onion") {
|
---|
| 359 | p.serveExitYukariPage(ctx, parsedURI)
|
---|
| 360 | return
|
---|
| 361 | }
|
---|
| 362 |
|
---|
| 363 | req := fasthttp.AcquireRequest()
|
---|
| 364 | defer fasthttp.ReleaseRequest(req)
|
---|
| 365 | req.SetConnectionClose()
|
---|
| 366 |
|
---|
| 367 | if cfg.Debug {
|
---|
| 368 | log.Println(string(ctx.Method()), requestURIStr)
|
---|
| 369 | }
|
---|
| 370 |
|
---|
| 371 | req.SetRequestURI(requestURIStr)
|
---|
| 372 | req.Header.SetUserAgentBytes([]byte("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:112.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"))
|
---|
| 373 |
|
---|
| 374 | resp := fasthttp.AcquireResponse()
|
---|
| 375 | defer fasthttp.ReleaseResponse(resp)
|
---|
| 376 |
|
---|
| 377 | req.Header.SetMethodBytes(ctx.Method())
|
---|
| 378 | if ctx.IsPost() || ctx.IsPut() {
|
---|
| 379 | req.SetBody(ctx.PostBody())
|
---|
| 380 | }
|
---|
| 381 |
|
---|
| 382 | err = CLIENT.DoTimeout(req, resp, p.RequestTimeout)
|
---|
| 383 |
|
---|
| 384 | if err != nil {
|
---|
| 385 | if err == fasthttp.ErrTimeout {
|
---|
| 386 | // HTTP status code 504 : Gateway Time-Out
|
---|
| 387 | p.serveMainPage(ctx, 504, err)
|
---|
| 388 | } else {
|
---|
| 389 | // HTTP status code 500 : Internal Server Error
|
---|
| 390 | p.serveMainPage(ctx, 500, err)
|
---|
| 391 | }
|
---|
| 392 | return
|
---|
| 393 | }
|
---|
| 394 |
|
---|
| 395 | if resp.StatusCode() != 200 {
|
---|
| 396 | switch resp.StatusCode() {
|
---|
| 397 | case 301, 302, 303, 307, 308:
|
---|
| 398 | loc := resp.Header.Peek("Location")
|
---|
| 399 | if loc != nil {
|
---|
| 400 | if p.FollowRedirect && ctx.IsGet() {
|
---|
| 401 | // GET method: Yukari follows the redirect
|
---|
| 402 | if redirectCount < MAX_REDIRECT_COUNT {
|
---|
| 403 | if cfg.Debug {
|
---|
| 404 | log.Println("follow redirect to", string(loc))
|
---|
| 405 | }
|
---|
| 406 | p.ProcessUri(ctx, string(loc), redirectCount+1)
|
---|
| 407 | } else {
|
---|
| 408 | p.serveMainPage(ctx, 310, errors.New("Too many redirects"))
|
---|
| 409 | }
|
---|
| 410 | return
|
---|
| 411 | } else {
|
---|
| 412 | // Other HTTP methods: Yukari does NOT follow the redirect
|
---|
| 413 | rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
|
---|
| 414 | url, err := rc.ProxifyURI(loc)
|
---|
| 415 | if err == nil {
|
---|
| 416 | ctx.SetStatusCode(resp.StatusCode())
|
---|
| 417 | ctx.Response.Header.Add("Location", url)
|
---|
| 418 | if cfg.Debug {
|
---|
| 419 | log.Println("redirect to", string(loc))
|
---|
| 420 | }
|
---|
| 421 | return
|
---|
| 422 | }
|
---|
| 423 | }
|
---|
| 424 | }
|
---|
| 425 | }
|
---|
| 426 | error_message := fmt.Sprintf("invalid response: %d (%s)", resp.StatusCode(), requestURIStr)
|
---|
| 427 | p.serveMainPage(ctx, resp.StatusCode(), errors.New(error_message))
|
---|
| 428 | return
|
---|
| 429 | }
|
---|
| 430 |
|
---|
| 431 | contentTypeBytes := resp.Header.Peek("Content-Type")
|
---|
| 432 |
|
---|
| 433 | if contentTypeBytes == nil {
|
---|
| 434 | // HTTP status code 503 : Service Unavailable
|
---|
| 435 | p.serveMainPage(ctx, 503, errors.New("invalid content type"))
|
---|
| 436 | return
|
---|
| 437 | }
|
---|
| 438 |
|
---|
| 439 | contentTypeString := string(contentTypeBytes)
|
---|
| 440 |
|
---|
| 441 | // decode Content-Type header
|
---|
| 442 | contentType, error := contenttype.ParseContentType(contentTypeString)
|
---|
| 443 | if error != nil {
|
---|
| 444 | // HTTP status code 503 : Service Unavailable
|
---|
| 445 | p.serveMainPage(ctx, 503, errors.New("invalid content type"))
|
---|
| 446 | return
|
---|
| 447 | }
|
---|
| 448 |
|
---|
| 449 | // content-disposition
|
---|
| 450 | contentDispositionBytes := ctx.Request.Header.Peek("Content-Disposition")
|
---|
| 451 |
|
---|
| 452 | // check content type
|
---|
| 453 | if !ALLOWED_CONTENTTYPE_FILTER(contentType) {
|
---|
| 454 | // it is not a usual content type
|
---|
| 455 | if ALLOWED_CONTENTTYPE_ATTACHMENT_FILTER(contentType) {
|
---|
| 456 | // force attachment for allowed content type
|
---|
| 457 | contentDispositionBytes = contentDispositionForceAttachment(contentDispositionBytes, parsedURI)
|
---|
| 458 | } else {
|
---|
| 459 | // deny access to forbidden content type
|
---|
| 460 | // HTTP status code 403 : Forbidden
|
---|
| 461 | p.serveMainPage(ctx, 403, errors.New("forbidden content type "+parsedURI.String()))
|
---|
| 462 | return
|
---|
| 463 | }
|
---|
| 464 | }
|
---|
| 465 |
|
---|
| 466 | // HACK : replace */xhtml by text/html
|
---|
| 467 | if contentType.SubType == "xhtml" {
|
---|
| 468 | contentType.TopLevelType = "text"
|
---|
| 469 | contentType.SubType = "html"
|
---|
| 470 | contentType.Suffix = ""
|
---|
| 471 | }
|
---|
| 472 |
|
---|
| 473 | // conversion to UTF-8
|
---|
| 474 | var responseBody []byte
|
---|
| 475 |
|
---|
| 476 | if contentType.TopLevelType == "text" {
|
---|
| 477 | e, ename, _ := charset.DetermineEncoding(resp.Body(), contentTypeString)
|
---|
| 478 | if (e != encoding.Nop) && (!strings.EqualFold("utf-8", ename)) {
|
---|
| 479 | responseBody, err = e.NewDecoder().Bytes(resp.Body())
|
---|
| 480 | if err != nil {
|
---|
| 481 | // HTTP status code 503 : Service Unavailable
|
---|
| 482 | p.serveMainPage(ctx, 503, err)
|
---|
| 483 | return
|
---|
| 484 | }
|
---|
| 485 | } else {
|
---|
| 486 | responseBody = resp.Body()
|
---|
| 487 | }
|
---|
| 488 | // update the charset or specify it
|
---|
| 489 | contentType.Parameters["charset"] = "UTF-8"
|
---|
| 490 | } else {
|
---|
| 491 | responseBody = resp.Body()
|
---|
| 492 | }
|
---|
| 493 |
|
---|
| 494 | //
|
---|
| 495 | contentType.FilterParameters(ALLOWED_CONTENTTYPE_PARAMETERS)
|
---|
| 496 |
|
---|
| 497 | // set the content type
|
---|
| 498 | ctx.SetContentType(contentType.String())
|
---|
| 499 |
|
---|
| 500 | // output according to MIME type
|
---|
| 501 | switch {
|
---|
| 502 | case contentType.SubType == "css" && contentType.Suffix == "":
|
---|
| 503 | sanitizeCSS(&RequestConfig{Key: p.Key, BaseURL: parsedURI}, ctx, responseBody)
|
---|
| 504 | case contentType.SubType == "html" && contentType.Suffix == "":
|
---|
| 505 | rc := &RequestConfig{Key: p.Key, BaseURL: parsedURI}
|
---|
| 506 | sanitizeHTML(rc, ctx, responseBody)
|
---|
| 507 | if !rc.BodyInjected {
|
---|
| 508 | p := HTMLBodyExtParam{rc.BaseURL.String(), false, cfg.UrlParameter}
|
---|
| 509 | if len(rc.Key) > 0 {
|
---|
| 510 | p.HasYukariKey = true
|
---|
| 511 | }
|
---|
| 512 | err := HTML_BODY_EXTENSION.Execute(ctx, p)
|
---|
| 513 | if err != nil {
|
---|
| 514 | if cfg.Debug {
|
---|
| 515 | fmt.Println("failed to inject body extension", err)
|
---|
| 516 | }
|
---|
| 517 | }
|
---|
| 518 | }
|
---|
| 519 | default:
|
---|
| 520 | if contentDispositionBytes != nil {
|
---|
| 521 | ctx.Response.Header.AddBytesV("Content-Disposition", contentDispositionBytes)
|
---|
| 522 | }
|
---|
| 523 | ctx.Write(responseBody)
|
---|
| 524 | }
|
---|
| 525 | }
|
---|
| 526 |
|
---|
// contentDispositionForceAttachment builds a Content-Disposition header value
// whose disposition type is always "attachment".
//
// contentDispositionBytes is the original header value (may be nil); its
// parameters are kept when it parses. url is the requested URL: when the
// original header has no "filename" parameter, the last element of url.Path
// is used, unless the path has no usable last element.
//
// The result is never empty: if the parameters cannot be serialized, the bare
// "attachment" disposition is returned so the download is still forced.
func contentDispositionForceAttachment(contentDispositionBytes []byte, url *url.URL) []byte {
	params := make(map[string]string)
	if contentDispositionBytes != nil {
		// An unparsable header is treated as if there were none.
		if _, parsed, err := mime.ParseMediaType(string(contentDispositionBytes)); err == nil && parsed != nil {
			params = parsed
		}
	}

	if _, fileNameDefined := params["filename"]; !fileNameDefined {
		// TODO : sanitize filename
		// filepath.Base yields "." for an empty path and the separator for a
		// root path; neither is a meaningful file name, so skip them.
		if name := filepath.Base(url.Path); name != "." && name != string(filepath.Separator) {
			params["filename"] = name
		}
	}

	if formatted := mime.FormatMediaType("attachment", params); formatted != "" {
		return []byte(formatted)
	}
	// FormatMediaType returns "" when a parameter violates the standard.
	return []byte("attachment")
}
|
---|
| 549 |
|
---|
| 550 | func appRequestHandler(ctx *fasthttp.RequestCtx) bool {
|
---|
| 551 | // serve robots.txt
|
---|
| 552 | if bytes.Equal(ctx.Path(), []byte("/robots.txt")) {
|
---|
| 553 | ctx.SetContentType("text/plain")
|
---|
| 554 | ctx.Write([]byte("User-Agent: *\nDisallow: /\n"))
|
---|
| 555 | return true
|
---|
| 556 | }
|
---|
| 557 |
|
---|
| 558 | // server favicon.ico
|
---|
| 559 | if bytes.Equal(ctx.Path(), []byte("/favicon.ico")) {
|
---|
| 560 | ctx.SetContentType("image/png")
|
---|
| 561 | ctx.Write(FAVICON_BYTES)
|
---|
| 562 | return true
|
---|
| 563 | }
|
---|
| 564 |
|
---|
| 565 | return false
|
---|
| 566 | }
|
---|
| 567 |
|
---|
| 568 | func popRequestParam(ctx *fasthttp.RequestCtx, paramName []byte) []byte {
|
---|
| 569 | param := ctx.QueryArgs().PeekBytes(paramName)
|
---|
| 570 |
|
---|
| 571 | if param == nil {
|
---|
| 572 | param = ctx.PostArgs().PeekBytes(paramName)
|
---|
| 573 | ctx.PostArgs().DelBytes(paramName)
|
---|
| 574 | }
|
---|
| 575 | ctx.QueryArgs().DelBytes(paramName)
|
---|
| 576 |
|
---|
| 577 | return param
|
---|
| 578 | }
|
---|
| 579 |
|
---|
| 580 | func sanitizeCSS(rc *RequestConfig, out io.Writer, css []byte) {
|
---|
| 581 | // TODO
|
---|
| 582 |
|
---|
| 583 | urlSlices := CSS_URL_REGEXP.FindAllSubmatchIndex(css, -1)
|
---|
| 584 |
|
---|
| 585 | if urlSlices == nil {
|
---|
| 586 | out.Write(css)
|
---|
| 587 | return
|
---|
| 588 | }
|
---|
| 589 |
|
---|
| 590 | startIndex := 0
|
---|
| 591 |
|
---|
| 592 | for _, s := range urlSlices {
|
---|
| 593 | urlStart := s[4]
|
---|
| 594 | urlEnd := s[5]
|
---|
| 595 |
|
---|
| 596 | if uri, err := rc.ProxifyURI(css[urlStart:urlEnd]); err == nil {
|
---|
| 597 | out.Write(css[startIndex:urlStart])
|
---|
| 598 | out.Write([]byte(uri))
|
---|
| 599 | startIndex = urlEnd
|
---|
| 600 | } else if cfg.Debug {
|
---|
| 601 | log.Println("cannot proxify css uri:", string(css[urlStart:urlEnd]))
|
---|
| 602 | }
|
---|
| 603 | }
|
---|
| 604 | if startIndex < len(css) {
|
---|
| 605 | out.Write(css[startIndex:len(css)])
|
---|
| 606 | }
|
---|
| 607 | }
|
---|
| 608 |
|
---|
| 609 | func sanitizeHTML(rc *RequestConfig, out io.Writer, htmlDoc []byte) {
|
---|
| 610 | r := bytes.NewReader(htmlDoc)
|
---|
| 611 | decoder := html.NewTokenizer(r)
|
---|
| 612 | decoder.AllowCDATA(true)
|
---|
| 613 |
|
---|
| 614 | unsafeElements := make([][]byte, 0, 8)
|
---|
| 615 | state := STATE_DEFAULT
|
---|
| 616 | for {
|
---|
| 617 | token := decoder.Next()
|
---|
| 618 | if token == html.ErrorToken {
|
---|
| 619 | err := decoder.Err()
|
---|
| 620 | if err != io.EOF {
|
---|
| 621 | log.Println("failed to parse HTML")
|
---|
| 622 | }
|
---|
| 623 | break
|
---|
| 624 | }
|
---|
| 625 |
|
---|
| 626 | if len(unsafeElements) == 0 {
|
---|
| 627 |
|
---|
| 628 | switch token {
|
---|
| 629 | case html.StartTagToken, html.SelfClosingTagToken:
|
---|
| 630 | tag, hasAttrs := decoder.TagName()
|
---|
| 631 | safe := !inArray(tag, UNSAFE_ELEMENTS)
|
---|
| 632 | if !safe {
|
---|
| 633 | if token != html.SelfClosingTagToken {
|
---|
| 634 | var unsafeTag []byte = make([]byte, len(tag))
|
---|
| 635 | copy(unsafeTag, tag)
|
---|
| 636 | unsafeElements = append(unsafeElements, unsafeTag)
|
---|
| 637 | }
|
---|
| 638 | break
|
---|
| 639 | }
|
---|
| 640 | if bytes.Equal(tag, []byte("base")) {
|
---|
| 641 | for {
|
---|
| 642 | attrName, attrValue, moreAttr := decoder.TagAttr()
|
---|
| 643 | if bytes.Equal(attrName, []byte("href")) {
|
---|
| 644 | parsedURI, err := url.Parse(string(attrValue))
|
---|
| 645 | if err == nil {
|
---|
| 646 | rc.BaseURL = parsedURI
|
---|
| 647 | }
|
---|
| 648 | }
|
---|
| 649 | if !moreAttr {
|
---|
| 650 | break
|
---|
| 651 | }
|
---|
| 652 | }
|
---|
| 653 | break
|
---|
| 654 | }
|
---|
| 655 | if bytes.Equal(tag, []byte("noscript")) {
|
---|
| 656 | state = STATE_IN_NOSCRIPT
|
---|
| 657 | break
|
---|
| 658 | }
|
---|
| 659 | var attrs [][][]byte
|
---|
| 660 | if hasAttrs {
|
---|
| 661 | for {
|
---|
| 662 | attrName, attrValue, moreAttr := decoder.TagAttr()
|
---|
| 663 | attrs = append(attrs, [][]byte{
|
---|
| 664 | attrName,
|
---|
| 665 | attrValue,
|
---|
| 666 | []byte(html.EscapeString(string(attrValue))),
|
---|
| 667 | })
|
---|
| 668 | if !moreAttr {
|
---|
| 669 | break
|
---|
| 670 | }
|
---|
| 671 | }
|
---|
| 672 | }
|
---|
| 673 | if bytes.Equal(tag, []byte("link")) {
|
---|
| 674 | sanitizeLinkTag(rc, out, attrs)
|
---|
| 675 | break
|
---|
| 676 | }
|
---|
| 677 |
|
---|
| 678 | if bytes.Equal(tag, []byte("meta")) {
|
---|
| 679 | sanitizeMetaTag(rc, out, attrs)
|
---|
| 680 | break
|
---|
| 681 | }
|
---|
| 682 |
|
---|
| 683 | fmt.Fprintf(out, "<%s", tag)
|
---|
| 684 |
|
---|
| 685 | if hasAttrs {
|
---|
| 686 | sanitizeAttrs(rc, out, attrs)
|
---|
| 687 | }
|
---|
| 688 |
|
---|
| 689 | if token == html.SelfClosingTagToken {
|
---|
| 690 | fmt.Fprintf(out, " />")
|
---|
| 691 | } else {
|
---|
| 692 | fmt.Fprintf(out, ">")
|
---|
| 693 | if bytes.Equal(tag, []byte("style")) {
|
---|
| 694 | state = STATE_IN_STYLE
|
---|
| 695 | }
|
---|
| 696 | }
|
---|
| 697 |
|
---|
| 698 | if bytes.Equal(tag, []byte("head")) {
|
---|
| 699 | fmt.Fprintf(out, HTML_HEAD_CONTENT_TYPE)
|
---|
| 700 | }
|
---|
| 701 |
|
---|
| 702 | if bytes.Equal(tag, []byte("form")) {
|
---|
| 703 | var formURL *url.URL
|
---|
| 704 | for _, attr := range attrs {
|
---|
| 705 | if bytes.Equal(attr[0], []byte("action")) {
|
---|
| 706 | formURL, _ = url.Parse(string(attr[1]))
|
---|
| 707 | formURL = mergeURIs(rc.BaseURL, formURL)
|
---|
| 708 | break
|
---|
| 709 | }
|
---|
| 710 | }
|
---|
| 711 | if formURL == nil {
|
---|
| 712 | formURL = rc.BaseURL
|
---|
| 713 | }
|
---|
| 714 | urlStr := formURL.String()
|
---|
| 715 | var key string
|
---|
| 716 | if rc.Key != nil {
|
---|
| 717 | key = hash(urlStr, rc.Key)
|
---|
| 718 | }
|
---|
| 719 | err := HTML_FORM_EXTENSION.Execute(out, HTMLFormExtParam{urlStr, key, cfg.UrlParameter, cfg.HashParameter})
|
---|
| 720 | if err != nil {
|
---|
| 721 | if cfg.Debug {
|
---|
| 722 | fmt.Println("failed to inject body extension", err)
|
---|
| 723 | }
|
---|
| 724 | }
|
---|
| 725 | }
|
---|
| 726 |
|
---|
| 727 | case html.EndTagToken:
|
---|
| 728 | tag, _ := decoder.TagName()
|
---|
| 729 | writeEndTag := true
|
---|
| 730 | switch string(tag) {
|
---|
| 731 | case "body":
|
---|
| 732 | p := HTMLBodyExtParam{rc.BaseURL.String(), false, cfg.UrlParameter}
|
---|
| 733 | if len(rc.Key) > 0 {
|
---|
| 734 | p.HasYukariKey = true
|
---|
| 735 | }
|
---|
| 736 | err := HTML_BODY_EXTENSION.Execute(out, p)
|
---|
| 737 | if err != nil {
|
---|
| 738 | if cfg.Debug {
|
---|
| 739 | fmt.Println("failed to inject body extension", err)
|
---|
| 740 | }
|
---|
| 741 | }
|
---|
| 742 | rc.BodyInjected = true
|
---|
| 743 | case "style":
|
---|
| 744 | state = STATE_DEFAULT
|
---|
| 745 | case "noscript":
|
---|
| 746 | state = STATE_DEFAULT
|
---|
| 747 | writeEndTag = false
|
---|
| 748 | }
|
---|
| 749 | // skip noscript tags - only the tag, not the content, because javascript is sanitized
|
---|
| 750 | if writeEndTag {
|
---|
| 751 | fmt.Fprintf(out, "</%s>", tag)
|
---|
| 752 | }
|
---|
| 753 |
|
---|
| 754 | case html.TextToken:
|
---|
| 755 | switch state {
|
---|
| 756 | case STATE_DEFAULT:
|
---|
| 757 | fmt.Fprintf(out, "%s", decoder.Raw())
|
---|
| 758 | case STATE_IN_STYLE:
|
---|
| 759 | sanitizeCSS(rc, out, decoder.Raw())
|
---|
| 760 | case STATE_IN_NOSCRIPT:
|
---|
| 761 | sanitizeHTML(rc, out, decoder.Raw())
|
---|
| 762 | }
|
---|
| 763 |
|
---|
| 764 | case html.CommentToken:
|
---|
| 765 | // ignore comment. TODO : parse IE conditional comment
|
---|
| 766 |
|
---|
| 767 | case html.DoctypeToken:
|
---|
| 768 | out.Write(decoder.Raw())
|
---|
| 769 | }
|
---|
| 770 | } else {
|
---|
| 771 | switch token {
|
---|
| 772 | case html.StartTagToken, html.SelfClosingTagToken:
|
---|
| 773 | tag, _ := decoder.TagName()
|
---|
| 774 | if inArray(tag, UNSAFE_ELEMENTS) {
|
---|
| 775 | unsafeElements = append(unsafeElements, tag)
|
---|
| 776 | }
|
---|
| 777 |
|
---|
| 778 | case html.EndTagToken:
|
---|
| 779 | tag, _ := decoder.TagName()
|
---|
| 780 | if bytes.Equal(unsafeElements[len(unsafeElements)-1], tag) {
|
---|
| 781 | unsafeElements = unsafeElements[:len(unsafeElements)-1]
|
---|
| 782 | }
|
---|
| 783 | }
|
---|
| 784 | }
|
---|
| 785 | }
|
---|
| 786 | }
|
---|
| 787 |
|
---|
| 788 | func sanitizeLinkTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
|
---|
| 789 | exclude := false
|
---|
| 790 | for _, attr := range attrs {
|
---|
| 791 | attrName := attr[0]
|
---|
| 792 | attrValue := attr[1]
|
---|
| 793 | if bytes.Equal(attrName, []byte("rel")) {
|
---|
| 794 | if !inArray(attrValue, LINK_REL_SAFE_VALUES) {
|
---|
| 795 | exclude = true
|
---|
| 796 | break
|
---|
| 797 | }
|
---|
| 798 | }
|
---|
| 799 | if bytes.Equal(attrName, []byte("as")) {
|
---|
| 800 | if bytes.Equal(attrValue, []byte("script")) {
|
---|
| 801 | exclude = true
|
---|
| 802 | break
|
---|
| 803 | }
|
---|
| 804 | }
|
---|
| 805 | }
|
---|
| 806 |
|
---|
| 807 | if !exclude {
|
---|
| 808 | out.Write([]byte("<link"))
|
---|
| 809 | for _, attr := range attrs {
|
---|
| 810 | sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
|
---|
| 811 | }
|
---|
| 812 | out.Write([]byte(">"))
|
---|
| 813 | }
|
---|
| 814 | }
|
---|
| 815 |
|
---|
| 816 | func sanitizeMetaTag(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
|
---|
| 817 | var http_equiv []byte
|
---|
| 818 | var content []byte
|
---|
| 819 |
|
---|
| 820 | for _, attr := range attrs {
|
---|
| 821 | attrName := attr[0]
|
---|
| 822 | attrValue := attr[1]
|
---|
| 823 | if bytes.Equal(attrName, []byte("http-equiv")) {
|
---|
| 824 | http_equiv = bytes.ToLower(attrValue)
|
---|
| 825 | // exclude some <meta http-equiv="..." ..>
|
---|
| 826 | if !inArray(http_equiv, LINK_HTTP_EQUIV_SAFE_VALUES) {
|
---|
| 827 | return
|
---|
| 828 | }
|
---|
| 829 | }
|
---|
| 830 | if bytes.Equal(attrName, []byte("content")) {
|
---|
| 831 | content = attrValue
|
---|
| 832 | }
|
---|
| 833 | if bytes.Equal(attrName, []byte("charset")) {
|
---|
| 834 | // exclude <meta charset="...">
|
---|
| 835 | return
|
---|
| 836 | }
|
---|
| 837 | }
|
---|
| 838 |
|
---|
| 839 | out.Write([]byte("<meta"))
|
---|
| 840 | urlIndex := bytes.Index(bytes.ToLower(content), []byte("url="))
|
---|
| 841 | if bytes.Equal(http_equiv, []byte("refresh")) && urlIndex != -1 {
|
---|
| 842 | contentUrl := content[urlIndex+4:]
|
---|
| 843 | // special case of <meta http-equiv="refresh" content="0; url='example.com/url.with.quote.outside'">
|
---|
| 844 | if len(contentUrl) >= 2 && (contentUrl[0] == byte('\'') || contentUrl[0] == byte('"')) {
|
---|
| 845 | if contentUrl[0] == contentUrl[len(contentUrl)-1] {
|
---|
| 846 | contentUrl = contentUrl[1 : len(contentUrl)-1]
|
---|
| 847 | }
|
---|
| 848 | }
|
---|
| 849 | // output proxify result
|
---|
| 850 | if uri, err := rc.ProxifyURI(contentUrl); err == nil {
|
---|
| 851 | fmt.Fprintf(out, ` http-equiv="refresh" content="%surl=%s"`, content[:urlIndex], uri)
|
---|
| 852 | }
|
---|
| 853 | } else {
|
---|
| 854 | if len(http_equiv) > 0 {
|
---|
| 855 | fmt.Fprintf(out, ` http-equiv="%s"`, http_equiv)
|
---|
| 856 | }
|
---|
| 857 | sanitizeAttrs(rc, out, attrs)
|
---|
| 858 | }
|
---|
| 859 | out.Write([]byte(">"))
|
---|
| 860 | }
|
---|
| 861 |
|
---|
| 862 | func sanitizeAttrs(rc *RequestConfig, out io.Writer, attrs [][][]byte) {
|
---|
| 863 | for _, attr := range attrs {
|
---|
| 864 | sanitizeAttr(rc, out, attr[0], attr[1], attr[2])
|
---|
| 865 | }
|
---|
| 866 | }
|
---|
| 867 |
|
---|
| 868 | func sanitizeAttr(rc *RequestConfig, out io.Writer, attrName, attrValue, escapedAttrValue []byte) {
|
---|
| 869 | if inArray(attrName, SAFE_ATTRIBUTES) {
|
---|
| 870 | fmt.Fprintf(out, " %s=\"%s\"", attrName, escapedAttrValue)
|
---|
| 871 | return
|
---|
| 872 | }
|
---|
| 873 | switch string(attrName) {
|
---|
| 874 | case "src", "href", "action":
|
---|
| 875 | if uri, err := rc.ProxifyURI(attrValue); err == nil {
|
---|
| 876 | fmt.Fprintf(out, " %s=\"%s\"", attrName, uri)
|
---|
| 877 | } else if cfg.Debug {
|
---|
| 878 | log.Println("cannot proxify uri:", string(attrValue))
|
---|
| 879 | }
|
---|
| 880 | case "style":
|
---|
| 881 | cssAttr := bytes.NewBuffer(nil)
|
---|
| 882 | sanitizeCSS(rc, cssAttr, attrValue)
|
---|
| 883 | fmt.Fprintf(out, " %s=\"%s\"", attrName, html.EscapeString(string(cssAttr.Bytes())))
|
---|
| 884 | }
|
---|
| 885 | }
|
---|
| 886 |
|
---|
// mergeURIs resolves the reference u2 against the base u1 and returns the
// result. A nil u2 yields u1 itself.
func mergeURIs(u1, u2 *url.URL) *url.URL {
	if u2 != nil {
		return u1.ResolveReference(u2)
	}
	return u1
}
|
---|
| 893 |
|
---|
// sanitizeURI strips every byte <= 32 (control characters and space) from the
// beginning and the end of uri and, when the URI starts with a scheme, from
// inside the scheme as well; the scheme is also lower-cased.
//
// It returns the cleaned URI and the scheme including its trailing ':' (for
// example "javascript:"), or an empty scheme when a '/', '?', '\' or '#' is
// met before any ':' or when there is no ':' at all.
//
// The input slice is rewritten in place to avoid allocating a new URI; only
// the scheme is allocated.
func sanitizeURI(uri []byte) ([]byte, string) {
	// remove trailing space and special characters
	uri = bytes.TrimRight(uri, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20")

	firstVisible := -1 // index of the first byte > 32, -1 while none was seen
	colonAt := -1      // index of the ':' closing the scheme, -1 if no scheme
	compact := bytes.NewBuffer(make([]byte, 0, 10))

scan:
	for idx := 0; idx < len(uri); idx++ {
		ch := uri[idx]
		if ch <= 32 {
			// special characters and spaces are skipped
			continue
		}

		// collect the lower-cased byte
		if ch < utf8.RuneSelf && ch >= 'A' && ch <= 'Z' {
			ch += 'a' - 'A'
		}
		compact.WriteByte(ch)

		if firstVisible < 0 {
			firstVisible = idx
		}

		switch ch {
		case ':':
			// end of the scheme
			colonAt = idx
			break scan
		case '/', '?', '\\', '#':
			// most probably a relative URI: there is no scheme
			break scan
		}
	}

	if colonAt < 0 {
		// scheme NOT found: only the leading special characters are dropped
		if firstVisible < 0 {
			firstVisible = 0
		}
		return uri[firstVisible:], ""
	}

	// scheme found: write the compacted, lower-cased scheme right before the
	// rest of the URI so that it ends exactly on the ':' position
	start := colonAt - compact.Len() + 1
	copy(uri[start:], compact.Bytes())
	return uri[start:], compact.String()
}
|
---|
| 945 |
|
---|
| 946 | func (rc *RequestConfig) ProxifyURI(uri []byte) (string, error) {
|
---|
| 947 | // sanitize URI
|
---|
| 948 | uri, scheme := sanitizeURI(uri)
|
---|
| 949 |
|
---|
| 950 | // remove javascript protocol
|
---|
| 951 | if scheme == "javascript:" {
|
---|
| 952 | return "", nil
|
---|
| 953 | }
|
---|
| 954 |
|
---|
| 955 | // TODO check malicious data: - e.g. data:script
|
---|
| 956 | if scheme == "data:" {
|
---|
| 957 | if bytes.HasPrefix(uri, []byte("data:image/png")) ||
|
---|
| 958 | bytes.HasPrefix(uri, []byte("data:image/jpeg")) ||
|
---|
| 959 | bytes.HasPrefix(uri, []byte("data:image/pjpeg")) ||
|
---|
| 960 | bytes.HasPrefix(uri, []byte("data:image/gif")) ||
|
---|
| 961 | bytes.HasPrefix(uri, []byte("data:image/webp")) {
|
---|
| 962 | // should be safe
|
---|
| 963 | return string(uri), nil
|
---|
| 964 | } else {
|
---|
| 965 | // unsafe data
|
---|
| 966 | return "", nil
|
---|
| 967 | }
|
---|
| 968 | }
|
---|
| 969 |
|
---|
| 970 | // parse the uri
|
---|
| 971 | u, err := url.Parse(string(uri))
|
---|
| 972 | if err != nil {
|
---|
| 973 | return "", err
|
---|
| 974 | }
|
---|
| 975 |
|
---|
| 976 | // get the fragment (with the prefix "#")
|
---|
| 977 | fragment := ""
|
---|
| 978 | if len(u.Fragment) > 0 {
|
---|
| 979 | fragment = "#" + u.Fragment
|
---|
| 980 | }
|
---|
| 981 |
|
---|
| 982 | // reset the fragment: it is not included in the yukariurl
|
---|
| 983 | u.Fragment = ""
|
---|
| 984 |
|
---|
| 985 | // merge the URI with the document URI
|
---|
| 986 | u = mergeURIs(rc.BaseURL, u)
|
---|
| 987 |
|
---|
| 988 | // simple internal link ?
|
---|
| 989 | // some web pages describe the whole link https://same:auth@same.host/same.path?same.query#new.fragment
|
---|
| 990 | if u.Scheme == rc.BaseURL.Scheme &&
|
---|
| 991 | (rc.BaseURL.User == nil || (u.User != nil && u.User.String() == rc.BaseURL.User.String())) &&
|
---|
| 992 | u.Host == rc.BaseURL.Host &&
|
---|
| 993 | u.Path == rc.BaseURL.Path &&
|
---|
| 994 | u.RawQuery == rc.BaseURL.RawQuery {
|
---|
| 995 | // the fragment is the only difference between the document URI and the uri parameter
|
---|
| 996 | return fragment, nil
|
---|
| 997 | }
|
---|
| 998 |
|
---|
| 999 | // return full URI and fragment (if not empty)
|
---|
| 1000 | yukari_uri := u.String()
|
---|
| 1001 |
|
---|
| 1002 | if rc.Key == nil {
|
---|
| 1003 | return fmt.Sprintf("./?%s=%s%s", cfg.UrlParameter, url.QueryEscape(yukari_uri), fragment), nil
|
---|
| 1004 | }
|
---|
| 1005 | return fmt.Sprintf("./?%s=%s&%s=%s%s", cfg.HashParameter, hash(yukari_uri, rc.Key), cfg.UrlParameter, url.QueryEscape(yukari_uri), fragment), nil
|
---|
| 1006 | }
|
---|
| 1007 |
|
---|
// inArray reports whether a contains an element whose bytes equal b.
func inArray(b []byte, a [][]byte) bool {
	for i := 0; i < len(a); i++ {
		if !bytes.Equal(a[i], b) {
			continue
		}
		return true
	}
	return false
}
|
---|
| 1016 |
|
---|
// hash returns the HMAC-SHA256 of msg computed with key, encoded as a
// lower-case hexadecimal string.
func hash(msg string, key []byte) string {
	signer := hmac.New(sha256.New, key)
	signer.Write([]byte(msg))
	digest := signer.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
|
---|
| 1022 |
|
---|
| 1023 | func verifyRequestURI(uri, hashMsg, key []byte) bool {
|
---|
| 1024 | h := make([]byte, hex.DecodedLen(len(hashMsg)))
|
---|
| 1025 | _, err := hex.Decode(h, hashMsg)
|
---|
| 1026 | if err != nil {
|
---|
| 1027 | if cfg.Debug {
|
---|
| 1028 | log.Println("hmac error:", err)
|
---|
| 1029 | }
|
---|
| 1030 | return false
|
---|
| 1031 | }
|
---|
| 1032 | mac := hmac.New(sha256.New, key)
|
---|
| 1033 | mac.Write(uri)
|
---|
| 1034 | return hmac.Equal(h, mac.Sum(nil))
|
---|
| 1035 | }
|
---|
| 1036 |
|
---|
| 1037 | func (p *Proxy) serveExitYukariPage(ctx *fasthttp.RequestCtx, uri *url.URL) {
|
---|
| 1038 | ctx.SetContentType("text/html")
|
---|
| 1039 | ctx.SetStatusCode(403)
|
---|
| 1040 | ctx.Write([]byte(YUKARI_HTML_PAGE_START))
|
---|
| 1041 | ctx.Write([]byte("<h2>You are about to exit YukariSukima</h2>"))
|
---|
| 1042 | ctx.Write([]byte("<p>Following</p><p><a href=\""))
|
---|
| 1043 | ctx.Write([]byte(html.EscapeString(uri.String())))
|
---|
| 1044 | ctx.Write([]byte("\" rel=\"noreferrer\">"))
|
---|
| 1045 | ctx.Write([]byte(html.EscapeString(uri.String())))
|
---|
| 1046 | ctx.Write([]byte("</a></p><p>the content of this URL will be <b>NOT</b> sanitized.</p>"))
|
---|
| 1047 | ctx.Write([]byte(YUKARI_HTML_PAGE_END))
|
---|
| 1048 | }
|
---|
| 1049 |
|
---|
| 1050 | func (p *Proxy) serveMainPage(ctx *fasthttp.RequestCtx, statusCode int, err error) {
|
---|
| 1051 | ctx.SetContentType("text/html; charset=UTF-8")
|
---|
| 1052 | ctx.SetStatusCode(statusCode)
|
---|
| 1053 | ctx.Write([]byte(YUKARI_HTML_PAGE_START))
|
---|
| 1054 | if err != nil {
|
---|
| 1055 | if cfg.Debug {
|
---|
| 1056 | log.Println("error:", err)
|
---|
| 1057 | }
|
---|
| 1058 | ctx.Write([]byte("<h2>Error: "))
|
---|
| 1059 | ctx.Write([]byte(html.EscapeString(err.Error())))
|
---|
| 1060 | ctx.Write([]byte("</h2>"))
|
---|
| 1061 | }
|
---|
| 1062 | if p.Key == nil {
|
---|
| 1063 | p := HTMLMainPageFormParam{cfg.UrlParameter}
|
---|
| 1064 | err := HTML_MAIN_PAGE_FORM.Execute(ctx, p)
|
---|
| 1065 | if err != nil {
|
---|
| 1066 | if cfg.Debug {
|
---|
| 1067 | fmt.Println("failed to inject main page form", err)
|
---|
| 1068 | }
|
---|
| 1069 | }
|
---|
| 1070 | } else {
|
---|
| 1071 | ctx.Write([]byte(`<h3>Warning! This instance does not support direct URL opening.</h3>`))
|
---|
| 1072 | }
|
---|
| 1073 | ctx.Write([]byte(YUKARI_HTML_PAGE_END))
|
---|
| 1074 | }
|
---|
| 1075 |
|
---|
| 1076 | func main() {
|
---|
| 1077 | listenAddress := flag.String("listen", cfg.ListenAddress, "Listen address")
|
---|
| 1078 | key := flag.String("key", cfg.Key, "HMAC url validation key (base64 encoded) - leave blank to disable validation")
|
---|
| 1079 | IPV6 := flag.Bool("ipv6", cfg.IPV6, "Allow IPv6 HTTP requests")
|
---|
| 1080 | debug := flag.Bool("debug", cfg.Debug, "Debug mode")
|
---|
| 1081 | requestTimeout := flag.Uint("timeout", cfg.RequestTimeout, "Request timeout")
|
---|
| 1082 | followRedirect := flag.Bool("followredirect", cfg.FollowRedirect, "Follow HTTP GET redirect")
|
---|
| 1083 | proxyenv := flag.Bool("proxyenv", false, "Use a HTTP proxy as set in the environment (HTTP_PROXY, HTTPS_PROXY and NO_PROXY). Overrides -proxy, -socks5, -ipv6.")
|
---|
| 1084 | proxy := flag.String("proxy", "", "Use the specified HTTP proxy (ie: '[user:pass@]hostname:port'). Overrides -socks5, -ipv6.")
|
---|
| 1085 | socks5 := flag.String("socks5", "", "Use a SOCKS5 proxy (ie: 'hostname:port'). Overrides -ipv6.")
|
---|
| 1086 | urlParameter := flag.String("urlparam", cfg.UrlParameter, "user-defined requesting string URL parameter name (ie: '/?url=...' or '/?u=...')")
|
---|
| 1087 | hashParameter := flag.String("hashparam", cfg.HashParameter, "user-defined requesting string HASH parameter name (ie: '/?hash=...' or '/?h=...')")
|
---|
| 1088 | version := flag.Bool("version", false, "Show version")
|
---|
| 1089 | flag.Parse()
|
---|
| 1090 |
|
---|
| 1091 | cfg.ListenAddress = *listenAddress
|
---|
| 1092 | cfg.Key = *key
|
---|
| 1093 | cfg.IPV6 = *IPV6
|
---|
| 1094 | cfg.Debug = *debug
|
---|
| 1095 | cfg.RequestTimeout = *requestTimeout
|
---|
| 1096 | cfg.FollowRedirect = *followRedirect
|
---|
| 1097 | cfg.UrlParameter = *urlParameter
|
---|
| 1098 | cfg.HashParameter = *hashParameter
|
---|
| 1099 |
|
---|
| 1100 | if *version {
|
---|
| 1101 | fmt.Println(VERSION)
|
---|
| 1102 | return
|
---|
| 1103 | }
|
---|
| 1104 |
|
---|
| 1105 | if *proxyenv && os.Getenv("HTTP_PROXY") == "" && os.Getenv("HTTPS_PROXY") == "" {
|
---|
| 1106 | log.Fatal("Error -proxyenv is used but no environment variables named 'HTTP_PROXY' and/or 'HTTPS_PROXY' could be found.")
|
---|
| 1107 | os.Exit(1)
|
---|
| 1108 | }
|
---|
| 1109 |
|
---|
| 1110 | if *proxyenv {
|
---|
| 1111 | CLIENT.Dial = fasthttpproxy.FasthttpProxyHTTPDialer()
|
---|
| 1112 | log.Println("Using environment defined proxy(ies).")
|
---|
| 1113 | } else if *proxy != "" {
|
---|
| 1114 | CLIENT.Dial = fasthttpproxy.FasthttpHTTPDialer(*proxy)
|
---|
| 1115 | log.Println("Using custom HTTP proxy.")
|
---|
| 1116 | } else if *socks5 != "" {
|
---|
| 1117 | CLIENT.Dial = fasthttpproxy.FasthttpSocksDialer(*socks5)
|
---|
| 1118 | log.Println("Using Socks5 proxy.")
|
---|
| 1119 | } else if cfg.IPV6 {
|
---|
| 1120 | CLIENT.Dial = fasthttp.DialDualStack
|
---|
| 1121 | log.Println("Using dual stack (IPv4/IPv6) direct connections.")
|
---|
| 1122 | } else {
|
---|
| 1123 | CLIENT.Dial = fasthttp.Dial
|
---|
| 1124 | log.Println("Using IPv4 only direct connections.")
|
---|
| 1125 | }
|
---|
| 1126 |
|
---|
| 1127 | p := &Proxy{RequestTimeout: time.Duration(cfg.RequestTimeout) * time.Second,
|
---|
| 1128 | FollowRedirect: cfg.FollowRedirect}
|
---|
| 1129 |
|
---|
| 1130 | if cfg.Key != "" {
|
---|
| 1131 | var err error
|
---|
| 1132 | p.Key, err = base64.StdEncoding.DecodeString(cfg.Key)
|
---|
| 1133 | if err != nil {
|
---|
| 1134 | log.Fatal("Error parsing -key", err.Error())
|
---|
| 1135 | os.Exit(1)
|
---|
| 1136 | }
|
---|
| 1137 | }
|
---|
| 1138 |
|
---|
| 1139 | log.Println("listening on", cfg.ListenAddress)
|
---|
| 1140 |
|
---|
| 1141 | if err := fasthttp.ListenAndServe(cfg.ListenAddress, p.RequestHandler); err != nil {
|
---|
| 1142 | log.Fatal("Error in ListenAndServe:", err)
|
---|
| 1143 | }
|
---|
| 1144 | }
|
---|