Package gluon :: Module rewrite
[hide private]
[frames] | no frames]

Source Code for Module gluon.rewrite

   1  #!/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3   
   4  """ 
   5  This file is part of the web2py Web Framework 
   6  Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu> 
   7  License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html) 
   8   
   9  gluon.rewrite parses incoming URLs and formats outgoing URLs for gluon.html.URL. 
  10   
  11  In addition, it rewrites both incoming and outgoing URLs based on the (optional) user-supplied routes.py, 
  12  which also allows for rewriting of certain error messages. 
  13   
  14  routes.py supports two styles of URL rewriting, depending on whether 'routers' is defined. 
  15  Refer to router.example.py and routes.example.py for additional documentation. 
  16   
  17  """ 
  18   
  19  import os 
  20  import re 
  21  import logging 
  22  import traceback 
  23  import threading 
  24  import urllib 
  25  from gluon.storage import Storage, List 
  26  from gluon.http import HTTP 
  27  from gluon.fileutils import abspath, read_file 
  28  from gluon.settings import global_settings 
  29   
  30  isdir = os.path.isdir 
  31  isfile = os.path.isfile 
  32  exists = os.path.exists 
  33  pjoin = os.path.join 
  34   
  35  logger = logging.getLogger('web2py.rewrite') 
  36  THREAD_LOCAL = threading.local()  # thread-local storage for routing params 
  37   
  38  regex_at = re.compile(r'(?<!\\)\$[a-zA-Z]\w*') 
  39  regex_anything = re.compile(r'(?<!\\)\$anything') 
  40  regex_redirect = re.compile(r'(\d+)->(.*)') 
  41  regex_full_url = re.compile( 
  42      r'^(?P<scheme>http|https|HTTP|HTTPS)\://(?P<host>[^/]*)(?P<uri>.*)') 
  43  regex_version = re.compile(r'^(_[\d]+\.[\d]+\.[\d]+)$') 
  44  # pattern to replace spaces with underscore in URL 
  45  #   also the html escaped variants '+' and '%20' are covered 
  46  regex_space = re.compile('(\+|\s|%20)+') 
  47   
  48  # pattern to find valid paths in url /application/controller/... 
  49  #   this could be: 
  50  #     for static pages: 
  51  #        /<b:application>/static/<x:file> 
  52  #     for dynamic pages: 
  53  #        /<a:application>[/<c:controller>[/<f:function>[.<e:ext>][/<s:args>]]] 
  54  #   application, controller, function and ext may only contain [a-zA-Z0-9_] 
  55  #   file and args may also contain '-', '=', '.' and '/' 
  56  #   apps in routes_apps_raw must parse raw_args into args 
  57   
  58  regex_url = re.compile('^/((?P<a>\w+)(/(?P<c>\w+)(/(?P<z>(?P<f>\w+)(\.(?P<e>[\w.]+))?(?P<s>.*)))?)?)?$') 
  59  regex_args = re.compile('^[/\w@=-]*(\.[/\w@=-]+)*$') 
def _router_default():
    """Build and return a fresh Storage with the default base-router settings.

    A new object (including new list/dict values) is created on every call.
    """
    defaults = {
        'default_application': 'init',
        'applications': 'ALL',
        'default_controller': 'default',
        'controllers': 'DEFAULT',
        'default_function': 'index',
        'functions': {},
        'default_language': None,
        'languages': None,
        'root_static': ['favicon.ico', 'robots.txt'],
        'map_static': None,
        'domains': None,
        'exclusive_domain': False,
        'map_hyphen': False,
        # legal app/ctlr/fcn/ext
        'acfe_match': r'\w+$',
        #
        #  Implementation note:
        #  The file_match & args_match patterns use look-behind to avoid
        #  pathological backtracking from nested patterns.
        #
        # legal static subpath
        'file_match': r'([-+=@$%\w]|(?<=[-+=@$%\w])[./])*$',
        # legal arg in args
        'args_match': r'([\w@ -]|(?<=[\w@ -])[.=])*$',
    }
    return Storage(defaults)
88
def _params_default(app=None):
    """Build and return a fresh Storage of default regex-rewrite parameters.

    ``app`` names the application the parameters belong to; when it is
    falsy the set is named "BASE" and the default application is "init".
    """
    ticket_page = (
        '<html><body><h1>Internal error</h1>Ticket issued: <a href="/admin/default/ticket/%(ticket)s" target="_blank">%(ticket)s</a></body><!-- this is junk text else IE does not display the page: '
        + ('x' * 512) + ' //--></html>')
    # (attribute, value) pairs, applied in order; the lists are created
    # here so that every call hands out its own instances
    settings = (
        ('name', app or "BASE"),
        ('default_application', app or "init"),
        ('default_controller', "default"),
        ('default_function', "index"),
        ('routes_app', []),
        ('routes_in', []),
        ('routes_out', []),
        ('routes_onerror', []),
        ('routes_apps_raw', []),
        ('error_handler', None),
        ('error_message', '<html><body><h1>%s</h1></body></html>'),
        ('error_message_ticket', ticket_page),
        ('routers', None),
        ('logging', 'off'),
    )
    p = Storage()
    for attribute, value in settings:
        # attribute-style assignment, exactly as p.<attribute> = value
        setattr(p, attribute, value)
    return p
# per-application rewrite parameters, keyed by application name
params_apps = {}
# base regex rewrite parameters
params = _params_default(app=None)
# the current thread starts out on the base regex rewrite parameters
THREAD_LOCAL.routes = params
# router configuration; stays None until load() installs one
routers = None
def log_rewrite(string):
    """Log rewrite activity under control of routes.py.

    The destination is chosen by ``params.logging``:
    'off' or any falsy value discards the message, 'print' writes it to
    standard output, 'info'/'warning'/'error'/'critical' use the logger
    method of the same name, and anything else (including 'debug') goes
    to ``logger.debug``.
    """
    level = params.logging
    if level == 'off' or not level:
        return
    if level == 'print':
        print(string)
        return
    if level in ('info', 'warning', 'error', 'critical'):
        getattr(logger, level)(string)
    else:
        logger.debug(string)
# every key that may appear in a router definition
ROUTER_KEYS = set([
    'default_application',
    'applications',
    'default_controller',
    'controllers',
    'default_function',
    'functions',
    'default_language',
    'languages',
    'domain',
    'domains',
    'root_static',
    'path_prefix',
    'exclusive_domain',
    'map_hyphen',
    'map_static',
    'acfe_match',
    'file_match',
    'args_match',
])

# keys that are only allowed in the BASE router
ROUTER_BASE_KEYS = set([
    'applications',
    'default_application',
    'domains',
    'path_prefix',
])
# The external interface to rewrite consists of:
#
#  load: load routing configuration file(s)
#  url_in: parse and rewrite incoming URL
#  url_out: assemble and rewrite outgoing URL
#
#  THREAD_LOCAL.routes.default_application
#  THREAD_LOCAL.routes.error_message
#  THREAD_LOCAL.routes.error_message_ticket
#  THREAD_LOCAL.routes.try_redirect_on_error
#  THREAD_LOCAL.routes.error_handler
#
#  filter_url: helper for doctest & unittest
#  filter_err: helper for doctest & unittest
#  regex_filter_out: doctest


def fixup_missing_path_info(environ):
    """Fill in PATH_INFO, QUERY_STRING, REQUEST_URI and HTTP_HOST in place.

    - If PATH_INFO is missing/empty but REQUEST_URI is present, PATH_INFO
      and QUERY_STRING are derived from REQUEST_URI.
    - Otherwise, if REQUEST_URI is missing/empty, it is rebuilt from
      PATH_INFO and QUERY_STRING.
    - If HTTP_HOST is missing/empty, it is set to SERVER_NAME:SERVER_PORT.

    ``environ`` is a WSGI-style mapping and is modified in place; nothing
    is returned.
    """
    eget = environ.get
    path_info = eget('PATH_INFO')
    request_uri = eget('REQUEST_URI')
    if not path_info and request_uri:
        # for fcgi, get path_info and
        # query_string from request_uri
        # Split on the FIRST '?' only: a query string may itself contain
        # '?' characters, and everything after the first one belongs to it.
        path_info, _, query_string = request_uri.partition('?')
        environ['PATH_INFO'] = path_info
        environ['QUERY_STRING'] = query_string
    elif not request_uri:
        query_string = eget('QUERY_STRING')
        if query_string:
            environ['REQUEST_URI'] = '%s?%s' % (path_info, query_string)
        else:
            environ['REQUEST_URI'] = path_info
    if not eget('HTTP_HOST'):
        environ['HTTP_HOST'] = \
            '%s:%s' % (eget('SERVER_NAME'), eget('SERVER_PORT'))
184
def url_in(request, environ):
    """Parse and rewrite an incoming URL.

    Delegates to map_url_in() when routers are configured and to
    regex_url_in() otherwise, returning whatever the delegate returns.
    """
    handler = map_url_in if routers else regex_url_in
    return handler(request, environ)
191
def url_out(request, environ, application, controller, function,
            args, other, scheme, host, port):
    """Assemble and rewrite an outgoing URL.

    The path part comes from map_url_out() when routers are configured,
    otherwise from regex_filter_out() applied to /application/controller/
    function.  ``other`` is appended to the path.  When an absolute URL is
    requested (``host`` is True, or ``host`` is None while ``scheme`` or
    ``port`` is given) the scheme and host are prepended.
    """
    if routers:
        mapped = map_url_out(request, environ, application, controller,
                             function, args, other, scheme, host, port)
        url = '%s%s' % (mapped, other)
    else:
        plain = '/%s/%s/%s%s' % (application, controller, function, other)
        url = regex_filter_out(plain, environ)

    #
    #  fill in scheme and host if absolute URL is requested
    #  scheme can be a string, eg 'http', 'https', 'ws', 'wss'
    #
    wants_request_host = host is True or (
        host is None and (scheme or port is not None))
    if wants_request_host:
        host = request.env.http_host
    if not scheme or scheme is True:
        if request:
            scheme = request.env.get('wsgi_url_scheme', 'http').lower()
        else:
            scheme = 'http'

    if not host:
        return url
    if port:
        # replace whatever port the host string carried with the given one
        host_port = host.split(':', 1)[0] + ':%s' % port
    else:
        host_port = host
    return '%s://%s%s' % (scheme, host_port, url)
216
def try_rewrite_on_error(http_response, request, environ, ticket=None):
    """
    called from main.wsgibase to rewrite the http response.

    Looks up the first routes_onerror entry matching the application and
    status code and returns a ``(response, environ)`` pair:

    - ``(http_response, environ)`` unchanged when nothing applies or the
      matching target is '!';
    - ``(HTTP 303 redirect, environ)`` when the target is an http(s) URL;
    - ``(None, environ)`` with PATH_INFO/QUERY_STRING rewritten when the
      target is a local path and no rewrite has been done yet for this
      environ.
    """
    status = int(str(http_response.status).split()[0])
    if status < 399 or not THREAD_LOCAL.routes.routes_onerror:
        # do nothing!
        return http_response, environ

    app = request.application
    candidates = set(['%s/%s' % (app, status),
                      '%s/*' % app,
                      '*/%s' % status,
                      '*/*'])
    for key, uri in THREAD_LOCAL.routes.routes_onerror:
        if key not in candidates:
            continue
        if uri == '!':
            # do nothing!
            return http_response, environ

        path_info, has_query, query_string = uri.partition('?')
        if has_query:
            query_string += '&'
        query_string += \
            'code=%s&ticket=%s&requested_uri=%s&request_url=%s' % \
            (status, ticket,
             urllib.quote_plus(request.env.request_uri), request.url)

        if uri.startswith(('http://', 'https://')):
            # make up a response
            url = path_info + '?' + query_string
            message = 'You are being redirected <a href="%s">here</a>'
            return HTTP(303, message % url, Location=url), environ

        if not environ.get('__ROUTES_ONERROR__', False):
            # wsgibase will be called recursively with
            # the routes_onerror path.
            environ['__ROUTES_ONERROR__'] = True  # limit recursion
            path_info = '/' + path_info.lstrip('/')
            environ['PATH_INFO'] = path_info
            environ['QUERY_STRING'] = query_string
            environ['WEB2PY_STATUS_CODE'] = status
            return None, environ
        # a rewrite already happened for this environ: try the next entry

    # do nothing!
    return http_response, environ
258
def try_redirect_on_error(http_object, request, ticket=None):
    """called from main.wsgibase to rewrite the http response

    For a status above 399 with routes_onerror configured, the first
    entry matching the application/status yields an HTTP 303 redirect to
    its target, with code, ticket, requested_uri and request_url appended
    as query parameters.  A '!' target, or no match, returns
    ``http_object`` unchanged.
    """
    status = int(str(http_object.status).split()[0])
    if status <= 399 or not THREAD_LOCAL.routes.routes_onerror:
        return http_object

    app = request.application
    candidates = set(['%s/%s' % (app, status),
                      '%s/*' % app,
                      '*/%s' % status,
                      '*/*'])
    for key, redir in THREAD_LOCAL.routes.routes_onerror:
        if key not in candidates:
            continue
        if redir == '!':
            break
        # extend an existing query string with '&', otherwise start one
        head = '%s&' if '?' in redir else '%s?'
        template = head + \
            'code=%s&ticket=%s&requested_uri=%s&request_url=%s'
        url = template % (redir, status, ticket,
                          urllib.quote_plus(request.env.request_uri),
                          request.url)
        return HTTP(303, 'You are being redirected <a href="%s">here</a>' % url, Location=url)
    return http_object
284
def load(routes='routes.py', app=None, data=None, rdict=None):
    """
    load: read (if file) and parse routes
    store results in params
    (called from main.py at web2py initialization time)
    If data is present, it's used instead of the routes.py contents.
    If rdict is present, it must be a dict to be used for routers (unit test)

    Arguments:
        routes: name of the routes file, resolved relative to the web2py
            root (base call) or to applications/<app>/ (per-app call).
        app: application name, or None for the base configuration.  A base
            call resets all module-level routing state first and then
            recurses once for every application that has its own routes file.
        data: source text to execute in place of the file contents.
        rdict: dict installed as ``routers`` without executing any source.

    Returns None.  Returns early, without logging, when the routes file
    does not exist.  Re-raises SyntaxError from the routes source and raises
    SyntaxError for a BASE-only key found in an application router.
    """
    global params
    global routers
    if app is None:
        # reinitialize
        global params_apps
        params_apps = dict()
        params = _params_default(app=None)  # regex rewrite parameters
        THREAD_LOCAL.routes = params  # default to base regex rewrite parameters
        routers = None

    if isinstance(rdict, dict):
        symbols = dict(routers=rdict)
        path = 'rdict'
    else:
        if data is not None:
            path = 'routes'
        else:
            if app is None:
                path = abspath(routes)
            else:
                path = abspath('applications', app, routes)
            if not exists(path):
                return
            data = read_file(path).replace('\r\n', '\n')

        # NOTE(review): the routes source is executed as Python code; it
        # must come from a trusted location.
        symbols = dict(app=app)
        try:
            exec (data + '\n') in symbols
        except SyntaxError, e:
            logger.error(
                '%s has a syntax error and will not be loaded\n' % path
                + traceback.format_exc())
            raise e

    p = _params_default(app)

    # regex route lists are compiled entry by entry
    for sym in ('routes_app', 'routes_in', 'routes_out'):
        if sym in symbols:
            for items in symbols[sym]:
                p[sym].append(compile_regex(*items))
    # the remaining settings are copied verbatim
    for sym in ('routes_onerror', 'routes_apps_raw',
                'error_handler', 'error_message', 'error_message_ticket',
                'default_application', 'default_controller', 'default_function',
                'logging'):
        if sym in symbols:
            p[sym] = symbols[sym]
    if 'routers' in symbols:
        # wrap the routers dict, and each dict-valued router, in Storage
        p.routers = Storage(symbols['routers'])
        for key in p.routers:
            if isinstance(p.routers[key], dict):
                p.routers[key] = Storage(p.routers[key])

    if app is None:
        params = p                  # install base rewrite parameters
        THREAD_LOCAL.routes = params  # install default as current routes
        #
        #  create the BASE router if routers in use
        #
        routers = params.routers    # establish routers if present
        if isinstance(routers, dict):
            routers = Storage(routers)
        if routers is not None:
            # user-supplied BASE settings override the built-in defaults
            router = _router_default()
            if routers.BASE:
                router.update(routers.BASE)
            routers.BASE = router

        #  scan each app in applications/
        #    create a router, if routers are in use
        #    parse the app-specific routes.py if present
        #
        all_apps = []
        apppath = abspath('applications')
        for appname in os.listdir(apppath):
            # an application is a non-hidden directory that contains a
            # controllers/ subdirectory
            if not appname.startswith('.') and \
                    isdir(abspath(apppath, appname)) and \
                    isdir(abspath(apppath, appname, 'controllers')):
                all_apps.append(appname)
                if routers:
                    router = Storage(routers.BASE)   # new copy
                    if appname in routers:
                        for key in routers[appname].keys():
                            if key in ROUTER_BASE_KEYS:
                                raise SyntaxError("BASE-only key '%s' in router '%s'" % (key, appname))
                        router.update(routers[appname])
                    routers[appname] = router
                if exists(abspath('applications', appname, routes)):
                    # recursive per-application load
                    load(routes, appname)

        if routers:
            load_routers(all_apps)

    else:  # app
        params_apps[app] = p
        if routers and p.routers:
            # merge the app's own router settings into the global router
            if app in p.routers:
                routers[app].update(p.routers[app])

    log_rewrite('URL rewrite is on. configuration in %s' % path)
393
def compile_regex(k, v, env=None):
    """
    Preprocess and compile the regular expressions in routes_app/in/out
    The resulting regex will match a pattern of the form:

        [remote address]:[protocol]://[host]:[method] [path]

    We allow abbreviated regexes on input; here we try to complete them.

    Arguments:
        k: the (possibly abbreviated) match pattern; may contain $name
            placeholders and the special $anything placeholder.
        v: the replacement pattern; $name placeholders become \\g<name>
            group references.
        env: optional dict stored alongside the rule ({} when omitted).

    Returns a tuple ``(compiled_pattern, replacement, env)``.

    Raises SyntaxError when the pattern has address parts but its path
    does not start with '/'.
    """
    k0 = k  # original k for error reporting
    # bracket regex in ^...$ if not already done
    # (startswith/endswith also cope with an empty pattern)
    if not k.startswith('^'):
        k = '^%s' % k
    if not k.endswith('$'):
        k = '%s$' % k
    # if there are no :-separated parts, prepend a catch-all for the IP address
    if k.find(':') < 0:
        # k = '^.*?:%s' % k[1:]
        k = '^.*?:https?://[^:/]+:[a-z]+ %s' % k[1:]
    # if there's no ://, provide a catch-all for the protocol, host & method
    if k.find('://') < 0:
        i = k.find(':/')
        if i < 0:
            raise SyntaxError("routes pattern syntax error: path needs leading '/' [%s]" % k0)
        k = r'%s:https?://[^:/]+:[a-z]+ %s' % (k[:i], k[i + 1:])
    # Placeholders are substituted with regex.sub() rather than
    # str.replace(): each match is rewritten exactly where it occurs, so a
    # short name ($a) cannot clobber the start of a longer one ($app), and
    # backslash-escaped placeholders are left alone.  Callables are used as
    # replacements so the backslashes in the result are taken literally.
    # $anything -> ?P<anything>.*
    k = regex_anything.sub(lambda m: '(?P<anything>.*)', k)
    # $a (etc) -> ?P<a>\w+
    k = regex_at.sub(lambda m: r'(?P<%s>\w+)' % m.group(0)[1:], k)
    # same for replacement pattern, but with \g
    v = regex_at.sub(lambda m: r'\g<%s>' % m.group(0)[1:], v)
    return (re.compile(k, re.DOTALL), v, env or {})
430
def load_routers(all_apps):
    """load-time post-processing of routers

    Normalizes every entry of the module-level ``routers`` in place:
    validates keys, converts controllers/languages/functions to sets,
    compiles the validation patterns, and rewrites BASE.domains into a
    dict keyed by (domain, port).

    ``all_apps`` is the list of known application names; routers for
    applications not in the list are added to it.

    Raises SyntaxError for BASE-only or unknown router keys and for a
    domain that maps to an unknown application.
    """

    for app in routers:
        # initialize apps with routers that aren't present,
        # on behalf of unit tests
        if app not in all_apps:
            all_apps.append(app)
            router = Storage(routers.BASE)   # new copy
            if app != 'BASE':
                keys = set(routers[app]).intersection(ROUTER_BASE_KEYS)
                if keys:
                    raise SyntaxError("BASE-only key(s) %s in router '%s'" % (
                        tuple(keys), app))
            router.update(routers[app])
            routers[app] = router
        router = routers[app]
        keys = set(router).difference(ROUTER_KEYS)
        if keys:
            raise SyntaxError("unknown key(s) %s in router '%s'" % (
                tuple(keys), app))
        # controllers: falsy -> empty set; a string is kept as-is for the
        # 'DEFAULT' check below; any other iterable -> set
        if not router.controllers:
            router.controllers = set()
        elif not isinstance(router.controllers, str):
            router.controllers = set(router.controllers)
        if router.languages:
            router.languages = set(router.languages)
        else:
            router.languages = set()
        if router.functions:
            # a flat collection of functions is attached to the default
            # controller; afterwards functions is {controller: set(names)}
            if isinstance(router.functions, (set, tuple, list)):
                functions = set(router.functions)
                if isinstance(router.default_function, str):
                    functions.add(
                        router.default_function)  # legacy compatibility
                router.functions = {router.default_controller: functions}
            for controller in router.functions:
                router.functions[controller] = set(
                    router.functions[controller])
        else:
            router.functions = dict()
        if app != 'BASE':
            # app routers do not keep the keys inherited from BASE
            for base_only in ROUTER_BASE_KEYS:
                router.pop(base_only, None)
            if 'domain' in router:
                routers.BASE.domains[router.domain] = app
            if isinstance(router.controllers, str) and router.controllers == 'DEFAULT':
                # 'DEFAULT': collect the *.py files of the app's
                # controllers directory
                router.controllers = set()
                if isdir(abspath('applications', app)):
                    cpath = abspath('applications', app, 'controllers')
                    for cname in os.listdir(cpath):
                        if isfile(abspath(cpath, cname)) and cname.endswith('.py'):
                            router.controllers.add(cname[:-3])
            if router.controllers:
                router.controllers.add('static')
                router.controllers.add(router.default_controller)

    # BASE.applications: 'ALL' expands to every known app; result is a set
    if isinstance(routers.BASE.applications, str) and routers.BASE.applications == 'ALL':
        routers.BASE.applications = list(all_apps)
    if routers.BASE.applications:
        routers.BASE.applications = set(routers.BASE.applications)
    else:
        routers.BASE.applications = set()

    for app in routers.keys():
        # set router name
        router = routers[app]
        router.name = app
        # compile URL validation patterns
        router._acfe_match = re.compile(router.acfe_match)
        router._file_match = re.compile(router.file_match)
        if router.args_match:
            router._args_match = re.compile(router.args_match)
        # convert path_prefix to a list of path elements
        if router.path_prefix:
            if isinstance(router.path_prefix, str):
                router.path_prefix = router.path_prefix.strip('/').split('/')

    #  rewrite BASE.domains as tuples
    #
    #      key:    'domain[:port]' -> (domain, port)
    #      value:  'application[/controller[/function]]'
    #                  -> (application, controller, function)
    #      (port, controller and function may be None)
    #
    domains = dict()
    if routers.BASE.domains:
        for (d, a) in routers.BASE.domains.iteritems():
            (domain, app) = (d.strip(':'), a.strip('/'))
            if ':' in domain:
                # NOTE(review): unpacking assumes exactly one ':' in the key
                (domain, port) = domain.split(':')
            else:
                port = None
            if '/' in app:
                (app, ctlr) = app.split('/', 1)
            else:
                ctlr = None
            if ctlr and '/' in ctlr:
                # NOTE(review): unpacking assumes at most controller/function
                (ctlr, fcn) = ctlr.split('/')
            else:
                fcn = None
            if app not in all_apps and app not in routers:
                raise SyntaxError("unknown app '%s' in domains" % app)
            domains[(domain, port)] = (app, ctlr, fcn)
    routers.BASE.domains = domains
536
def regex_uri(e, regexes, tag, default=None):
    """filter incoming URI against a list of regexes

    Builds the key ``remote:scheme://host:method path`` from the environ
    ``e`` and applies the first ``(regex, value, custom_env)`` rule whose
    regex matches it: ``custom_env`` is merged into ``e`` and the
    substituted string is returned.  ``default`` is returned when no rule
    matches.  ``tag`` only labels the log messages.
    """
    path = e['PATH_INFO']
    host = e.get('HTTP_HOST', e.get('SERVER_NAME', 'localhost')).lower()
    colon = host.find(':')
    if colon > 0:
        host = host[:colon]  # drop the port
    remote = e.get('REMOTE_ADDR', 'localhost')
    scheme = e.get('wsgi.url_scheme', 'http').lower()
    method = e.get('REQUEST_METHOD', 'get').lower()
    key = '%s:%s://%s:%s %s' % (remote, scheme, host, method, path)

    for pattern, replacement, extra_env in regexes:
        if not pattern.match(key):
            continue
        e.update(extra_env)
        rewritten = pattern.sub(replacement, key)
        log_rewrite('%s: [%s] [%s] -> %s' % (tag, key, replacement, rewritten))
        return rewritten

    log_rewrite('%s: [%s] -> %s (not rewritten)' % (tag, key, default))
    return default
557
def regex_select(env=None, app=None, request=None):
    """
    select a set of regex rewrite params for the current request

    Stores the selection in THREAD_LOCAL.routes and returns ``app``
    (possibly determined here from ``env`` via routes_app).
    """
    if app:
        # explicit application: use its parameters, else the base set
        chosen = params_apps.get(app, params)
    elif not (env and params.routes_app):
        chosen = params  # default to base rewrite parameters
    else:
        # determine the application from the request
        if routers:
            map_url_in(request, env, app=True)
        else:
            app = regex_uri(env, params.routes_app, "routes_app")
        chosen = params_apps.get(app, params)
    THREAD_LOCAL.routes = chosen
    log_rewrite("select routing parameters: %s" % THREAD_LOCAL.routes.name)
    return app  # for doctest
575
def regex_filter_in(e):
    """regex rewrite incoming URL

    Records the original URI in e['WEB2PY_ORIGINAL_URI'] and, when the
    current routes define routes_in, rewrites PATH_INFO, QUERY_STRING and
    REQUEST_URI in ``e``.  Returns ``e``.

    Raises HTTP with the given status and location when the rewritten
    path has the form "<status>-><location>".
    """
    routes = THREAD_LOCAL.routes
    query = e.get('QUERY_STRING', None)
    original_suffix = ('?' + query) if query else ''
    e['WEB2PY_ORIGINAL_URI'] = e['PATH_INFO'] + original_suffix
    if not routes.routes_in:
        return e

    path = regex_uri(e, routes.routes_in,
                     "routes_in", e['PATH_INFO'])
    redirect = regex_redirect.match(path)
    if redirect:
        raise HTTP(int(redirect.group(1)), location=redirect.group(2))

    new_path, has_query, new_query = path.partition('?')
    e['PATH_INFO'] = new_path
    if has_query:
        # query produced by the rewrite goes before the original one
        if query:
            query = new_query + '&' + query
        else:
            query = new_query
        e['QUERY_STRING'] = query
    rewritten_suffix = ('?' + query) if query else ''
    e['REQUEST_URI'] = e['PATH_INFO'] + rewritten_suffix
    return e
598
def sluggify(key):
    """Return ``key`` lower-cased with every '.' turned into '_'."""
    lowered = key.lower()
    return '_'.join(lowered.split('.'))
602
def invalid_url(routes):
    """Raise HTTP 400 'invalid request' using the error page of ``routes``."""
    body = routes.error_message % 'invalid request'
    raise HTTP(400, body, web2py_error='invalid path')
607
def regex_url_in(request, environ):
    """rewrite and parse incoming URL

    Selects the routing parameters, applies routes_in, copies the environ
    into request.env (keys lower-cased, '.' replaced by '_') and parses
    the path.

    Returns a tuple ``(static_file, version, environ)``:
    for a static request, the absolute file path and the version token
    (or None); otherwise ``(None, None, environ)`` after filling in
    request.application/controller/function/extension/args.

    Raises HTTP 400 for an invalid path and HTTP 404 for a static request
    without a file name.
    """

    # ##################################################
    # select application
    # rewrite URL if routes_in is defined
    # update request.env
    # ##################################################

    regex_select(env=environ, request=request)
    routes = THREAD_LOCAL.routes
    if routes.routes_in:
        environ = regex_filter_in(environ)
    request.env.update(
        (k.lower().replace('.', '_'), v) for k, v in environ.iteritems())

    # ##################################################
    # serve if a static file
    # ##################################################

    path = request.env.path_info.replace('\\', '/') or '/'
    path = regex_space.sub('_', path)
    if path.endswith('/') and len(path) > 1:
        path = path[:-1]
    match = regex_url.match(path)
    if not match:
        invalid_url(routes)
    request.raw_args = (match.group('s') or '')
    if request.raw_args.startswith('/'):
        request.raw_args = request.raw_args[1:]
    if match.group('c') == 'static':
        application = match.group('a')
        version, filename = None, match.group('z')
        if not filename:
            raise HTTP(404)
        items = filename.split('/', 1)
        # A leading version token is only stripped when a file name
        # follows it; a path consisting of the token alone is treated as
        # a plain file name instead of failing on the tuple unpacking.
        if len(items) == 2 and regex_version.match(items[0]):
            version, filename = items
        static_folder = pjoin(request.env.applications_parent,
                              'applications', application, 'static')
        static_file = os.path.abspath(pjoin(static_folder, filename))
        # Containment check against the folder WITH a trailing separator,
        # so that '..' segments cannot escape into a sibling directory
        # whose name merely starts with 'static'.
        if not static_file.startswith(pjoin(static_folder, '')):
            invalid_url(routes)
        return (static_file, version, environ)
    else:
        # ##################################################
        # parse application, controller and function
        # ##################################################
        request.application = match.group('a') or routes.default_application
        request.controller = match.group('c') or routes.default_controller
        request.function = match.group('f') or routes.default_function
        request.raw_extension = match.group('e')
        request.extension = request.raw_extension or 'html'
        if request.application in routes.routes_apps_raw:
            # application is responsible for parsing args
            request.args = None
        elif not regex_args.match(request.raw_args):
            invalid_url(routes)
        elif request.raw_args:
            request.args = List(request.raw_args.split('/'))
        else:
            request.args = List([])
    return (None, None, environ)
671
def regex_filter_out(url, e=None):
    """regex rewrite outgoing URL

    Returns ``url`` rewritten by the first matching routes_out rule of
    the current thread's routes, or ``url`` unchanged when routers are in
    use, no rule is defined, or none matches.  ``e`` is an optional
    environ-like mapping with lower-case keys used to build the match key.
    """
    if not hasattr(THREAD_LOCAL, 'routes'):
        regex_select()    # ensure routes is set (for application threads)
    routes = THREAD_LOCAL.routes
    if routers:
        return url  # already filtered

    if routes.routes_out:
        path, has_query, query = url.partition('?')
        if e:
            host = e.get('http_host', 'localhost').lower()
            colon = host.find(':')
            if colon > 0:
                host = host[:colon]  # drop the port
            key = '%s:%s://%s:%s %s' % \
                (e.get('remote_addr', ''),
                 e.get('wsgi_url_scheme', 'http').lower(), host,
                 e.get('request_method', 'get').lower(), path)
        else:
            key = ':http://localhost:get %s' % path
        for pattern, replacement, _unused in routes.routes_out:
            if not pattern.match(key):
                continue
            rewritten = pattern.sub(replacement, key)
            if has_query:
                # re-attach the untouched query string
                rewritten = rewritten + '?' + query
            log_rewrite('routes_out: [%s] -> %s' % (url, rewritten))
            return rewritten

    log_rewrite('routes_out: [%s] not rewritten' % url)
    return url
700
def filter_url(url, method='get', remote='0.0.0.0',
               out=False, app=False, lang=None,
               domain=(None, None), env=False, scheme=None,
               host=None, port=None):
    """
    doctest/unittest interface to regex_filter_in() and regex_filter_out()

    ``url`` must be a full http(s) URL.  A simulated environ and request
    are built from it, then one of three things happens:

    - ``app`` true: only the application is determined and returned;
    - ``out`` true: the URL is rewritten as an outgoing URL;
    - otherwise: the URL is rewritten as an incoming URL and a summary
      string "/a/c/f[.ext] [args] [?query] [(language)]" is returned
      (the static file path for static requests, or request.env when
      ``env`` is true).
    """
    # NOTE(review): match is None for a URL without an http(s) scheme,
    # which makes the next line raise AttributeError
    match = regex_full_url.match(url)
    urlscheme = match.group('scheme').lower()
    urlhost = match.group('host').lower()
    uri = match.group('uri')
    # split the uri at the first '?' (k == len(uri) when there is none)
    k = uri.find('?')
    if k < 0:
        k = len(uri)
    if isinstance(domain, str):
        domain = (domain, None)
    (path_info, query_string) = (uri[:k], uri[k + 1:])
    path_info = urllib.unquote(path_info)   # simulate server
    e = {
        'REMOTE_ADDR': remote,
        'REQUEST_METHOD': method,
        'wsgi.url_scheme': urlscheme,
        'HTTP_HOST': urlhost,
        'REQUEST_URI': uri,
        'PATH_INFO': path_info,
        'QUERY_STRING': query_string,
        #for filter_out request.env use lowercase
        'remote_addr': remote,
        'request_method': method,
        'wsgi_url_scheme': urlscheme,
        'http_host': urlhost
    }

    request = Storage()
    e["applications_parent"] = global_settings.applications_parent
    request.env = Storage(e)
    request.uri_language = lang

    #  determine application only
    #
    if app:
        if routers:
            return map_url_in(request, e, app=True)
        return regex_select(e)

    #  rewrite outbound URL
    #
    if out:
        (request.env.domain_application,
         request.env.domain_controller) = domain
        items = path_info.lstrip('/').split('/')
        if items[-1] == '':
            items.pop()  # adjust trailing empty args
        assert len(items) >= 3, "at least /a/c/f is required"
        # after these pops, items holds only the args
        a = items.pop(0)
        c = items.pop(0)
        f = items.pop(0)
        if not routers:
            return regex_filter_out(uri, e)
        acf = map_url_out(
            request, None, a, c, f, items, None, scheme, host, port)
        if items:
            url = '%s/%s' % (acf, '/'.join(items))
            if items[-1] == '':
                url += '/'
        else:
            url = acf
        if query_string:
            url += '?' + query_string
        return url

    #  rewrite inbound URL
    #
    (static, version, e) = url_in(request, e)
    if static:
        return static
    result = "/%s/%s/%s" % (
        request.application, request.controller, request.function)
    if request.extension and request.extension != 'html':
        result += ".%s" % request.extension
    if request.args:
        result += " %s" % request.args
    if e['QUERY_STRING']:
        result += " ?%s" % e['QUERY_STRING']
    if request.uri_language:
        result += " (%s)" % request.uri_language
    if env:
        return request.env
    return result
791
def filter_err(status, application='app', ticket='tkt'):
    """doctest/unittest interface to routes_onerror

    Returns the redirection URL (target plus code and ticket query
    parameters) of the first routes_onerror entry matching ``application``
    and ``status``; returns ``status`` itself when the status is not above
    399, nothing matches, or the matching target is '!'.
    """
    routes = THREAD_LOCAL.routes
    if status <= 399 or not routes.routes_onerror:
        return status  # no action

    candidates = set(['%s/%s' % (application, status),
                      '%s/*' % (application),
                      '*/%s' % (status),
                      '*/*'])
    for key, redir in routes.routes_onerror:
        if key not in candidates:
            continue
        if redir == '!':
            break
        # extend an existing query string with '&', otherwise start one
        joiner = '&' if '?' in redir else '?'
        return redir + joiner + 'code=%s&ticket=%s' % (status, ticket)  # redirection
    return status  # no action
811
812 # router support 813 # 814 815 816 -class MapUrlIn(object):
817 "logic for mapping incoming URLs" 818
819 - def __init__(self, request=None, env=None):
820 "initialize a map-in object" 821 self.request = request 822 self.env = env 823 824 self.router = None 825 self.application = None 826 self.language = None 827 self.controller = None 828 self.function = None 829 self.extension = 'html' 830 831 self.controllers = set() 832 self.functions = dict() 833 self.languages = set() 834 self.default_language = None 835 self.map_hyphen = False 836 self.exclusive_domain = False 837 838 path = self.env['PATH_INFO'] 839 self.query = self.env.get('QUERY_STRING', None) 840 path = path.lstrip('/') 841 self.env['PATH_INFO'] = '/' + path 842 self.env['WEB2PY_ORIGINAL_URI'] = self.env['PATH_INFO'] + ( 843 self.query and ('?' + self.query) or '') 844 845 # to handle empty args, strip exactly one trailing slash, if present 846 # .../arg1// represents one trailing empty arg 847 # 848 if path.endswith('/'): 849 path = path[:-1] 850 self.args = List(path and path.split('/') or []) 851 852 # see http://www.python.org/dev/peps/pep-3333/#url-reconstruction for URL composition 853 self.remote_addr = self.env.get('REMOTE_ADDR', 'localhost') 854 self.scheme = self.env.get('wsgi.url_scheme', 'http').lower() 855 self.method = self.env.get('REQUEST_METHOD', 'get').lower() 856 (self.host, self.port) = (self.env.get('HTTP_HOST'), None) 857 if not self.host: 858 (self.host, self.port) = ( 859 self.env.get('SERVER_NAME'), self.env.get('SERVER_PORT')) 860 if not self.host: 861 (self.host, self.port) = ('localhost', '80') 862 if ':' in self.host: 863 (self.host, self.port) = self.host.rsplit(':',1) # for ipv6 support 864 if not self.port: 865 self.port = '443' if self.scheme == 'https' else '80'
866
867 - def map_prefix(self):
868 "strip path prefix, if present in its entirety" 869 prefix = routers.BASE.path_prefix 870 if prefix: 871 prefixlen = len(prefix) 872 if prefixlen > len(self.args): 873 return 874 for i in xrange(prefixlen): 875 if prefix[i] != self.args[i]: 876 return # prefix didn't match 877 self.args = List(self.args[prefixlen:]) # strip the prefix
878
    def map_app(self):
        """determine application name

        Picks the application from the first path argument, a domain
        mapping of the BASE router, or the default application, then
        installs the matching router and copies its settings onto self.

        Raises HTTP 400 when the application name is syntactically
        invalid or unknown.
        """
        base = routers.BASE  # base router
        self.domain_application = None
        self.domain_controller = None
        self.domain_function = None
        arg0 = self.harg0
        # Selection order:
        #   1-2. without exclusive_domain, the first path arg wins
        #   3-4. a domain mapping, by (host, port) then by (host, None)
        #   5-6. the first path arg (reached only with exclusive_domain)
        #   7.   the default application
        if not base.exclusive_domain and base.applications and arg0 in base.applications:
            self.application = arg0
        elif not base.exclusive_domain and arg0 and not base.applications:
            self.application = arg0
        elif (self.host, self.port) in base.domains:
            (self.application, self.domain_controller,
             self.domain_function) = base.domains[(self.host, self.port)]
            self.env['domain_application'] = self.application
            self.env['domain_controller'] = self.domain_controller
            self.env['domain_function'] = self.domain_function
        elif (self.host, None) in base.domains:
            (self.application, self.domain_controller,
             self.domain_function) = base.domains[(self.host, None)]
            self.env['domain_application'] = self.application
            self.env['domain_controller'] = self.domain_controller
            self.env['domain_function'] = self.domain_function
        elif base.applications and arg0 in base.applications:
            self.application = arg0
        elif arg0 and not base.applications:
            self.application = arg0
        else:
            self.application = base.default_application or ''
        # pop_arg_if is defined outside this chunk; it receives whether
        # the first arg was the application name
        self.pop_arg_if(self.application == arg0)

        if not base._acfe_match.match(self.application):
            raise HTTP(
                400, THREAD_LOCAL.routes.error_message % 'invalid request',
                web2py_error="invalid application: '%s'" % self.application)

        # an application without a router is only accepted when it is the
        # configured default application and is not named 'welcome'
        if self.application not in routers and \
                (self.application != THREAD_LOCAL.routes.default_application or self.application == 'welcome'):
            raise HTTP(
                400, THREAD_LOCAL.routes.error_message % 'invalid request',
                web2py_error="unknown application: '%s'" % self.application)

        #  set the application router
        #
        log_rewrite("select application=%s" % self.application)
        self.request.application = self.application
        if self.application not in routers:
            self.router = routers.BASE  # support gluon.main.wsgibase init->welcome
        else:
            self.router = routers[self.application]  # application router
        # copy the router's settings for use by the other map_* methods
        self.controllers = self.router.controllers
        self.default_controller = self.domain_controller or self.router.default_controller
        self.functions = self.router.functions
        self.languages = self.router.languages
        self.default_language = self.router.default_language
        self.map_hyphen = self.router.map_hyphen
        self.exclusive_domain = self.router.exclusive_domain
        self._acfe_match = self.router._acfe_match
        self.file_match = self.router.file_match
        self._file_match = self.router._file_match
        self._args_match = self.router._args_match
940
def map_root_static(self):
    """
    Handle root-static files (no hyphen mapping is applied).

    A root-static file is one whose incoming URL expects it to be at the
    root, typically robots.txt & favicon.ico.

    Returns ``(path, None)`` when exactly one arg remains and it is listed
    in the router's ``root_static``; otherwise ``(None, None)``.
    """
    is_root_static = (
        len(self.args) == 1 and self.arg0 in self.router.root_static)
    if not is_root_static:
        return None, None

    # root-static files are served out of the application's static folder
    self.controller = self.request.controller = 'static'
    root_static_file = pjoin(
        self.request.env.applications_parent,
        'applications',
        self.application,
        self.controller,
        self.arg0)
    log_rewrite("route: root static=%s" % root_static_file)
    return root_static_file, None
957
def map_language(self):
    """
    Handle the language element of the URL (no hyphen mapping).

    If the first arg is one of the configured languages it becomes the
    request language; otherwise the router's default language is used.
    When a language is in effect it is logged, and the first arg is
    removed if it was the language itself.
    """
    arg0 = self.arg0  # no hyphen mapping
    if arg0 and self.languages and arg0 in self.languages:
        self.language = arg0
    else:
        self.language = self.default_language
    if self.language:
        log_rewrite("route: language=%s" % self.language)
        # only consume the arg when it actually named the language
        self.pop_arg_if(self.language == arg0)
969
def map_controller(self):
    """
    Identify the controller from the first arg (with hyphen mapping).

    The default controller (or '') is used when there is no first arg, or
    when a controller list is configured and the arg is not in it. The
    first arg is removed when it is the chosen controller.

    Raises HTTP 400 when the chosen name fails the router's
    ``_acfe_match`` pattern.
    """
    candidate = self.harg0  # map hyphens
    if candidate and not (self.controllers and candidate not in self.controllers):
        self.controller = candidate
    else:
        self.controller = self.default_controller or ''
    self.pop_arg_if(candidate == self.controller)
    log_rewrite("route: controller=%s" % self.controller)
    if self.router._acfe_match.match(self.controller):
        return
    raise HTTP(
        400, THREAD_LOCAL.routes.error_message % 'invalid request',
        web2py_error='invalid controller')
985
def map_static(self):
    """
    Handle static files (file_match is applied, but no hyphen mapping).

    Returns ``(None, None)`` when the controller is not 'static'.
    Otherwise returns ``(static_file, version)``, where ``version`` is the
    ``regex_version`` match object for a leading version element (or None)
    and ``static_file`` is the filesystem path to serve.

    Raises HTTP 400 when no file name is given or the path fails
    validation against the router's file pattern.
    """
    if self.controller != 'static':
        return None, None

    # Probe for a version prefix only when there is a first arg.
    # NOTE(review): self.args(0) presumably yields None for an empty arg
    # list, and matching None raises TypeError before the "require a file
    # name" check below can answer with a 400 -- confirm against the args
    # container's call semantics.
    version = regex_version.match(self.args(0)) if self.args else None
    if version:
        static_path = '/'.join(self.args[1:])  # drop the version element
    else:
        static_path = '/'.join(self.args)

    if len(self.args) == 0:
        bad_static = True  # require a file name
    elif '/' in self.file_match:
        # match the path
        bad_static = not self.router._file_match.match(static_path)
    else:
        # match path elements
        bad_static = False
        for name in self.args:
            bad_static = bad_static or name in (
                '', '.', '..') or not self.router._file_match.match(name)
    if bad_static:
        log_rewrite('bad static path=%s' % static_path)
        raise HTTP(400,
                   THREAD_LOCAL.routes.error_message % 'invalid request',
                   web2py_error='invalid static file')
    #
    # support language-specific static subdirectories,
    # eg /appname/en/static/filename => applications/appname/static/en/filename
    # if language-specific file doesn't exist, try same file in static
    #
    static_file = None
    if self.language:
        static_file = pjoin(self.request.env.applications_parent,
                            'applications', self.application,
                            'static', self.language, static_path)
    if not self.language or not isfile(static_file):
        static_file = pjoin(self.request.env.applications_parent,
                            'applications', self.application,
                            'static', static_path)
    self.extension = None
    log_rewrite("route: static=%s" % static_file)
    return static_file, version
1030
def map_function(self):
    """
    Handle the function.extension element of the URL (with hyphen mapping).

    The default function comes from the domain function if set, else from
    the router's ``default_function`` (looked up per controller when that
    is a dict). It is used when there is no first arg, or when a function
    set is configured for the controller and the arg is not in it.
    Otherwise the first arg is consumed and split on '.' into function
    and extension.

    Raises HTTP 400 when the function or extension fails the router's
    ``_acfe_match`` pattern.
    """
    candidate = self.harg0  # map hyphens
    allowed = self.functions.get(self.controller, set())

    fallback = self.router.default_function  # dict, str or None
    if isinstance(fallback, dict):
        fallback = fallback.get(self.controller, None)
    fallback = self.domain_function or fallback

    if candidate and not (allowed and candidate not in allowed):
        pieces = candidate.split('.')
        self.function = pieces[0]
        if len(pieces) > 1:
            self.extension = pieces[-1]
        self.pop_arg_if(True)
    else:
        self.function = fallback or ""
        self.pop_arg_if(candidate and self.function == candidate)
    log_rewrite(
        "route: function.ext=%s.%s" % (self.function, self.extension))

    acfe_ok = self.router._acfe_match.match
    if not acfe_ok(self.function):
        raise HTTP(
            400, THREAD_LOCAL.routes.error_message % 'invalid request',
            web2py_error='invalid function')
    if self.extension and not acfe_ok(self.extension):
        raise HTTP(
            400, THREAD_LOCAL.routes.error_message % 'invalid request',
            web2py_error='invalid extension')
1063
def validate_args(self):
    """
    Check every remaining arg against the router's ``_args_match`` pattern.

    Raises HTTP 400 on the first arg that does not match.
    """
    arg_ok = self.router._args_match.match
    for arg in self.args:
        if arg_ok(arg):
            continue
        raise HTTP(
            400, THREAD_LOCAL.routes.error_message % 'invalid request',
            web2py_error='invalid arg <%s>' % arg)
1073
def sluggify(self):
    """
    Copy ``self.env`` into ``self.request.env`` under normalized keys.

    Each key is lower-cased and its dots are replaced by underscores;
    values are passed through unchanged.
    """
    def slugged_items():
        # kept lazy so the update consumes entries as they are produced
        for name, value in self.env.iteritems():
            yield name.lower().replace('.', '_'), value

    self.request.env.update(slugged_items())
1078
def update_request(self):
    """
    Update the request from the mapping results.

    Copies application, controller, function, extension and args (and
    uri_language, when a language is set) onto the request, builds
    ``env['REQUEST_URI']`` and then calls ``sluggify`` to make the
    lower-case versions of the env entries in ``request.env``.
    """
    request = self.request
    request.application = self.application
    request.controller = self.controller
    request.function = self.function
    request.extension = self.extension
    request.args = self.args
    if self.language:
        request.uri_language = self.language

    path = '/%s/%s' % (self.controller, self.function)
    app = self.application
    if self.map_hyphen:
        path = path.replace('_', '-')
        app = app.replace('_', '-')
    if self.extension and self.extension != 'html':
        path += '.' + self.extension
    if self.language:
        path = '/%s%s' % (self.language, path)

    if self.args:
        quoted_args = urllib.quote(
            '/' + '/'.join(str(x) for x in self.args))
    else:
        quoted_args = ''
    if self.query:
        query = '?' + self.query
    else:
        query = ''

    self.env['REQUEST_URI'] = '/%s%s%s%s' % (app, path, quoted_args, query)
    self.sluggify()
@property
def arg0(self):
    """Return the first remaining arg, as given by ``self.args(0)``."""
    first = self.args(0)
    return first
@property
def harg0(self):
    """
    Return the first remaining arg with optional hyphen mapping.

    When ``map_hyphen`` is set and the first arg is non-empty, hyphens in
    it are replaced by underscores; otherwise it is returned unchanged.
    """
    first = self.args(0)
    if self.map_hyphen and first:
        return first.replace('-', '_')
    return first
1121
def pop_arg_if(self, dopop):
    """
    Conditionally remove the first arg.

    The first element of ``self.args`` is popped when ``dopop`` is truthy.
    Nothing is returned.
    """
    if not dopop:
        return
    self.args.pop(0)
1126
class MapUrlOut(object):
    """Logic for mapping outgoing URLs."""

    def __init__(self, request, env, application, controller,
                 function, args, other, scheme, host, port):
        """
        Initialize a map-out object.

        Selects the application's router (falling back to the BASE router),
        records the URL components and caches the router settings used by
        the omit_* / build_acf steps.

        Raises SyntaxError when the router is domain-exclusive, the request
        belongs to a different domain application and no host was given.
        """
        base = routers.BASE
        self.default_application = base.default_application
        self.router = routers[application] if application in routers else base
        self.request = request
        self.env = env
        self.application = application
        self.controller = controller
        self.is_static = (
            controller == 'static' or controller.startswith('static/'))
        self.function = function
        self.args = args
        self.other = other
        self.scheme = scheme
        self.host = host
        self.port = port

        router = self.router
        self.applications = base.applications
        self.controllers = router.controllers
        self.functions = router.functions.get(self.controller, set())
        self.languages = router.languages
        self.default_language = router.default_language
        self.exclusive_domain = router.exclusive_domain
        self.map_hyphen = router.map_hyphen
        self.map_static = router.map_static
        self.path_prefix = base.path_prefix

        self.domain_application = request and self.request.env.domain_application
        self.domain_controller = request and self.request.env.domain_controller

        # default_function may be a per-controller dict or a plain value
        default_function = router.default_function
        if isinstance(default_function, dict):
            default_function = default_function.get(self.controller, None)
        self.default_function = default_function

        crosses_domain = (
            router.exclusive_domain
            and self.domain_application
            and self.domain_application != self.application)
        if crosses_domain and not self.host:
            raise SyntaxError('cross-domain conflict: must specify host')

        lang = request and request.uri_language
        if lang and self.languages and lang in self.languages:
            self.language = lang
        else:
            self.language = None

        # nothing is omitted until omit_lang / omit_acf decide otherwise
        self.omit_application = False
        self.omit_language = False
        self.omit_controller = False
        self.omit_function = False

    def omit_lang(self):
        """Omit the language if there is none or it is the default."""
        if self.language and self.language != self.default_language:
            return
        self.omit_language = True

    def omit_acf(self):
        """
        Omit what we can of application/controller/function.

        Sets the omit_* flags for components equal to their defaults, then
        clears any flag whose omission would leave an ambiguous URL.
        """
        router = self.router

        # Handle the easy no-args case of tail-defaults: /a/c /a /
        #
        if not self.args and self.function == self.default_function:
            self.omit_function = True
            if self.controller == router.default_controller:
                self.omit_controller = True
                if self.application == self.default_application:
                    self.omit_application = True

        # omit default application
        # (which might be the domain default application)
        #
        default_application = (
            self.domain_application or self.default_application)
        if self.application == default_application:
            self.omit_application = True

        # omit controller if default controller
        #
        in_domain_app = self.application == self.domain_application
        default_controller = (
            (in_domain_app and self.domain_controller)
            or router.default_controller
            or '')
        if self.controller == default_controller:
            self.omit_controller = True

        # omit function if possible
        #
        if (self.functions and self.function in self.functions
                and self.function == self.default_function):
            self.omit_function = True

        # prohibit ambiguous cases
        #
        # because we presume the lang string to be unambiguous,
        # its presence protects application omission
        #
        if self.exclusive_domain:
            applications = [self.domain_application]
        else:
            applications = self.applications
        if self.omit_language:
            if not applications or self.controller in applications:
                self.omit_application = False
            if self.omit_application:
                if not applications or self.function in applications:
                    self.omit_controller = False
        if not self.controllers or self.function in self.controllers:
            self.omit_controller = False
        if self.args and (self.args[0] in self.functions
                          or self.args[0] in self.controllers
                          or self.args[0] in applications):
            self.omit_function = False
        if self.omit_controller and (self.function in self.controllers
                                     or self.function in applications):
            self.omit_controller = False
        if self.omit_application and self.controller in applications:
            self.omit_application = False

        # handle static as a special case
        # (easier for external static handling)
        #
        if self.is_static:
            if not self.map_static:
                self.omit_application = False
                if self.language:
                    self.omit_language = False
            self.omit_controller = False
            self.omit_function = False

    def build_acf(self):
        """
        Build the acf string from the components not marked for omission.

        Applies hyphen mapping first when enabled (this rewrites
        application, controller and, for non-static URLs, function on
        self), then prepends the path prefix if one is configured.
        Returns '/' instead of an empty string when there are no args.
        """
        if self.map_hyphen:
            self.application = self.application.replace('_', '-')
            self.controller = self.controller.replace('_', '-')
            if self.controller != 'static' and not self.controller.startswith('static/'):
                self.function = self.function.replace('_', '-')

        acf = ''
        if not self.omit_application:
            acf += '/' + self.application
        # handle case of flipping lang/static/file to static/lang/file
        # for external rewrite
        if self.is_static and self.map_static is False and not self.omit_language:
            acf += '/' + self.controller + '/' + self.language
        else:
            if not self.omit_language:
                acf += '/' + self.language
            if not self.omit_controller:
                acf += '/' + self.controller
        if not self.omit_function:
            acf += '/' + self.function
        if self.path_prefix:
            acf = '/' + '/'.join(self.path_prefix) + acf
        if self.args:
            return acf
        return acf or '/'

    def acf(self):
        """
        Convert the components to /app/lang/controller/function.

        Returns None when no routers are defined (use regex filter).
        """
        if not routers:
            return None  # use regex filter
        self.omit_lang()  # try to omit language
        self.omit_acf()  # try to omit a/c/f
        return self.build_acf()  # build and return the /a/lang/c/f string
1293
def map_url_in(request, env, app=False):
    """
    Route an incoming URL.

    When ``app`` is truthy only the application is determined and its name
    is returned. Otherwise returns a ``(static_file, version, env)`` tuple:
    ``static_file`` is the path of a root-static or static file to serve
    (None for a normal controller/function request), ``version`` is the
    value reported by the static mapping (None otherwise) and ``env`` is
    the mapper's environment.
    """
    # initialize router-url object
    #
    THREAD_LOCAL.routes = params  # default to base routes
    mapper = MapUrlIn(request=request, env=env)
    mapper.sluggify()
    mapper.map_prefix()  # strip prefix if present
    mapper.map_app()  # determine application

    # configure THREAD_LOCAL.routes for error rewrite
    #
    # NOTE(review): the lookup key here is the ``app`` flag, not an
    # application name; mapper.application looks like the intended key --
    # confirm before changing.
    if params.routes_app:
        THREAD_LOCAL.routes = params_apps.get(app, params)

    if app:
        return mapper.application

    # handle root-static files
    root_static_file, version = mapper.map_root_static()
    if root_static_file:
        mapper.update_request()
        return (root_static_file, version, mapper.env)

    # handle mapping of lang/static to static/lang in externally-rewritten
    # URLs in case we have to handle them ourselves
    #
    # The setting is read from the router: on the mapper itself,
    # ``map_static`` is the method called below, so it can never be False.
    if (mapper.languages and mapper.router.map_static is False
            and mapper.arg0 == 'static'
            and mapper.args(1) in mapper.languages):
        mapper.map_controller()
        mapper.map_language()
    else:
        mapper.map_language()
        mapper.map_controller()

    static_file, version = mapper.map_static()
    if static_file:
        mapper.update_request()
        return (static_file, version, mapper.env)

    mapper.map_function()
    mapper.validate_args()
    mapper.update_request()
    return (None, None, mapper.env)
1335
def map_url_out(request, env, application, controller,
                function, args, other, scheme, host, port):
    """
    Supply the /a/c/f (or /a/lang/c/f) portion of an outgoing URL.

    The basic rule is that we can only make transformations
    that map_url_in can reverse.

    Suppose that the incoming arguments are a,c,f,args,lang
    and that the router defaults are da, dc, df, dl.

    We can perform these transformations trivially if args=[] and
    lang=None or dl:

    /da/dc/df => /
    /a/dc/df => /a
    /a/c/df => /a/c

    We would also like to be able to strip the default application or
    application/controller from URLs with function/args present, thus:

    /da/c/f/args => /c/f/args
    /da/dc/f/args => /f/args

    We use [applications] and [controllers] and {functions} to suppress
    ambiguous omissions.

    We assume that language names do not collide with a/c/f names.
    """
    mapper = MapUrlOut(request, env, application, controller,
                       function, args, other, scheme, host, port)
    return mapper.acf()
1367
def get_effective_router(appname):
    """
    Return a private copy of the effective router for the specified
    application.

    Returns None when no routers are defined or ``appname`` has no router.
    """
    if routers and appname in routers:
        return Storage(routers[appname])  # return a copy
    return None
1374