Package gluon :: Module cache

Source Code for Module gluon.cache

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
This file is part of the web2py Web Framework
Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)

Basic caching classes and methods
=================================

- Cache - The generic caching object interfacing with the others
- CacheInRam - provides caching in RAM
- CacheOnDisk - provides caching on disk

Memcache is also available via a different module (see gluon.contrib.memcache)

When web2py is running on Google App Engine,
caching will be provided by the GAE memcache
(see gluon.contrib.gae_memcache)
"""
import time
import portalocker
import shelve
import thread
import os
import logging
import re
import hashlib
import datetime
try:
    from gluon import settings
    have_settings = True
except ImportError:
    have_settings = False

logger = logging.getLogger("web2py.cache")

__all__ = ['Cache', 'lazy_cache']


DEFAULT_TIME_EXPIRE = 300


class CacheAbstract(object):
    """
    Abstract class for cache implementations.
    Its main function is now to provide referenced API documentation.

    Use CacheInRam or CacheOnDisk instead, which are derived from this class.

    Attention, as Michele notes:

    gdbm files contain signatures that are used directly by the Python gdbm
    adapter, and the detection code on the Python side often lags behind.
    On every occasion that a gdbm store is probed by the Python adapter,
    the probe fails, because the gdbm file version is newer.
    Using gdbm directly from C would work, because there is backward
    compatibility, but not from Python!
    The .shelve file is discarded and a new one created (with the new
    signature) and it works until it is probed again...
    The possible consequences are memory leaks and broken sessions.
    """

    cache_stats_name = 'web2py_cache_statistics'

    def __init__(self, request=None):
        """
        Parameters
        ----------
        request:
            the global request object
        """
        raise NotImplementedError

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE):
        """
        Tries to retrieve the value corresponding to `key` from the cache if
        the object exists and if it did not expire, else it calls the
        function `f` and stores the output in the cache corresponding to
        `key`. In that case the output of the function is returned.

        :param key: the key of the object to be stored or retrieved
        :param f: the function whose output is to be cached
        :param time_expire: expiration of the cache in seconds

        - `time_expire` is used to compare the current time with the time
          when the requested object was last saved in the cache. It does not
          affect future requests.
        - Setting `time_expire` to 0 or a negative value forces the cache to
          refresh.

        If the function `f` is `None` the cache is cleared.
        """
        raise NotImplementedError

    def clear(self, regex=None):
        """
        Clears the cache of all keys that match the provided regular
        expression. If no regular expression is provided, it clears all
        entries in the cache.

        Parameters
        ----------
        regex:
            if provided, only keys matching the regex will be cleared.
            Otherwise all keys are cleared.
        """

        raise NotImplementedError

    def increment(self, key, value=1):
        """
        Increments the cached value for the given key by the amount in value

        Parameters
        ----------
        key:
            key for the cached object to be incremented
        value:
            amount of the increment (defaults to 1, can be negative)
        """
        raise NotImplementedError

    def _clear(self, storage, regex):
        """
        Auxiliary function called by `clear` to search and clear cache entries
        """
        r = re.compile(regex)
        for (key, value) in storage.items():
            if r.match(str(key)):
                del storage[key]


class CacheInRam(CacheAbstract):
    """
    Ram based caching

    This is implemented as a global (per process, shared by all threads)
    dictionary. A mutex-lock mechanism avoids conflicts.
    """

    locker = thread.allocate_lock()
    meta_storage = {}

    def __init__(self, request=None):
        self.initialized = False
        self.request = request
        self.storage = {}

    def initialize(self):
        if self.initialized:
            return
        else:
            self.initialized = True
        self.locker.acquire()
        request = self.request
        if request:
            app = request.application
        else:
            app = ''
        if not app in self.meta_storage:
            self.storage = self.meta_storage[app] = {
                CacheAbstract.cache_stats_name: {'hit_total': 0, 'misses': 0}}
        else:
            self.storage = self.meta_storage[app]
        self.locker.release()

    def clear(self, regex=None):
        self.initialize()
        self.locker.acquire()
        storage = self.storage
        if regex is None:
            storage.clear()
        else:
            self._clear(storage, regex)

        if not CacheAbstract.cache_stats_name in storage.keys():
            storage[CacheAbstract.cache_stats_name] = {
                'hit_total': 0, 'misses': 0}

        self.locker.release()

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE,
                 destroyer=None):
        """
        Attention! cache.ram does not copy the cached object. It just stores
        a reference to it. It turns out that deepcopying the object would
        have some problems:
        1) it would break backward compatibility
        2) it would be limiting, because people may want to cache live objects
        3) it would only help if we deepcopied on both storage and retrieval,
           which would make things slow.
        Anyway, you can deepcopy explicitly in the function generating the
        value to be cached.
        """
        self.initialize()

        dt = time_expire
        now = time.time()

        self.locker.acquire()
        item = self.storage.get(key, None)
        if item and f is None:
            del self.storage[key]
            if destroyer:
                destroyer(item[1])
        self.storage[CacheAbstract.cache_stats_name]['hit_total'] += 1
        self.locker.release()

        if f is None:
            return None
        if item and (dt is None or item[0] > now - dt):
            return item[1]
        elif item and (item[0] < now - dt) and destroyer:
            destroyer(item[1])
        value = f()

        self.locker.acquire()
        self.storage[key] = (now, value)
        self.storage[CacheAbstract.cache_stats_name]['misses'] += 1
        self.locker.release()
        return value

    def increment(self, key, value=1):
        self.initialize()
        self.locker.acquire()
        try:
            if key in self.storage:
                value = self.storage[key][1] + value
            self.storage[key] = (time.time(), value)
        except BaseException, e:
            self.locker.release()
            raise e
        self.locker.release()
        return value
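
# Usage sketch (editor's example, not part of gluon/cache.py): the generic
# key/f/time_expire protocol described in CacheAbstract, exercised directly on
# a CacheInRam instance outside of a web2py request (request=None is allowed).
#
#     ram_cache = CacheInRam()
#     value = ram_cache('answer', lambda: 6 * 7, time_expire=60)  # miss: computes and stores 42
#     value = ram_cache('answer', lambda: 6 * 7, time_expire=60)  # hit: returns the cached 42
#     value = ram_cache('answer', lambda: 6 * 7, time_expire=0)   # time_expire <= 0 forces a refresh
#     ram_cache.increment('hits')                                 # missing key starts the counter at 1
#     ram_cache('answer', None)                                   # f=None removes the entry for 'answer'
#     ram_cache.clear(regex='^ans')                               # clear only keys matching a regex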


class CacheOnDisk(CacheAbstract):
    """
    Disk based cache

    This is implemented as a shelve object and it is shared by multiple
    web2py processes (and threads) as long as they share the same
    filesystem. The file is locked when accessed.

    Disk cache provides persistence when web2py is started/stopped, but it
    is slower than `CacheInRam`.

    Values stored in the disk cache must be picklable.
    """

    def _close_shelve_and_unlock(self):
        try:
            if self.storage:
                self.storage.close()
        finally:
            if self.locker and self.locked:
                portalocker.unlock(self.locker)
                self.locker.close()
                self.locked = False

    def _open_shelve_and_lock(self):
        """Open and return a shelf object, obtaining an exclusive lock
        on self.locker first. The lock and the shelf are kept on self and
        are released by `_close_shelve_and_unlock`."""

        storage = None
        locker = None
        locked = False
        try:
            locker = locker = open(self.locker_name, 'a')
            portalocker.lock(locker, portalocker.LOCK_EX)
            locked = True
            try:
                storage = shelve.open(self.shelve_name)
            except:
                logger.error('corrupted cache file %s, will try rebuild it'
                             % self.shelve_name)
                storage = None
            if not storage and os.path.exists(self.shelve_name):
                os.unlink(self.shelve_name)
                storage = shelve.open(self.shelve_name)
            if not CacheAbstract.cache_stats_name in storage.keys():
                storage[CacheAbstract.cache_stats_name] = {
                    'hit_total': 0, 'misses': 0}
            storage.sync()
        except Exception, e:
            if storage:
                storage.close()
                storage = None
            if locked:
                portalocker.unlock(locker)
                locker.close()
                locked = False
            raise RuntimeError(
                'unable to create/re-create cache file %s' % self.shelve_name)
        self.locker = locker
        self.locked = locked
        self.storage = storage
        return storage

    def __init__(self, request=None, folder=None):
        self.initialized = False
        self.request = request
        self.folder = folder
        self.storage = {}

    def initialize(self):
        if self.initialized:
            return
        else:
            self.initialized = True
        folder = self.folder
        request = self.request

        # Let's test if the cache folder exists, if not
        # we are going to create it
        folder = folder or os.path.join(request.folder, 'cache')

        if not os.path.exists(folder):
            os.mkdir(folder)

        ### we need this because of a possible bug in shelve that may
        ### or may not lock
        self.locker_name = os.path.join(folder, 'cache.lock')
        self.shelve_name = os.path.join(folder, 'cache.shelve')

    def clear(self, regex=None):
        self.initialize()
        storage = self._open_shelve_and_lock()
        try:
            if regex is None:
                storage.clear()
            else:
                self._clear(storage, regex)
            storage.sync()
        finally:
            self._close_shelve_and_unlock()

    def __call__(self, key, f,
                 time_expire=DEFAULT_TIME_EXPIRE):
        self.initialize()
        dt = time_expire
        storage = self._open_shelve_and_lock()
        try:
            item = storage.get(key, None)
            storage[CacheAbstract.cache_stats_name]['hit_total'] += 1
            if item and f is None:
                del storage[key]
                storage.sync()
            now = time.time()
            if f is None:
                value = None
            elif item and (dt is None or item[0] > now - dt):
                value = item[1]
            else:
                value = f()
                storage[key] = (now, value)
                storage[CacheAbstract.cache_stats_name]['misses'] += 1
                storage.sync()
        finally:
            self._close_shelve_and_unlock()

        return value

    def increment(self, key, value=1):
        self.initialize()
        storage = self._open_shelve_and_lock()
        try:
            if key in storage:
                value = storage[key][1] + value
            storage[key] = (time.time(), value)
            storage.sync()
        finally:
            self._close_shelve_and_unlock()
        return value
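
# Usage sketch (editor's example, not part of gluon/cache.py): CacheOnDisk
# needs a writable folder; outside of a request one can be passed explicitly.
# The folder path and compute_rows() below are placeholders for illustration.
# Values go through `shelve`, so they must be picklable.
#
#     disk_cache = CacheOnDisk(folder='/tmp/web2py_cache_demo')
#     rows = disk_cache('expensive_query', lambda: compute_rows(), time_expire=300)
#     disk_cache.increment('page_views')
#     disk_cache.clear(regex='^expensive_')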


class CacheAction(object):
    def __init__(self, func, key, time_expire, cache, cache_model):
        self.__name__ = func.__name__
        self.__doc__ = func.__doc__
        self.func = func
        self.key = key
        self.time_expire = time_expire
        self.cache = cache
        self.cache_model = cache_model

    def __call__(self, *a, **b):
        if not self.key:
            key2 = self.__name__ + ':' + repr(a) + ':' + repr(b)
        else:
            key2 = self.key.replace('%(name)s', self.__name__)\
                .replace('%(args)s', str(a)).replace('%(vars)s', str(b))
        cache_model = self.cache_model
        if not cache_model or isinstance(cache_model, str):
            cache_model = getattr(self.cache, cache_model or 'ram')
        return cache_model(key2,
                           lambda a=a, b=b: self.func(*a, **b),
                           self.time_expire)


class Cache(object):
    """
    Sets up generic caching, creating an instance of both CacheInRam and
    CacheOnDisk.
    In the case of GAE it will make use of gluon.contrib.gae_memcache.

    - self.ram is an instance of CacheInRam
    - self.disk is an instance of CacheOnDisk
    """

    autokey = ':%(name)s:%(args)s:%(vars)s'

    def __init__(self, request):
        """
        Parameters
        ----------
        request:
            the global request object
        """
        # GAE will have a special caching
        if have_settings and settings.global_settings.web2py_runtime_gae:
            from gluon.contrib.gae_memcache import MemcacheClient
            self.ram = self.disk = MemcacheClient(request)
        else:
            # Otherwise use ram (and try also disk)
            self.ram = CacheInRam(request)
            try:
                self.disk = CacheOnDisk(request)
            except IOError:
                logger.warning('no cache.disk (IOError)')
            except AttributeError:
                # normally not expected anymore, as GAE has already
                # been accounted for
                logger.warning('no cache.disk (AttributeError)')
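
    # Usage sketch (editor's example, not part of gluon/cache.py): in a
    # web2py application the framework normally builds this object and
    # exposes it to models/controllers as `cache`; slow_call() below is a
    # placeholder for any expensive function.
    #
    #     cache = Cache(request)   # usually done by web2py itself
    #     t = cache.ram('now', lambda: time.ctime(), time_expire=5)
    #     r = cache.disk('slow', lambda: slow_call(), time_expire=300)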

    def action(self, time_expire=DEFAULT_TIME_EXPIRE, cache_model=None,
               prefix=None, session=False, vars=True, lang=True,
               user_agent=False, public=True, valid_statuses=None,
               quick=None):
        """
        Experimental!
        Currently only HTTP 1.1 compliant
        reference : http://code.google.com/p/doctype-mirror/wiki/ArticleHttpCaching
        time_expire: same as @cache
        cache_model: same as @cache
        prefix: add a prefix to the calculated key
        session: adds response.session_id to the key
        vars: adds request.env.query_string
        lang: adds T.accepted_language
        user_agent: if True, adds is_mobile and is_tablet to the key.
            Pass a dict to use all the needed values (uses str(.items()),
            e.g. user_agent=request.user_agent()). Used only if session is
            not True.
        public: if False forces the Cache-Control to be 'private'
        valid_statuses: by default only status codes starting with 1, 2, 3
            will be cached. Pass an explicit list of statuses for which to
            turn the cache on.
        quick: Session, Vars, Lang, User-agent, Public:
            fast overrides with initial strings, e.g. 'SVLP' or 'VLP'
        """
        from gluon import current
        from gluon.http import HTTP

        def wrap(func):
            def wrapped_f():
                if current.request.env.request_method != 'GET':
                    return func()
                if time_expire:
                    cache_control = 'max-age=%(time_expire)s, s-maxage=%(time_expire)s' % dict(time_expire=time_expire)
                    if quick:
                        session_ = True if 'S' in quick else False
                        vars_ = True if 'V' in quick else False
                        lang_ = True if 'L' in quick else False
                        user_agent_ = True if 'U' in quick else False
                        public_ = True if 'P' in quick else False
                    else:
                        session_, vars_, lang_, user_agent_, public_ = session, vars, lang, user_agent, public
                    if not session_ and public_:
                        cache_control += ', public'
                        expires = (current.request.utcnow + datetime.timedelta(seconds=time_expire)).strftime('%a, %d %b %Y %H:%M:%S GMT')
                    else:
                        cache_control += ', private'
                        expires = 'Fri, 01 Jan 1990 00:00:00 GMT'
                if cache_model:
                    # figure out the correct cache key
                    cache_key = [current.request.env.path_info, current.response.view]
                    if session_:
                        cache_key.append(current.response.session_id)
                    elif user_agent_:
                        if user_agent_ is True:
                            cache_key.append("%(is_mobile)s_%(is_tablet)s" % current.request.user_agent())
                        else:
                            cache_key.append(str(user_agent_.items()))
                    if vars_:
                        cache_key.append(current.request.env.query_string)
                    if lang_:
                        cache_key.append(current.T.accepted_language)
                    cache_key = hashlib.md5('__'.join(cache_key)).hexdigest()
                    if prefix:
                        cache_key = prefix + cache_key
                    try:
                        # action returns something
                        rtn = cache_model(cache_key, lambda: func(), time_expire=time_expire)
                        http, status = None, current.response.status
                    except HTTP, e:
                        # action raises HTTP (can still be valid)
                        rtn = cache_model(cache_key, lambda: e.body, time_expire=time_expire)
                        http, status = HTTP(e.status, rtn, **e.headers), e.status
                    else:
                        # action raised a generic exception
                        http = None
                else:
                    # no server-side cache involved
                    try:
                        # action returns something
                        rtn = func()
                        http, status = None, current.response.status
                    except HTTP, e:
                        # action raises HTTP (can still be valid)
                        status = e.status
                        http = HTTP(e.status, e.body, **e.headers)
                    else:
                        # action raised a generic exception
                        http = None
                send_headers = False
                if http and isinstance(valid_statuses, list):
                    if status in valid_statuses:
                        send_headers = True
                elif valid_statuses is None:
                    if str(status)[0] in '123':
                        send_headers = True
                if send_headers:
                    headers = {
                        'Pragma': None,
                        'Expires': expires,
                        'Cache-Control': cache_control
                    }
                    current.response.headers.update(headers)
                if cache_model and not send_headers:
                    # we have already cached the value, but the status is not
                    # valid, so we need to delete the cached value
                    cache_model(cache_key, None)
                if http:
                    if send_headers:
                        http.headers.update(current.response.headers)
                    raise http
                return rtn
            wrapped_f.__name__ = func.__name__
            wrapped_f.__doc__ = func.__doc__
            return wrapped_f
        return wrap
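
    # Usage sketch (editor's example, not part of gluon/cache.py): decorating
    # a controller action so GET responses are cached server-side (cache.ram)
    # and client-side via the Cache-Control/Expires headers; quick='VLP' is
    # shorthand for vars=True, lang=True, public=True. The action body is a
    # placeholder.
    #
    #     @cache.action(time_expire=300, cache_model=cache.ram, quick='VLP')
    #     def index():
    #         return dict(message=build_page())   # build_page is hypothetical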

    def __call__(self,
                 key=None,
                 time_expire=DEFAULT_TIME_EXPIRE,
                 cache_model=None):
        """
        Decorator function that can be used to cache any function/method.

        Example::

            @cache('key', 5000, cache.ram)
            def f():
                return time.ctime()

        When the function f is called, web2py tries to retrieve
        the value corresponding to `key` from the cache if the
        object exists and if it did not expire, else it calls the function
        `f` and stores the output in the cache corresponding to `key`. In
        that case the output of the function is returned.

        :param key: the key of the object to be stored or retrieved
        :param time_expire: expiration of the cache in seconds
        :param cache_model: "ram", "disk", or other
            (like "memcache" if defined). It defaults to "ram".

        Notes
        -----
        `time_expire` is used to compare the current time with the time when
        the requested object was last saved in the cache. It does not affect
        future requests.
        Setting `time_expire` to 0 or a negative value forces the cache to
        refresh.

        If the function `f` is an action, we suggest using
        `@cache.action` instead.
        """

        def tmp(func, cache=self, cache_model=cache_model):
            return CacheAction(func, key, time_expire, self, cache_model)
        return tmp

    @staticmethod
    def with_prefix(cache_model, prefix):
        """
        Allows replacing cache.ram with cache.with_prefix(cache.ram, 'prefix');
        it will add the prefix to all the cache keys used.
        """
        return lambda key, f, time_expire=DEFAULT_TIME_EXPIRE, prefix=prefix:\
            cache_model(prefix + key, f, time_expire)
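
    # Usage sketch (editor's example, not part of gluon/cache.py): the wrapper
    # returned by with_prefix keeps the (key, f, time_expire) signature, so it
    # can be passed wherever a cache model is expected; load_profile() is a
    # placeholder and `cache` is assumed to be a Cache instance.
    #
    #     user_cache = Cache.with_prefix(cache.ram, 'user42:')
    #     profile = user_cache('profile', lambda: load_profile(), time_expire=60)
    #     # the value is stored under the key 'user42:profile'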


def lazy_cache(key=None, time_expire=None, cache_model='ram'):
    """
    Can be used to cache any function, including functions in modules,
    as long as the cached function is only called within a web2py request.
    If a key is not provided, one is generated from the function name.
    time_expire defaults to None (no cache expiration).
    If cache_model is "ram" then the model is current.cache.ram, etc.
    """
    def decorator(f, key=key, time_expire=time_expire, cache_model=cache_model):
        key = key or repr(f)

        def g(*c, **d):
            from gluon import current
            return current.cache(key, time_expire, cache_model)(f)(*c, **d)
        g.__name__ = f.__name__
        return g
    return decorator
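
# Usage sketch (editor's example, not part of gluon/cache.py): lazy_cache is
# meant for module-level code, where no `cache` object exists at import time;
# the real cache is looked up on current.cache when the function is called,
# so the decorated function must only be called during a web2py request.
# build_report() is a placeholder.
#
#     from gluon.cache import lazy_cache
#
#     @lazy_cache('expensive_report', time_expire=300, cache_model='ram')
#     def expensive_report():
#         return build_report()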