import bleach
import markdown
def html_clean(htmlstr):
    """Sanitize an HTML string with bleach and linkify the text.

    The string is passed through ``bleach.clean`` with the tag and
    attribute whitelists defined below, and the cleaned result is then
    passed through ``bleach.linkify``.

    Args:
        htmlstr: The HTML markup to sanitize.

    Returns:
        The value returned by ``bleach.linkify`` for the cleaned markup.
    """
    # NOTE: every entry needs its own trailing comma. Adjacent string
    # literals are concatenated by Python, which previously merged 'u' and
    # 'table' into a single bogus 'utable' entry, so neither tag was allowed.
    tags = [
        'a', 'abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'i', 'li',
        'ol', 'strong', 'ul',
        'div', 'p', 'hr', 'br', 'pre', 'span',
        'h1', 'h2', 'h3', 'h4', 'h5',
        'del', 'dl', 'dd', 'img', 'sub', 'sup', 'u',
        'table', 'caption', 'thead', 'tbody', 'tr', 'th', 'td',
        'section',
    ]
    attributes = {
        '*': ['class', 'id'],
        'a': ['href', 'title', 'target'],
        'img': ['src', 'style', 'width', 'height'],
    }
    cleaned = bleach.clean(htmlstr, tags=tags, attributes=attributes)
    return bleach.linkify(cleaned)
def get_post_content(abspath):
    """Return the rendered content of the Markdown file at ``abspath``.

    The result is cached in a Redis hash under ``post_get:<abspath>`` with a
    one-hour expiry. On a cache hit the expiry is renewed and the cached
    values are returned; otherwise the file is read, converted with
    ``markdown``, sanitized with ``html_clean`` and stored in Redis.

    Args:
        abspath: Path of the Markdown file; also used to build the cache key.

    Returns:
        A ``(html, toc, meta)`` tuple: the sanitized HTML, ``md.toc`` (or
        ``''`` when falsy) and ``md.Meta`` (or ``{}`` when falsy).
    """
    # Imported locally so the function does not depend on a file-level
    # ``import json`` being present.
    import json
    from .utilRedis import redis_client as redis

    key = 'post_get:%s' % abspath
    ttl_seconds = 60 * 60 * 1

    # Fetch the hash in a single call instead of ``exists`` + ``hgetall``:
    # the key could expire between the two calls, which made the field
    # lookups below fail with KeyError. A missing key yields an empty dict.
    cached = redis.hgetall(key)
    try:
        # Field names are looked up as bytes, as in the original code
        # (assumes the client returns undecoded responses -- TODO confirm).
        html = cached[b'html'].decode('utf-8')
        toc = cached[b'toc'].decode('utf-8')
        # ``json.loads`` no longer accepts ``encoding=`` (removed in 3.9);
        # the payload is already decoded to ``str`` here.
        meta = json.loads(cached[b'meta'].decode('utf-8'))
    except KeyError:
        # Cache miss (or expired entry): fall through and render the file.
        pass
    else:
        redis.expire(key, ttl_seconds)
        return html, toc, meta

    with open(abspath, encoding='UTF-8') as f:
        content = f.read()

    md = markdown.Markdown(
        output_format='html5', encoding='utf-8', extensions=Constant.md_ext
    )
    html = html_clean(md.convert(content))
    toc = md.toc or ''
    meta = md.Meta or {}

    # Store the hash and set its expiry together through one pipeline.
    with redis.pipeline() as pipe:
        pipe.hmset(key, {'html': html, 'toc': toc, 'meta': json.dumps(meta)})
        pipe.expire(key, ttl_seconds)
        pipe.execute()
    return html, toc, meta