php
// Sample markup; the line breaks inside the quotes are part of the string value.
$s = '<dd>其实应该魂
归月球上更好
一些。。。</dd>';

// Decode HTML entities found in $s and return the result as UTF-8 text.
// NOTE(review): the 'HTML-ENTITIES' mbstring encoding is deprecated as of PHP 8.2.
// html_entity_decode() or mb_decode_numericentity() are candidate replacements;
// confirm they treat this input (e.g. references lacking a ';') the same way
// before switching.
$s = mb_convert_encoding($s, 'UTF-8', 'HTML-ENTITIES');
python
import re
def convert_callback(matches):
    """Turn one matched numeric character reference into its character.

    Meant to be passed as the replacement callable to ``re.sub``.

    Args:
        matches: a regex match object whose group 1 holds the decimal digits
            of the code point (for example ``"20854"`` from ``&#20854;``).

    Returns:
        The character for that code point, or the matched text unchanged when
        the number is not a valid code point.
    """
    try:
        to_char = unichr  # Python 2: chr() there only covers 0-255
    except NameError:
        to_char = chr  # Python 3: chr() accepts any Unicode code point
    try:
        return to_char(int(matches.group(1)))
    except (ValueError, OverflowError):
        # Out-of-range code point: keep the reference exactly as written
        # instead of leaving bare digits behind in the text.
        return matches.group(0)
# Sample markup; the newlines inside the triple quotes are part of the value.
data = """<dd>其实应该魂
归月球上更好
一些。。。</dd>"""

# Match decimal character references (&#NNN) that end with ';' or are directly
# followed by whitespace, and let convert_callback produce each replacement.
# The raw string keeps \d and \s as regex escapes rather than string escapes.
# print(...) with a single argument runs on both Python 2 and Python 3.
print(re.sub(r"&#(\d+)(;|(?=\s))", convert_callback, data))