[SOLVED] What is use of sanitize attribute in html type field in odoo?

What is use of sanitize attribute in html type field in odoo?

In html type of field one attribute is available, In which we can pass True/False.

body_html = fields.Html('Body', translate=True, sanitize=False, help="Rich-text/HTML version of the message (placeholders may be used here)")

body_html = fields.Html('Body', translate=True, sanitize=True, help="Rich-text/HTML version of the message (placeholders may be used here)")

If we set True/False then we are getting same result.

What is difference if we set True/False in this field ?

Solution

It's just telling Odoo if to clean html code, like deleting scripts, tags/nodes, etc. For more information look into the code.

def html_sanitize(src, silent=True, sanitize_tags=True, sanitize_attributes=False, sanitize_style=False, strip_style=False, strip_classes=False):
    if not src:
        return src
    src = ustr(src, errors='replace')
    # html: remove encoding attribute inside tags
    doctype = re.compile(r'(<[^>]*\s)(encoding=(["\'][^"\']*?["\']|[^\s\n\r>]+)(\s[^>]*|/)?>)', re.IGNORECASE | re.DOTALL)
    src = doctype.sub(r"", src)

    logger = logging.getLogger(__name__ + '.html_sanitize')

    # html encode email tags
    part = re.compile(r"(<(([^a<>]|a[^<>\s])[^<>]*)@[^<>]+>)", re.IGNORECASE | re.DOTALL)
    # remove results containing cite="mid:email_like@address" (ex: blockquote cite)
    # cite_except = re.compile(r"^((?!cite[\s]*=['\"]).)*$", re.IGNORECASE)
    src = part.sub(lambda m: ('cite=' not in m.group(1) and 'alt=' not in m.group(1)) and cgi.escape(m.group(1)) or m.group(1), src)
    # html encode mako tags <% ... %> to decode them later and keep them alive, otherwise they are stripped by the cleaner
    src = src.replace('<%', cgi.escape('<%'))
    src = src.replace('%>', cgi.escape('%>'))

    kwargs = {
        'page_structure': True,
        'style': strip_style,              # True = remove style tags/attrs
        'sanitize_style': sanitize_style,  # True = sanitize styling
        'forms': True,                     # True = remove form tags
        'remove_unknown_tags': False,
        'comments': False,
        'processing_instructions': False
    }
    if sanitize_tags:
        kwargs['allow_tags'] = allowed_tags
        if etree.LXML_VERSION >= (2, 3, 1):
            # kill_tags attribute has been added in version 2.3.1
            kwargs.update({
                'kill_tags': tags_to_kill,
                'remove_tags': tags_to_remove,
            })
        else:
            kwargs['remove_tags'] = tags_to_kill + tags_to_remove

    if sanitize_attributes and etree.LXML_VERSION >= (3, 1, 0):  # lxml < 3.1.0 does not allow to specify safe_attrs. We keep all attributes in order to keep "style"
        if strip_classes:
            current_safe_attrs = safe_attrs - frozenset(['class'])
        else:
            current_safe_attrs = safe_attrs
        kwargs.update({
            'safe_attrs_only': True,
            'safe_attrs': current_safe_attrs,
        })
    else:
        kwargs.update({
            'safe_attrs_only': False,  # keep oe-data attributes + style
            'strip_classes': strip_classes,  # remove classes, even when keeping other attributes
        })

    try:
        # some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
        cleaner = _Cleaner(**kwargs)
        cleaned = cleaner.clean_html(src)
        # MAKO compatibility: $, { and } inside quotes are escaped, preventing correct mako execution
        cleaned = cleaned.replace('%24', '$')
        cleaned = cleaned.replace('%7B', '{')
        cleaned = cleaned.replace('%7D', '}')
        cleaned = cleaned.replace('%20', ' ')
        cleaned = cleaned.replace('%5B', '[')
        cleaned = cleaned.replace('%5D', ']')
        cleaned = cleaned.replace('%7C', '|')
        cleaned = cleaned.replace('&lt;%', '<%')
        cleaned = cleaned.replace('%&gt;', '%>')
        # html considerations so real html content match database value
        cleaned.replace(u'\xa0', '&nbsp;')
    except etree.ParserError, e:
        if 'empty' in str(e):
            return ""
        if not silent:
            raise
        logger.warning('ParserError obtained when sanitizing %r', src, exc_info=True)
        cleaned = '<p>ParserError when sanitizing</p>'
    except Exception:
        if not silent:
            raise
        logger.warning('unknown error obtained when sanitizing %r', src, exc_info=True)
        cleaned = '<p>Unknown error when sanitizing</p>'

    # this is ugly, but lxml/etree tostring want to put everything in a 'div' that breaks the editor -> remove that
    if cleaned.startswith('<div>') and cleaned.endswith('</div>'):
        cleaned = cleaned[5:-6]

    return cleaned