"""XML manipulation."""
from __future__ import annotations
from logging import getLogger
from os.path import join, dirname, basename
from io import BytesIO
from textwrap import wrap
from filecmp import cmpfiles
from shutil import copy2
from json import loads
from lxml import etree
from pyramid.asset import abspath_from_asset_spec
from .i18n import _, translate
from .utils import normalize_spaces
LOG = getLogger(__name__)
XML_NS = '{http://www.w3.org/XML/1998/namespace}'
# =============================================================================
[docs]
def load_xml2(
        filename: str,
        relaxngs: dict | None = None,
        data: str | bytes | etree.Element | None = None,
        noline: bool = False,
        parser: etree.XMLParser | None = None,
        xinclude: bool = False) -> tuple[etree.ElementTree | None, str | None]:
    """Load an XML document and optionally validate it with Relax NG.

    Same job as :func:`load_xml`, but the outcome is reported as a
    ``(tree, error)`` pair in which exactly one member is ``None``.

    :param str filename:
        Path to XML file. It is only read when ``data`` is ``None``, but its
        base name is always used in the "unknown file" message.
    :param dict relaxngs: (optional)
        Relax NG dictionary such as ``{<pattern>: <relax_ng_file>,...}``. If it
        is ``None``, no validation is performed.
    :type  data: str, bytes or :class:`lxml.etree.ElementTree`
    :param data: (optional)
        Content of the XML document. If it is not ``None``, it is used in place
        of the content of the file ``filename``. An already parsed tree is
        used as is; a string is encoded to bytes before parsing.
    :param bool noline: (default=False)
        If ``True``, the error message does not contain line numbers.
    :type  parser: :class:`etree.XMLParser`
    :param parser: (optional)
        Specific parser for ``etree.parse`` function.
    :param bool xinclude: (default=False)
        If ``True``, activate XInclude.
    :rtype: tuple
    :return:
        A tuple such as ``(tree, error)``: ``(tree, None)`` on success,
        ``(None, message)`` when reading, XInclude processing or validation
        fails.
    """
    # pylint: disable = protected-access
    if isinstance(data, etree._ElementTree):
        # Already parsed: nothing to read.
        tree = data
    else:
        if data is None:
            source = filename
        elif isinstance(data, bytes):
            source = BytesIO(data)
        else:
            # Text content is handed to the parser as bytes.
            source = BytesIO(data.encode())
        try:
            tree = etree.parse(source, parser=parser)
        except IOError:
            return None, _('Unknown file "${n}"', {'n': basename(filename)})
        except etree.XMLSyntaxError as error:
            return None, str(error)
    # pylint: enable = protected-access

    # XInclude
    if xinclude:
        try:
            tree.xinclude()
        except etree.XIncludeError as error:
            return None, str(error)

    # Validate
    if relaxngs is not None:
        message = validate_xml(tree, relaxngs, noline)
        if message is not None:
            return None, message
    return tree, None
# =============================================================================
[docs]
def load_xml(filename, relaxngs=None, data=None, noline=False, parser=None,
             xinclude=False):
    """Load an XML document and validate it against a Relax NG file.

    :param str filename:
        Path to XML file.
    :param dict relaxngs: (optional)
        Relax NG dictionary such as ``{<pattern>: <relax_ng_file>,...}``. If it
        is ``None``, no validation is performed.
    :type  data: str, bytes or :class:`lxml.etree.ElementTree`
    :param data: (optional)
        Content of the XML document. If it is not ``None``, it is used in place
        of the content of the file ``filename``.
    :param bool noline: (default=False)
        If ``True``, the error message does not contain line numbers.
    :type  parser: :class:`etree.XMLParser`
    :param parser: (optional)
        Specific parser for ``etree.parse`` function.
    :param bool xinclude: (default=False)
        If ``True``, activate XInclude.
    :rtype: str, :class:`TranslationString` or :class:`ElementTree`
    :return:
        An error message or an instance of :class:`lxml.etree.ElementTree`
        class.
    """
    # This function used to be a line-for-line copy of load_xml2(). Delegating
    # keeps a single implementation of the read / XInclude / validate steps;
    # only the shape of the result differs between the two entry points.
    tree, error = load_xml2(
        filename, relaxngs=relaxngs, data=data, noline=noline, parser=parser,
        xinclude=xinclude)
    return tree if error is None else error
# =============================================================================
[docs]
def validate_xml(tree, relaxngs, noline=False):
    """Validate an XML document against the Relax NG matching its root.

    Each key of ``relaxngs`` is a pattern ``<tag>[, <attr>=<value>]...``. The
    first pattern whose tag equals the root tag and whose attribute
    constraints are all satisfied by the root element selects the schema.

    :type  tree: lxml.etree.ElementTree
    :param tree:
        XML document.
    :param dict relaxngs:
        Relax NG dictionary such as ``{<pattern>: <relax_ng_file>,...}``. When
        the selected value is a path, it is compiled and the compiled schema
        is stored back into this dictionary under the same pattern.
    :param bool noline: (default=False)
        If ``True``, the error message does not contain line numbers.
    :rtype: str, :class:`TranslationString` or ``None``
    :return:
        An error message or ``None``.
    """
    root = tree.getroot()

    # Find the right RelaxNG
    relaxng = None
    pattern = None
    for candidate in relaxngs:
        tag, *constraints = (
            chunk.strip() for chunk in candidate.split(','))
        if root.tag != tag:
            continue
        for constraint in constraints:
            # Split on the first '=' only, so that a value containing '=' is
            # compared in full. A constraint without '=' cannot be satisfied
            # and disqualifies the pattern instead of raising IndexError.
            name, separator, value = constraint.partition('=')
            if not separator or root.get(name) != value:
                break
        else:
            pattern = candidate
            relaxng = relaxngs[candidate]
            break
    if relaxng is None:
        return _('${tag}: Relax NG not found', {'tag': root.tag})

    # Load Relax NG
    if isinstance(relaxng, str):
        try:
            relaxng = etree.RelaxNG(etree.parse(relaxng))
        except IOError as error:
            return str(error)
        except (etree.XMLSyntaxError, etree.RelaxNGParseError) as error:
            # ``relaxng`` is still the file path here: the assignment above
            # did not complete.
            return '"{0}": {1}'.format(relaxng, error)
        # Keep the compiled schema for the next call with this dictionary.
        relaxngs[pattern] = relaxng

    # Validate
    if relaxng.validate(tree):
        return None
    err = relaxng.error_log.last_error
    if noline:
        return err.message
    return _('Line ${l}: ${m}', {'l': err.line, 'm': err.message})
# =============================================================================
[docs]
def relaxng4validation(relaxng, attributes=('version',)):
    """Convert a Relax NG description into a :func:`validate_xml` dictionary.

    The description has the keys ``'root'`` and ``'file'`` and possibly
    ``'namespace'`` and ``'version'``. The result has a single entry whose key
    is the pattern ``<root>[, <attribute>=<value>]...`` (the root being
    written ``{namespace}root`` when a namespace is given) and whose value is
    the Relax NG file.

    :param dict relaxng:
        A Chrysalio Relax NG dictionary.
    :param tuple attributes: (default=('version',))
        Attributes to take into account in the pattern. Those absent from
        ``relaxng`` are ignored.
    :rtype: :class:`dict` or ``None``
    :return:
        ``None`` if ``relaxng`` is empty or ``None``, otherwise the dictionary
        for :func:`validate_xml`.
    """
    if not relaxng:
        return None

    if 'namespace' in relaxng:
        parts = [f"{{{relaxng['namespace']}}}{relaxng['root']}"]
    else:
        parts = [relaxng['root']]
    parts.extend(
        f'{name}={relaxng[name]}' for name in attributes if name in relaxng)
    return {', '.join(parts): relaxng['file']}
# =============================================================================
[docs]
def load_xslt2(filename: str) -> tuple[etree.XSLT | None, str | None]:
    """Load a XSL file and create a etree.XSLT object.

    This function is similar to load_xslt() but reports its outcome as a
    ``(xslt, error)`` pair in which exactly one member is ``None``.

    :param str filename:
        Path to the XSL file.
    :rtype: tuple
    :return:
        A tuple such as ``(xslt, error)``: ``(xslt, None)`` on success,
        ``(None, message)`` if the file cannot be read, is not well-formed
        XML or is not a valid stylesheet.
    """
    try:
        xslt = etree.XSLT(etree.parse(filename))
    except (IOError, etree.XSLTParseError, etree.XMLSyntaxError) as error:
        return None, str(error)
    return xslt, None
# =============================================================================
[docs]
def load_xslt(filename: str):
    """Build an :class:`lxml.etree.XSLT` transformer from a XSL file.

    :param str filename:
        Path to the XSL file.
    :rtype: :class:`lxml.etree.XSLT` or :class:`str`
    :return:
        The transformer, or the error message if the file cannot be read, is
        not well-formed XML or is not a valid stylesheet.
    """
    try:
        stylesheet = etree.parse(filename)
        return etree.XSLT(stylesheet)
    except (IOError, etree.XSLTParseError, etree.XMLSyntaxError) as error:
        return str(error)
# =============================================================================
[docs]
def create_entire_xml(relaxng, elements, validation=True):
    """Assemble ``elements`` under a new root and optionally validate it.

    A single element without namespace is appended directly to the root.
    Otherwise, consecutive elements with the same tag are gathered: elements
    whose tag ends with ``s`` are appended to the root itself, the others are
    placed in a ``<tag>s`` group element, each one preceded by a comment
    carrying its label. Every change of tag is announced by a banner of three
    comments.

    :param dict relaxng:
        A dictionary with the name of the root element, the value of attribute
        version and path to the Relax NG file to validate the result.
    :param list elements:
        A list of :class:`lxml.etree._Element` objects.
    :param bool validation: (default=True)
        Validate the result.
    :rtype: :class:`~pyramid.i18n.TranslationString` or
        :class:`lxml.etree._Element`
    :return:
        An error message or an object :class:`lxml.etree._Element`.
    """
    def _label(elt):
        """Get label or name of ``elt``."""
        label = elt.get('{0}id'.format(XML_NS)) or elt.get('id')
        if not label:
            for child in ('login', 'label', 'title'):
                label = elt.findtext(child)
                if label:
                    break
            else:
                label = '~'
        return normalize_spaces(label)

    def _banner(parent, title):
        """Append a three-comment banner announcing ``title``."""
        rule = ' {0} '.format('=' * 68)
        parent.append(etree.Comment(rule))
        parent.append(etree.Comment('{0:^70}'.format(title)))
        parent.append(etree.Comment(rule))

    # Create XML root
    has_namespace = 'namespace' in relaxng
    if has_namespace:
        root_elt = etree.Element(
            '{{{0}}}{1}'.format(relaxng['namespace'], relaxng['root']),
            version=relaxng['version'],
            nsmap={None: relaxng['namespace']})
    else:
        root_elt = etree.Element(relaxng['root'], version=relaxng['version'])

    if len(elements) == 1 and not has_namespace:
        # Single export
        root_elt.append(elements[0])
    else:
        # Multiple export or namespace
        tag = None
        group_elt = None
        is_plural = False
        for elt in elements:
            if elt.tag != tag:
                # New run of elements: emit the banner and pick the parent.
                tag = elt.tag
                is_plural = tag.endswith('s')
                if is_plural:
                    _banner(root_elt, tag)
                    group_elt = root_elt
                else:
                    _banner(root_elt, '{0}s'.format(tag))
                    group_elt = etree.SubElement(
                        root_elt, '{0}s'.format(tag))
            if group_elt is None:
                continue
            if not is_plural:
                group_elt.append(etree.Comment(' {0:~^66} '.format(
                    ' {0} '.format(_label(elt)))))
            group_elt.append(elt)

    # Validate the result
    if not validation:
        return root_elt
    if has_namespace:
        # The tree is serialized and parsed again before being validated.
        root_elt = etree.XML(etree.tostring(root_elt, encoding='utf-8'))
    error = validate_xml(
        etree.ElementTree(root_elt), relaxng4validation(relaxng), True)
    return root_elt if error is None else translate(error)
# =============================================================================
[docs]
def i18n_xml_text(root_elt, xpath, namespaces=None):
    """Collect the localized texts selected by ``xpath`` in an XML element.

    Only the elements having both a non-empty ``xml:lang`` attribute and a
    non-empty text are kept. If several elements share the same language, the
    last one wins.

    :type  root_elt: lxml.etree.Element
    :param root_elt:
        XML root element of the localized text.
    :param str xpath:
        XPath expression to select localized texts.
    :param dict namespaces: (optional)
        Dictionary of possible name spaces.
    :rtype: dict
    :return:
        A dictionary such as ``{lang1: text_in_lang1,...}`` whose texts have
        been passed through ``normalize_spaces``.
    """
    lang_attr = '{0}lang'.format(XML_NS)
    candidates = (
        (elt.get(lang_attr), elt.text)
        for elt in root_elt.xpath(xpath, namespaces=namespaces))
    return {
        lang: normalize_spaces(text)
        for lang, text in candidates if lang and text}
# =============================================================================
[docs]
def db2xml_i18n_labels(dbitem, root_elt, depth):
    """Serialize i18n label and description.

    One ``<label>`` child is added per language of ``dbitem.i18n_label`` and,
    if ``dbitem`` has a non-empty ``i18n_description``, one ``<description>``
    child per language of it. Languages are processed in sorted order and
    description texts are wrapped with :func:`xml_wrap`.

    :param dbitem:
        SQLAlchemy item.
    :type  root_elt: lxml.etree.Element
    :param root_elt:
        Root XML element.
    :param int depth:
        Depth of the parent element in the entire XML structure.
    """
    lang_attr = '{0}lang'.format(XML_NS)

    # Labels are decoded from JSON.
    labels = loads(dbitem.i18n_label)
    for lang in sorted(labels):
        label_elt = etree.SubElement(root_elt, 'label')
        label_elt.set(lang_attr, lang)
        label_elt.text = labels[lang]

    # NOTE(review): unlike the label, the description is used without
    # ``loads`` -- presumably it is already a mapping; confirm with the model.
    descriptions = getattr(dbitem, 'i18n_description', None)
    if not descriptions:
        return
    for lang in sorted(descriptions):
        description_elt = etree.SubElement(root_elt, 'description')
        description_elt.set(lang_attr, lang)
        description_elt.text = xml_wrap(descriptions[lang], depth)
# =============================================================================
[docs]
def xml_wrap(text, depth):
    """Wrap and indent a text so that it fits inside its parent element.

    The text is wrapped so that, once indented by two spaces per level below
    the parent, no line exceeds 79 columns. The result starts with a new line
    and ends with a new line followed by the indentation of the parent, ready
    to receive the closing tag.

    :param str text:
        Text to wrap and indent.
    :param int depth:
        Depth of the parent element in the entire XML structure.
    :rtype: str
    """
    parent_indent = '  ' * depth
    text_indent = '  ' * (depth + 1)
    width = 79 - 2 * (depth + 1)
    lines = [text_indent + line for line in wrap(text, width)]
    return '\n' + '\n'.join(lines) + '\n' + parent_indent
# =============================================================================
[docs]
def check_chrysalio_rng(relaxng_dir):
    """Bring the Chrysalio Relax NG files of ``relaxng_dir`` up to date.

    ``chrysalio.rnc`` and ``chrysalio.rng`` are compared with the reference
    copies found in the ``RelaxNG`` directory located beside the parent of
    this module. Only the files that ``cmpfiles`` reports as differing are
    overwritten with the reference copy; a failed copy is logged as a warning.

    :param str relaxng_dir:
        Directory for Relax NG files, possibly as an asset specification.
    """
    target_dir = abspath_from_asset_spec(relaxng_dir)
    reference_dir = join(dirname(__file__), '..', 'RelaxNG')
    # cmpfiles() returns (match, mismatch, errors): keep the mismatches.
    outdated = cmpfiles(
        reference_dir, target_dir, ('chrysalio.rnc', 'chrysalio.rng'))[1]
    for name in outdated:
        try:
            copy2(join(reference_dir, name), target_dir)
        except IOError:  # pragma: nocover
            LOG.warning('"%s" is not up to date.', name)