# Source code for abbyy_to_epub3.utils

# Copyright 2017 Deborah Kaplan
#
# This file is part of Abbyy-to-epub3.
# Source code is available at <https://github.com/deborahgu/abbyy-to-epub3>.
#
# Abbyy-to-epub3 is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


def is_increasing(l):
    """
    Report whether the items of a sequence are strictly increasing.

    Every item is compared with its immediate successor. The result is
    False as soon as some item is greater than or equal to the one that
    follows it, and True otherwise. Sequences with fewer than two items
    have no adjacent pairs and are therefore reported as increasing.
    """
    # Pair each item with its successor; any pair that fails to go up
    # (current >= following) disqualifies the whole sequence.
    adjacent_pairs = zip(l, l[1:])
    return not any(current >= following for current, following in adjacent_pairs)
def dirtify_xml(text):
    """
    Re-adds forbidden characters to any XML string by decoding the five
    predefined XML entities (&lt; &gt; &quot; &apos; &amp;) back into the
    literal characters they stand for.

    Entities are decoded exactly once, so text that was escaped twice
    (e.g. '&amp;lt;') comes back as the singly escaped form ('&lt;').

    Could still cause problems in the unlikely event the input was never
    escaped and literally should contain a sequence such as '&amp;'.

    :param text: string possibly containing the predefined XML entities
    :return: the string with those entities replaced by literal characters
    """
    text = text.replace("&lt;", "<")
    text = text.replace("&gt;", ">")
    text = text.replace("&quot;", '"')
    text = text.replace("&apos;", "'")
    # The ampersand must be decoded last: decoding it first would turn
    # '&amp;lt;' into '&lt;' and then wrongly into '<'.
    text = text.replace("&amp;", "&")
    return text
def sanitize_xml(text):
    """
    Escapes the characters that are forbidden in XML text, replacing each
    with its predefined entity.
    """
    # Order matters: the ampersand is escaped first so that the
    # ampersands introduced by the later entities are not escaped again.
    substitutions = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ('"', "&quot;"),
        ("'", "&apos;"),
    )
    for raw, entity in substitutions:
        text = text.replace(raw, entity)
    return text
def gettext(elem):
    """
    Collect the text of an element together with the text of all of its
    descendants, in document order.

    The tail text following each child is stripped of surrounding
    whitespace before it is appended, which drops the whitespace that
    file formatting leaves between tags (unlike etree.itertext). The
    leading text of each element is kept as-is.
    """
    fragments = [elem.text or ""]
    for child in elem:
        # Recurse so that nested descendants contribute their text too.
        fragments.append(gettext(child))
        tail = child.tail
        if tail:
            fragments.append(tail.strip())
    return "".join(fragments)
def fast_iter(context, func):
    """
    Run ``func`` on every element produced by ``context``, releasing each
    element as soon as it has been handled in order to save memory.

    Based on StackOverflow modification of Liza Daly's fast_iter.

    ``context`` is iterated as (event, element) pairs; presumably it is an
    lxml ``iterparse`` iterator, since the elements must provide
    ``clear``, ``xpath``, ``getprevious`` and ``getparent``.
    """
    for _event, node in context:
        # func is responsible for handling any descendants it needs,
        # because the element is emptied right after the call.
        func(node)
        node.clear()
        # Also drop the now-processed siblings that precede the element
        # and each of its ancestors, so the tree does not keep them alive.
        for ancestor in node.xpath('ancestor-or-self::*'):
            while ancestor.getprevious() is not None:
                del ancestor.getparent()[0]
    # Only unbinds the local name; the caller's reference is unaffected.
    del context