#!/usr/bin/python3 # -*- tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4 -*- # # This file is part of the LibreOffice project. # # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. #
import sys # sadly need lxml because the python one doesn't preserve namespace prefixes # and type-detection looks for the string "office:document" from lxml import etree as ET #import xml.etree.ElementTree as ET
# document content
ps = sum([root.findall(e) for e in elementnames], [])
usedpstyles = set()
usedcondstyles = set() for p in ps:
usedpstyles.add(p.get("{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name")) if p.get("{urn:oasis:names:tc:opendocument:xmlns:text:1.0}cond-style-name"):
usedcondstyles.add(p.get("{urn:oasis:names:tc:opendocument:xmlns:text:1.0}cond-style-name")) if p.get("{urn:oasis:names:tc:opendocument:xmlns:text:1.0}class-names"): for style in p.get("{urn:oasis:names:tc:opendocument:xmlns:text:1.0}class-names").split(" "):
usedpstyles.add(style) for shape in root.findall(".//*[@{urn:oasis:names:tc:opendocument:xmlns:drawing:1.0}text-style-name]"):
usedpstyles.add(shape.get("{urn:oasis:names:tc:opendocument:xmlns:drawing:1.0}text-style-name")) for tabletemplate in root.findall(".//*[@{urn:oasis:names:tc:opendocument:xmlns:table:1.0}paragraph-style-name]"):
usedpstyles.add(tabletemplate.get("{urn:oasis:names:tc:opendocument:xmlns:table:1.0}paragraph-style-name")) for page in root.findall(".//*[@{urn:oasis:names:tc:opendocument:xmlns:style:1.0}register-truth-ref-style-name]"):
usedpstyles.add(page.get("{urn:oasis:names:tc:opendocument:xmlns:style:1.0}register-truth-ref-style-name")) for form in root.findall(".//*[@{urn:oasis:names:tc:opendocument:xmlns:form:1.0}text-style-name]"):
usedpstyles.add(form.get("{urn:oasis:names:tc:opendocument:xmlns:form:1.0}text-style-name")) # conditional styles for condstyle in usedcondstyles: for map_ in root.findall(".//{urn:oasis:names:tc:opendocument:xmlns:style:1.0}style[@{urn:oasis:names:tc:opendocument:xmlns:style:1.0}family='paragraph'][@{urn:oasis:names:tc:opendocument:xmlns:style:1.0}name='" + condstyle + "']/{urn:oasis:names:tc:opendocument:xmlns:style:1.0}map"):
usedpstyles.add(map_.get("{urn:oasis:names:tc:opendocument:xmlns:style:1.0}apply-style-name")) # other styles for notesconfig in root.findall(".//*[@{urn:oasis:names:tc:opendocument:xmlns:text:1.0}default-style-name]"):
usedpstyles.add(notesconfig.get("{urn:oasis:names:tc:opendocument:xmlns:text:1.0}default-style-name")) return usedpstyles
def add_parent_styles(usedstyles, styles):
    """Extend *usedstyles* with the styles that used styles depend on.

    For every element of *styles* whose ``style:name`` is in *usedstyles*,
    the values of its ``style:parent-style-name`` and
    ``style:next-style-name`` attributes are added to *usedstyles* when they
    are present and non-empty.  The scan is repeated until a complete pass
    adds nothing new, so chains are followed whatever the order of *styles*.

    usedstyles -- set of style names; modified in place.
    styles -- collection of elements providing ``get()``; it is iterated
              several times, so it must not be a one-shot iterator.

    Returns None.
    """
    name_attr = "{urn:oasis:names:tc:opendocument:xmlns:style:1.0}name"
    link_attrs = (
        "{urn:oasis:names:tc:opendocument:xmlns:style:1.0}parent-style-name",
        # only for paragraph styles and master-pages
        "{urn:oasis:names:tc:opendocument:xmlns:style:1.0}next-style-name",
    )
    size = -1
    while size != len(usedstyles):
        size = len(usedstyles)
        for style in styles:
            if style.get(name_attr) not in usedstyles:
                continue
            for attr in link_attrs:
                target = style.get(attr)
                if target:
                    usedstyles.add(target)
def remove_unused_styles(root, usedstyles, styles, name):
    """Remove the elements of *styles* whose style:name is not in *usedstyles*.

    Each unused style is detached from ``office:automatic-styles`` or, if it
    is not a child of that element, from ``office:styles``.  The name of
    every inspected style and a message for every removal are printed.

    root -- element below which the two style containers are searched.
    usedstyles -- set of style names that must be kept.
    styles -- iterable of candidate style elements.
    name -- human-readable style kind, used only in the printed message.

    Raises ValueError if an unused style is a child of neither container.
    """
    name_attr = "{urn:oasis:names:tc:opendocument:xmlns:style:1.0}name"
    office = "{urn:oasis:names:tc:opendocument:xmlns:office:1.0}"
    # Look both containers up once.  Either may be missing from the
    # document, in which case find() returns None and it is skipped instead
    # of failing with AttributeError on None.remove().
    candidates = (
        root.find(".//" + office + "automatic-styles"),
        root.find(".//" + office + "styles"),
    )
    containers = [c for c in candidates if c is not None]
    for style in styles:
        stylename = style.get(name_attr)
        print(stylename)
        if stylename in usedstyles:
            continue
        print("removing unused " + name + " " + stylename)
        # it is really dumb that there is no parent pointer in dom
        for container in containers:
            try:
                container.remove(style)
            except ValueError:
                # not a child of this container, try the next one
                continue
            break
        else:
            raise ValueError(
                "style " + stylename + " is not a child of "
                "office:automatic-styles or office:styles"
            )
def remove_unused_drawings(root, useddrawings, drawings, name):
    """Remove the elements of *drawings* whose draw:name is not in *useddrawings*.

    Unused elements are detached from the ``office:styles`` element found
    below *root*.  The name of every inspected element and a message for
    every removal are printed.

    root -- element below which ``office:styles`` is searched.
    useddrawings -- set of draw:name values that must be kept.
    drawings -- iterable of candidate elements.
    name -- human-readable kind, used only in the printed message.

    Raises ValueError if an unused element is not a child of office:styles.
    """
    name_attr = "{urn:oasis:names:tc:opendocument:xmlns:drawing:1.0}name"
    # The container does not change while its children are removed, so one
    # lookup is enough.
    container = root.find(".//{urn:oasis:names:tc:opendocument:xmlns:office:1.0}styles")
    for drawing in drawings:
        drawingname = drawing.get(name_attr)
        print(drawingname)
        if drawingname not in useddrawings:
            print("removing unused " + name + " " + drawingname)
            container.remove(drawing)
def collect_all_attribute(usedstyles, attribute, tree=None):
    """Add the value of *attribute* on every descendant carrying it to *usedstyles*.

    usedstyles -- set that receives the attribute values; modified in place.
    attribute -- attribute name in ``{namespace}local`` notation.
    tree -- element whose descendants are searched.  When omitted, the
            module-level name ``root`` is used, as before this parameter
            existed.  NOTE(review): the assignment of that global is not
            visible in the reviewed part of the file - confirm the script
            entry point sets it.

    Returns None.
    """
    if tree is None:
        tree = root
    for element in tree.findall(".//*[@" + attribute + "]"):
        usedstyles.add(element.get(attribute))
def collect_all_attribute_list(usedstyles, attribute, tree=None):
    """Add every name listed in *attribute* on any descendant to *usedstyles*.

    The attribute value is treated as a whitespace-separated list of names.
    Splitting on arbitrary whitespace keeps repeated, leading or trailing
    blanks from adding an empty string to the set.

    usedstyles -- set that receives the names; modified in place.
    attribute -- attribute name in ``{namespace}local`` notation.
    tree -- element whose descendants are searched.  When omitted, the
            module-level name ``root`` is used, as before this parameter
            existed.  NOTE(review): the assignment of that global is not
            visible in the reviewed part of the file - confirm the script
            entry point sets it.

    Returns None.
    """
    if tree is None:
        tree = root
    for element in tree.findall(".//*[@" + attribute + "]"):
        usedstyles.update(element.get(attribute).split())
def remove_unused(root):
    """Strip unreferenced definitions and bookkeeping data from an ODF tree.

    Working in place on *root*, this
      1) removes master pages that are not referenced by a used paragraph
         style, by the style of a table, by a text:/draw: master-page-name
         attribute, or by the parent/next chain of a used master page
         ("Standard" is always kept),
      3) removes list styles that nothing references,
      13) removes font-face declarations that nothing references,
      14) deletes the rsid and paragraph-rsid attributes of the text
          properties of styles,
      15) removes user-field declarations that are never read or input,
    and finally removes office:settings, office:scripts and loext:theme.

    NOTE(review): the step numbers follow the existing comments; steps 2 and
    4-12 are not present in the code as reviewed.

    root -- lxml element of the document; ``getparent()`` is required.

    Returns None.  Progress and debugging output is printed to stdout.
    """
    ns_office = "{urn:oasis:names:tc:opendocument:xmlns:office:1.0}"
    ns_style = "{urn:oasis:names:tc:opendocument:xmlns:style:1.0}"
    ns_text = "{urn:oasis:names:tc:opendocument:xmlns:text:1.0}"
    ns_table = "{urn:oasis:names:tc:opendocument:xmlns:table:1.0}"
    ns_draw = "{urn:oasis:names:tc:opendocument:xmlns:drawing:1.0}"
    ns_db = "{urn:oasis:names:tc:opendocument:xmlns:database:1.0}"
    ns_officeooo = "{http://openoffice.org/2009/office}"
    ns_loext = "{urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0}"

    # 1) find all elements that may reference page styles - this gets rid of some paragraphs
    usedpstyles = get_used_p_styles(root)
    print(usedpstyles)

    usedtstyles = set()
    tables = root.findall(".//" + ns_table + "table")
    print(tables)
    for table in tables:
        usedtstyles.add(table.get(ns_table + "style-name"))

    pstyles = root.findall(".//" + ns_style + "style[@" + ns_style + "family='paragraph']")
    tstyles = root.findall(".//" + ns_style + "style[@" + ns_style + "family='table']")

    usedmasterpages = {"Standard"}  # assume this is the default on page 1
    # only automatic styles may have page breaks in LO, so no need to chase parents or nexts
    for pstyle in pstyles:
        print(pstyle.get(ns_style + "name"))
        if pstyle.get(ns_style + "name") in usedpstyles:
            usedmasterpages.add(pstyle.get(ns_style + "master-page-name"))
    for tstyle in tstyles:
        if tstyle.get(ns_style + "name") in usedtstyles:
            usedmasterpages.add(tstyle.get(ns_style + "master-page-name"))
    for attr in (ns_text + "master-page-name", ns_draw + "master-page-name"):
        for node in root.findall(".//*[@" + attr + "]"):
            usedmasterpages.add(node.get(attr))
    print(usedmasterpages)

    # iterate parent/next until no more masterpage is added
    masterpages = root.findall(".//" + ns_style + "master-page")
    size = -1
    while size != len(usedmasterpages):
        size = len(usedmasterpages)
        for mp in masterpages:
            if mp.get(ns_style + "name") not in usedmasterpages:
                continue
            for attr in (ns_style + "parent-style-name", ns_style + "next-style-name"):
                if mp.get(attr):
                    usedmasterpages.add(mp.get(attr))

    # remove unused masterpages
    for mp in masterpages:
        if mp.get(ns_style + "name") not in usedmasterpages:
            print("removing unused master page " + mp.get(ns_style + "name"))
            mp.getparent().remove(mp)

    # 3) unused list styles - keep referenced from still used paragraph styles
    usedliststyles = set()
    list_style_refs = (
        (".//*", ns_style + "list-style-name"),
        (".//" + ns_text + "list", ns_text + "style-name"),
        (".//" + ns_text + "list-item", ns_text + "style-override"),
        # each numbered-paragraph contributes its own style-name; reading it
        # from a previous loop's variable would miss these references
        (".//" + ns_text + "numbered-paragraph", ns_text + "style-name"),
    )
    for path, attr in list_style_refs:
        for element in root.findall(path + "[@" + attr + "]"):
            usedliststyles.add(element.get(attr))
    # ignore ones that are children of style:graphic-properties, those must be handled as the containing style
    # there is no inheritance for these
    liststyles = root.findall("./*/" + ns_text + "list-style")
    remove_unused_styles(root, usedliststyles, liststyles, "list style")

    # collect the table, column, row and cell styles that are referenced
    # NOTE(review): nothing in the reviewed code initialises or consumes
    # these four sets; they are created here so that the collection below
    # cannot fail with NameError - confirm against the complete file.
    usedtablestyles = set()
    usedtablecolumnstyles = set()
    usedtablerowstyles = set()
    usedtablecellstyles = set()
    table_tags = {
        ns_table + "table",
        # NOTE(review): this was spelled "...}table:background", which can
        # never equal an element tag; table:background is assumed to be the
        # intended element - confirm.
        ns_table + "background",
    }
    tablecell_tags = {
        ns_table + "covered-table-cell",
        ns_table + "table-cell",
        ns_table + "body",
        ns_table + "even-columns",
        ns_table + "even-rows",
        ns_table + "first-column",
        ns_table + "first-row",
        ns_table + "last-column",
        ns_table + "last-row",
        ns_table + "odd-columns",
        ns_table + "odd-rows",
    }
    for element in root.findall(".//*[@" + ns_table + "style-name]"):
        style = element.get(ns_table + "style-name")
        if element.tag == ns_table + "table-column":
            usedtablecolumnstyles.add(style)
        elif element.tag == ns_table + "table-row":
            usedtablerowstyles.add(style)
        elif element.tag in table_tags:
            usedtablestyles.add(style)
        elif element.tag in tablecell_tags:
            usedtablecellstyles.add(style)

    for element in root.findall(".//*[@" + ns_db + "style-name]"):
        style = element.get(ns_db + "style-name")
        if element.tag == ns_db + "column":
            usedtablecolumnstyles.add(style)
        else:  # db:query db:table-representation
            usedtablestyles.add(style)

    # 13) unused font-face-decls
    usedfonts = set()
    collect_all_attribute(usedfonts, ns_style + "font-name")
    collect_all_attribute(usedfonts, ns_style + "font-name-asian")
    collect_all_attribute(usedfonts, ns_style + "font-name-complex")
    for font in root.findall(".//" + ns_style + "font-face"):
        if font.get(ns_style + "name") not in usedfonts:
            print("removing unused font-face " + font.get(ns_style + "name"))
            font.getparent().remove(font)

    # 14) remove rsid attributes
    for style in root.findall(".//" + ns_style + "style"):
        tp = style.find(".//" + ns_style + "text-properties")
        if tp is None:
            continue
        for rsid in ("rsid", "paragraph-rsid"):
            if ns_officeooo + rsid in tp.attrib:
                print("removing " + rsid + " from " + style.get(ns_style + "name"))
                del tp.attrib[ns_officeooo + rsid]

    # 15) unused user field decls
    useduserfields = set()
    for tag in ("user-field-get", "user-field-input"):
        for field in root.findall(".//" + ns_text + tag):
            useduserfields.add(field.get(ns_text + "name"))
    for field in root.findall(".//" + ns_text + "user-field-decl"):
        if field.get(ns_text + "name") not in useduserfields:
            print("removing unused user-field-decl " + field.get(ns_text + "name"))
            # detach from the actual parent: the first user-field-decls
            # element in the document need not be the one holding this field
            field.getparent().remove(field)

    # remove office:settings
    # scripts are almost never needed
    # remove theme
    for tag in (ns_office + "settings", ns_office + "scripts", ns_loext + "theme"):
        element = root.find(".//" + tag)
        if element is not None:
            element.getparent().remove(element)

    # TODO: replace embedded image with some tiny one
    # TODO: perhaps replace text with xxx (optionally)?
if __name__ == "__main__":
    # The first two command-line arguments are bound to infile and outfile.
    # NOTE(review): nothing that parses infile, assigns the module-level
    # ``root`` or writes outfile is visible in the reviewed source - confirm
    # against the complete file.
    infile, outfile = sys.argv[1], sys.argv[2]
"""
TODO
chart:style-name
-> chart
style:data-style-name
-> data style
style:percentage-data-style-name
-> data style """
# vim: set shiftwidth=4 softtabstop=4 expandtab:
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.0.17Bemerkung:
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.