misc/fcwtool/fcw/__init__.py

367 lines
13 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
# fcw.py: a simple module/script collection for handling fcw files used by
# the SMART Meeting Pro smartboard software.
# Copyright 2019 Christopher Spinrath <christopher.spinrath@tu-dortmund.de>
# fcw.py is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# fcw.py is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with fcw.py. If not, see <http://www.gnu.org/licenses/>.
import click
import enum
import io
import os
import pathlib
import re
import shutil
import sys
import tempfile
import lxml.etree
import zipfile
# Suffix of the temporary directory an fcw archive is unpacked into.
TEMPDIR_SUFFIX = '.fcw.unpacked'

# Names of the two XML files looked up at the top level of the unpacked archive.
MANIFEST_FILE = 'imsmanifest.xml'
METADATA_FILE = 'metadata.xml'

# XML namespace URIs used when querying the manifest.
MANIFEST_DEFAULT_NAMESPACE = 'http://www.imsglobal.org/xsd/imscp_v1p1'
SMARTNOTEBOOK_NAMESPACE = 'http://www.smarttech.com/2006-01/notebook'

# Prefix -> namespace map passed to find()/findall() on the manifest tree.
MANIFEST_NAMESPACES = dict(
    default=MANIFEST_DEFAULT_NAMESPACE,
    lom='http://ltsc.ieee.org/xsd/LOM',
    smartgallery='http://www.smarttech.com/2006-01/gallery',
    smartnotebook=SMARTNOTEBOOK_NAMESPACE,
)

# The metadata file is queried with the same two prefixes as the manifest.
METADATA_NAMESPACES = {
    prefix: MANIFEST_NAMESPACES[prefix] for prefix in ('lom', 'smartgallery')
}

# Fully qualified ("{namespace}file") tag name of a manifest file entry.
FILE_TAG_NAME = '{' + MANIFEST_DEFAULT_NAMESPACE + '}file'
class FCWResource(enum.Enum):
    """Resource groups that can be looked up in the manifest of an fcw file.

    Each member's value is the ``identifier`` attribute used to select the
    matching ``<resource>`` element. The declaration order is the order in
    which code iterating over this enum visits the groups.
    """

    PAGES = 'pages'
    IMAGES = 'images'
    SOUNDS = 'sounds'
    ATTACHMENTS = 'attachments'
    FLASH = 'flash'
    VIDEOS = 'videos'
    ANNOTATIONMETADATA = 'annotationmetadata'
    BRUSH = 'brush'
class FCWFile:
    """An fcw archive unpacked into a temporary working directory.

    The archive is extracted on construction. ``manifest`` and ``metadata``
    give lazily parsed lxml trees of the two XML files, and ``save`` writes
    the (possibly modified) manifest back and re-packs the directory. Use the
    object as a context manager so the working directory is removed
    deterministically::

        with FCWFile('board.fcw') as fcw:
            fcw.export_pdf('board.pdf')
    """

    def __init__(self, filename):
        """Unpack the zip archive *filename* into a fresh temporary directory.

        Errors raised while unpacking propagate to the caller; the temporary
        directory is removed again in that case.
        """
        self._manifest = None
        self._metadata = None
        self._filename = filename
        self._workdir_handler = tempfile.TemporaryDirectory(suffix=TEMPDIR_SUFFIX)
        try:
            self._unpack()
        except BaseException:
            # Do not leave a half-extracted directory behind until the
            # finalizer of TemporaryDirectory happens to run.
            self._workdir_handler.cleanup()
            raise

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        """Remove the working directory; exceptions are never suppressed."""
        if self._workdir_handler:
            self._workdir_handler.cleanup()
        return False  # we don't handle exceptions here

    @property
    def workdir(self):
        """Path of the directory the archive was unpacked into."""
        return pathlib.Path(self._workdir_handler.name)

    @property
    def filename(self):
        """The archive name this object was created from."""
        return self._filename

    @property
    def manifest_filename(self):
        """Path of the manifest inside the working directory."""
        return self.workdir / MANIFEST_FILE

    @property
    def metadata_filename(self):
        """Path of the metadata file inside the working directory."""
        return self.workdir / METADATA_FILE

    @property
    def manifest(self):
        """The manifest as an lxml tree, parsed on first access and cached."""
        # Compare against None explicitly: the cache must not depend on the
        # truthiness of the parsed object.
        if self._manifest is None:
            self._manifest = lxml.etree.parse(str(self.manifest_filename))
        return self._manifest

    @property
    def metadata(self):
        """The metadata file as an lxml tree, parsed on first access and cached."""
        if self._metadata is None:
            self._metadata = lxml.etree.parse(str(self.metadata_filename))
        return self._metadata

    @property
    def manifest_version(self):
        """Value of the ``version`` attribute of the manifest root element."""
        root = self.manifest.getroot()
        return root.attrib['version']

    @property
    def creationdatetime(self):
        """Text of the ``lom:lifeCycle/smartgallery:creationdatetime`` element.

        The element is looked up in the metadata file; no handling is done
        for a file that lacks it.
        """
        root = self.metadata.getroot()
        query = './/lom:lifeCycle/smartgallery:creationdatetime'
        tag = root.find(query, METADATA_NAMESPACES)
        return tag.text

    @property
    def filesource(self):
        """Value of the smartnotebook ``filesource`` attribute of the manifest root."""
        root = self.manifest.getroot()
        return root.attrib["{{{}}}filesource".format(SMARTNOTEBOOK_NAMESPACE)]

    def get_resources(self, identifier):
        """Return the single ``<resource>`` element for *identifier*.

        *identifier* is an ``FCWResource`` member. Raises ``AssertionError``
        unless exactly one matching element exists.
        """
        root = self.manifest.getroot()
        query = './/default:resources/default:resource[@identifier="{}"]'.format(identifier.value)
        tags = root.findall(query, MANIFEST_NAMESPACES)
        # Raised explicitly (instead of using an assert statement) so that the
        # check is still performed when Python runs with -O.
        if len(tags) != 1:
            raise AssertionError('Failed to query resources "{}"'.format(identifier))
        return tags[0]

    def _get_file_path(self, xml_element):
        """Return the ``href`` attribute of *xml_element* as a Windows path."""
        # So far all paths were windows paths, thus we have to parse them as such
        return pathlib.PureWindowsPath(xml_element.attrib['href'])

    def get_pages(self):
        """Return the paths of all page files in manifest order."""
        return self.get_resource_files(FCWResource.PAGES)

    def get_resource_files(self, resource_type):
        """Return the paths of the ``<file>`` children of one resource group."""
        resources = self.get_resources(resource_type)
        tags = resources.findall('default:file', MANIFEST_NAMESPACES)
        return [self._get_file_path(t) for t in tags]

    def get_all_resource_files(self):
        """Return the file paths of every resource group as one flat list."""
        return [
            path
            for resource_type in FCWResource
            for path in self.get_resource_files(resource_type)
        ]

    def get_startpage(self):
        """Return ``(index, path)`` of the start page.

        The start page is the ``href`` of the pages resource element; the
        index is its position in ``get_pages()``.
        """
        resources = self.get_resources(FCWResource.PAGES)
        startpage = self._get_file_path(resources)
        pages = self.get_pages()
        index = pages.index(startpage)
        return (index, startpage)

    def set_startpage(self, index):
        """Make the page at position *index* of ``get_pages()`` the start page.

        Raises ``IndexError`` if *index* is out of range.
        """
        pages = self.get_pages()
        new_startpage = pathlib.PureWindowsPath(pages[index])
        resources = self.get_resources(FCWResource.PAGES)
        resources.set('href', str(new_startpage))

    @staticmethod
    def _page_index(page):
        """Return the first number in the file name of *page*, or ``None``.

        Only the final path component is inspected, so digits in a directory
        name cannot be mistaken for the page number.
        """
        match = re.search(r'[0-9]+', pathlib.PureWindowsPath(page).name)
        return int(match.group()) if match else None

    def _get_max_page_index(self):
        """Return the largest page number in use, or -1 if there is none."""
        # We assume here that page files are named pageN.svg where N is a
        # natural number; files that do not follow the scheme are ignored.
        indices = (self._page_index(p) for p in self.get_pages())
        # default=-1 makes the first appended page "page0.svg"
        # NOTE(review): assumes page numbering starts at 0 - confirm.
        return max((i for i in indices if i is not None), default=-1)

    def append_svg(self, svg_filename):
        """Copy *svg_filename* into the archive as a new last page."""
        next_page_index = self._get_max_page_index() + 1
        # new_name has to be a WindowsPath because we write it to the manifest
        new_name = pathlib.PureWindowsPath("page{}.svg".format(next_page_index))
        self._append_resource_file(FCWResource.PAGES, svg_filename, new_name)

    def append_pages(self, other):
        """Append every page of the FCWFile *other* under a fresh page name."""
        for page in other.get_pages():
            old_filename = other.workdir / page
            self.append_svg(old_filename)

    def _append_resources(self, other, resource_type):
        """Copy the files of one resource group of *other* into this file.

        Entries that are not ``<file>`` elements, and files whose name is
        already used by any resource of this file, are skipped with a warning.
        """
        other_resources = other.get_resources(resource_type)
        # A set gives constant-time membership tests in the loop below.
        known_files = set(self.get_all_resource_files())
        for r in other_resources:
            if r.tag != FILE_TAG_NAME:
                print("WARNING: Unknown resource tag for type \"{}\" in {}.".format(resource_type.value, other.filename))
                print("WARNING: unable to merge, the output file might be broken/unusable!")
                continue
            href = other._get_file_path(r)
            # TODO: in the following case more can be done, but inter-resource
            # references would have to be adapted as well, in particular
            # those in pages
            if href in known_files:
                print("WARNING: both files have a resource named \"{}\".".format(href))
                print("WARNING: unable to merge, the output file might be broken/unusable!")
                continue
            other_filename = other.workdir / href
            self._append_resource_file(resource_type, other_filename, href)
            # Remember the new file so that a second entry with the same name
            # in *other* is reported as a conflict instead of failing the copy.
            known_files.add(href)

    def _append_resource_file(self, resource_type, src_filename, href):
        """Copy *src_filename* to *href* in the working directory and register it.

        A ``<file>`` element for *href* is added to the manifest resource
        group *resource_type*. Raises ``AssertionError`` if the target file
        already exists.
        """
        # make sure href is a WindowsPath, since we are writing it to the manifest
        href = pathlib.PureWindowsPath(href)
        new_filename = self.workdir / href
        # Raised explicitly so existing files are never overwritten, even
        # when assert statements are disabled with -O.
        if new_filename.exists():
            raise AssertionError('Resource file "{}" already exists'.format(href))
        new_filename.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(src_filename, new_filename)
        lxml.etree.SubElement(
            self.get_resources(resource_type),
            FILE_TAG_NAME,
            attrib={'href': str(href)},
        )

    def append(self, other):
        """Append the pages and all other resources of *other* to this file."""
        self.append_pages(other)
        for r in FCWResource:
            # pages were renamed and copied by append_pages() already
            if r == FCWResource.PAGES:
                continue
            self._append_resources(other, r)

    def export_pdf(self, target, interactive=False):
        """Render every page to PDF and write the merged document to *target*.

        With ``interactive=True`` a progress bar is shown while converting.
        """
        from cairosvg import svg2pdf
        from tqdm import tqdm
        try:
            # PyPDF2 renamed the class; the old name no longer works in 3.x.
            from PyPDF2 import PdfMerger
        except ImportError:
            from PyPDF2 import PdfFileMerger as PdfMerger

        merger = PdfMerger()
        try:
            for page in tqdm(self.get_pages(), disable=not interactive):
                pdf = svg2pdf(url=str(self.workdir / page))
                merger.append(fileobj=io.BytesIO(pdf))
            merger.write(str(target))
        finally:
            # Release the merger's resources even when a page fails to convert.
            merger.close()

    def save(self, filename=None):
        """Write the manifest back and pack the working directory.

        The archive is written to *filename*, or to the name this object was
        opened from when *filename* is ``None``.
        """
        if filename is None:
            filename = self.filename
        # write manifest
        self.manifest.write(
            str(self.manifest_filename),
            encoding='UTF-8',
            xml_declaration=True,
            standalone=True,
        )
        self._pack(filename)

    def _pack(self, filename):
        """Zip every file below the working directory into *filename*."""
        workdir = self.workdir
        with zipfile.ZipFile(filename, 'w') as zf:
            for directory, _subdirectories, names in os.walk(workdir):
                reldir = pathlib.Path(directory).relative_to(workdir)
                for name in names:
                    rel_filename = reldir / name
                    # Archive member names always use forward slashes.
                    zf.write(str(workdir / rel_filename), arcname=rel_filename.as_posix())

    def _unpack(self):
        """Extract the archive this object was opened from into the working directory."""
        with zipfile.ZipFile(self.filename, 'r') as zf:
            zf.extractall(self.workdir)
# Root command group; the sub-commands below register themselves on it.
# (A comment instead of a docstring, since click would show a docstring as
# help text.)
@click.group()
def _cli():
    # Nothing to do here: the group only dispatches to its sub-commands.
    return None
# "topdf": convert an fcw file to a PDF document.
@_cli.command(name='topdf')
@click.argument('input', type=click.Path(exists=True, readable=True, file_okay=True, dir_okay=False))
@click.argument('output', required=False, type=click.Path(exists=False, writable=True, file_okay=True, dir_okay=False))
def _fcw2pdf(input, output):
    source = pathlib.Path(input)
    if output is None:
        # Without an explicit output name, replace a ".fcw" suffix by ".pdf";
        # for any other name, ".pdf" is appended.
        output = (
            source.with_suffix('.pdf')
            if source.suffix == '.fcw'
            else source.with_name(source.name + '.pdf')
        )

    click.echo(":: Converting {} to {}.".format(source, output))

    with FCWFile(source) as fcw:
        fcw.export_pdf(output, interactive=True)
# "merge": append the content of INPUT2 to INPUT1 and store the result as OUTPUT.
@_cli.command(name='merge')
@click.argument('input1', type=click.Path(exists=True, readable=True, file_okay=True, dir_okay=False))
@click.argument('input2', type=click.Path(exists=True, readable=True, file_okay=True, dir_okay=False))
@click.argument('output', type=click.Path(exists=False, writable=True, file_okay=True, dir_okay=False))
def _fcwmerge(input1, input2, output):
    click.echo(":: Merging {} and {} into {}".format(input1, input2, output))

    with FCWFile(input1) as fcw1:
        with FCWFile(input2) as fcw2:
            # Both archives have to stay unpacked while files are copied
            # from the second into the first.
            fcw1.append(fcw2)
            fcw1.save(output)
#NOTE: Appending arbitrary SVG files does not work, thus the following command is disabled.
# It seems only a subset of the SVG spec/only some CSS classes are available in SMART Meeting Pro.
#@_cli.command(name = 'append-svg')
#@click.argument('input', type = click.Path(exists = True, readable = True, file_okay = True, dir_okay = False))
#@click.argument('svgfile', type = click.Path(exists = True, readable = True, file_okay = True, dir_okay = False))
#@click.argument('output', type = click.Path(exists = False, writable = True, file_okay = True, dir_okay = False))
#def __fcwappendsvg(input, svgfile, output):
# click.echo(":: Appending {} to {} resulting in {}".format(svgfile, input, output))
#
# with FCWFile(input) as fcw:
# fcw.append_svg(svgfile)
# fcw.save(output)
# "info": print basic facts about an fcw file.
@_cli.command(name='info')
@click.argument('input', type=click.Path(exists=True, readable=True, file_okay=True, dir_okay=False))
def _fcwinfo(input):
    with FCWFile(input) as fcw:
        click.echo("Manifest version: {}".format(fcw.manifest_version))
        click.echo("Creation datetime: {}".format(fcw.creationdatetime))
        click.echo("Filesource: {}".format(fcw.filesource))

        position, page_name = fcw.get_startpage()
        click.echo("Start page index: {} (internal page name is {})".format(position, page_name))

        click.echo()
        click.echo("Resources:")
        for resource in FCWResource:
            # The length of a resource element is the number of its children.
            child_count = len(fcw.get_resources(resource))
            click.echo("#{}: {}".format(resource.value, child_count))
# "set-startpage": make the page at position INDEX the start page.
# The result is written to OUTPUT, or back to INPUT when OUTPUT is omitted.
@_cli.command(name='set-startpage')
@click.argument('input', type=click.Path(exists=True, readable=True, file_okay=True, dir_okay=False))
@click.argument('index', type=click.INT)
@click.argument('output', required=False, type=click.Path(exists=False, writable=True, file_okay=True, dir_okay=False))
def _fcwsetstartpage(input, index, output):
    click.echo("WARNING: this function has never been tested!")

    if output is None:
        output = input

    with FCWFile(input) as fcw:
        try:
            fcw.set_startpage(index)
        except IndexError:
            # Report a bad index as a usage error instead of a traceback.
            raise click.BadParameter(
                "page index {} is out of range (the file has {} pages)".format(index, len(fcw.get_pages())),
                param_hint='index',
            ) from None
        fcw.save(output)

        # Read the start page back from the manifest to report what was set.
        index, newstartpage = fcw.get_startpage()
        click.echo("New startpage index is {} (internal page name is {})".format(index, newstartpage))
# "print-manifest": dump the manifest of an fcw file to standard output.
@_cli.command(name='print-manifest')
@click.argument('input', type=click.Path(exists=True, readable=True, file_okay=True, dir_okay=False))
def _fcwmanifest(input):
    with FCWFile(input) as fcw:
        # Serialize into memory first; the tree is written as bytes.
        buffer = io.BytesIO()
        fcw.manifest.write(buffer, encoding='utf-8', xml_declaration=True, standalone=True)
        serialized = buffer.getvalue()
        print(serialized.decode('utf-8'))
# "license": print the copyright and warranty notice.
@_cli.command(name='license')
def _license():
    # Name the program the way it was invoked.
    progname = pathlib.Path(sys.argv[0]).name
    notice = (
        "{} Copyright (C) 2019 Christopher Spinrath <christopher.spinrath@tu-dortmund.de>".format(progname),
        "This program comes with ABSOLUTELY NO WARRANTY.",
        "This is free software, and you are welcome to redistribute it",
        "under certain conditions; see the LICENSE file for details.",
    )
    for line in notice:
        click.echo(line)