#!/usr/bin/env perl
# prt-notes.pl
# Outputs a text-only version of the notes from a LibreOffice Impress file.
# It will also work on the ODP save of a Microsoft Office PowerPoint presentation.
# (c) Richard Roth - rroth@on-the-net.com
# Released under Mozilla Public License V2.0 http://mozilla.org/MPL/2.0/
#
# Usage: prt-notes.pl presentation.odp
#
# The script extracts content.xml from the ODP archive with the external
# `unzip` program, then for every draw:page prints the page name, the text of
# its title frame and the text of its presentation:notes text boxes.
# All extracted text is passed through Text::Unidecode before printing.

use strict;
use warnings;

use Data::Dumper;
use XML::LibXML;
use XML::LibXML::Reader;
use XML::LibXML::PrettyPrint;
use Text::Unidecode;

# Set to a true value to trace the intermediate XML nodes on STDOUT.
my $DEBUG = 0;

# Only used for debug output.
my $pp = XML::LibXML::PrettyPrint->new(indent_string => " ");

# Print a Data::Dumper trace of $value, tagged with the caller's line number.
# Does nothing unless $DEBUG is set.
sub debug_dump {
    my ($line, $value) = @_;
    return unless $DEBUG;
    print $line . ":" . Dumper($value);
    return;
}

# Print an indented rendering of $node. Does nothing unless $DEBUG is set.
# pretty_print() modifies the node it is given, so it is handed a deep clone:
# enabling debug output must not alter the text extracted afterwards.
sub debug_xml {
    my ($node) = @_;
    return unless $DEBUG;
    print $pp->pretty_print($node->cloneNode(1))->toString, "\n";
    return;
}

# Extract content.xml from the archive at $odp_path and return it parsed as an
# XML::LibXML document.
#
# unzip is started with a list-form piped open, so the path never passes
# through a shell (quotes or metacharacters in the name are harmless), and the
# XML is read straight from the pipe instead of a fixed file under /tmp.
# Dies when unzip cannot be started, fails, or yields no data.
sub read_content_xml {
    my ($odp_path) = @_;

    open(my $unzip, '-|', 'unzip', '-p', '--', $odp_path, 'content.xml')
        or die "Cannot run unzip: $!\n";
    binmode $unzip;    # raw bytes; the parser honours the XML declaration
    my $xml = do { local $/; <$unzip> };

    if (!close $unzip) {
        # $! is 0 when the only problem was a non-zero exit status.
        die "Error reading from unzip: $!\n" if $!;
        my $signal = $? & 127;
        my $status = $? >> 8;
        die "unzip failed on '$odp_path' (exit status $status, signal $signal)\n"
            if $signal || $status != 1;
        # Exit status 1 means "warnings, but processing completed".
        warn "unzip reported warnings for '$odp_path'\n";
    }

    die "No content.xml data could be read from '$odp_path'\n"
        unless defined $xml && length $xml;

    return XML::LibXML->load_xml(string => $xml);
}

# Numeric sort key for a draw:page: its draw:name with the first "Slide"
# removed, evaluated as a number. Names that are not numeric after that
# (or a missing attribute) evaluate to 0.
sub page_sort_key {
    my ($page) = @_;
    my $name = $page->getAttribute('draw:name');
    $name = '' unless defined $name;
    $name =~ s/Slide//;
    no warnings 'numeric';    # non-numeric names deliberately count as 0
    return $name + 0;
}

# Return the unidecoded content of every text node that sits directly inside
# a text:span below $node, in document order.
# NOTE(review): text that is not wrapped in a text:span is not returned --
# confirm whether such text should be included as well.
sub span_text_lines {
    my ($node) = @_;
    return map { unidecode($_->data) } $node->findnodes('.//text:span/text()');
}

my $in = $ARGV[0];
die "Usage: $0 presentation.odp\n" unless defined $in && length $in;

binmode STDOUT, ':encoding(UTF-8)';

my $document = read_content_xml($in);

# Order pages by the number in their name. The key is computed once per page,
# and pages with equal keys keep their document order.
my $position = 0;
my @pages =
    map  { $_->[2] }
    sort { $a->[0] <=> $b->[0] or $a->[1] <=> $b->[1] }
    map  { [ page_sort_key($_), $position++, $_ ] }
    $document->findnodes('//draw:page');

for my $page (@pages) {
    print "=1111==============\n" if $DEBUG;
    debug_dump(__LINE__, $page);
    debug_xml($page);

    my $page_name = $page->getAttribute('draw:name');
    $page_name = '' unless defined $page_name;
    print "---- PAGE=$page_name\n";

    print "--- Title: ----\n";
    # Only the first title frame of a page is used.
    my ($title_frame) = $page->findnodes('./draw:frame[@presentation:class="title"]');
    debug_dump(__LINE__, $title_frame);
    if (defined $title_frame) {
        debug_xml($title_frame);
        print "$_\n" for span_text_lines($title_frame);
    }

    # Inner parentheses are required: "print (...) . ..." would be parsed as a
    # call to print with only the parenthesised part as its argument.
    print(("=222" x 20) . "\n") if $DEBUG;

    my @notes = $page->findnodes('./presentation:notes');
    debug_dump(__LINE__, \@notes);
    for my $note (@notes) {
        debug_dump(__LINE__, $note);
        debug_xml($note);
        print "---- NOTES: ---------------\n";

        for my $text_box ($note->findnodes('.//draw:text-box')) {
            debug_dump(__LINE__, $text_box);
            debug_xml($text_box);
            for my $body (span_text_lines($text_box)) {
                # Test the length, not the truth value, so "0" is printed.
                print "$body\n" if length $body;
            }
        }
    }

    print "\n\n";
}