#!/usr/bin/env perl
# latexrevise - takes output file of latexdiff and removes either discarded
#               or appended passages, then deletes all other latexdiff markup
#
#   Copyright (C) 2004  F J Tilmann (tilmann@gfz-potsdam.de)
#
# Repository:   https://github.com/ftilmann/latexdiff
# CTAN page:    http://www.ctan.org/pkg/latexdiff
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Detailed usage information at the end of the file
# Note: version number now keeping up with latexdiff
# Version 1.3.2
#    - Functionality -c option: if comment to be deleted is the last in the line replace new line by space character.
#      This is done to avoid errors or unintended line breaks from empty lines after deletion (which are implicit \par)
#    - Introduce option -r / --replace which removes markup of the form \replaced{new text}{old text} in the changes package
# Version 1.3.1 (Bug fix) Remove some deprecation warnings due to uncommented left parentheses
# Version 1.0.2 Option --version
# Version 1.0.1 no changes to latexrevise
# Version 0.3   Updated for compatibility with latexdiff 0.3 output (DIFAUXCMD removal)
# Version 0.1   First public release

use Getopt::Long ;
use strict;
use warnings;

# ---------------------------------------------------------------------------
# NOTE(review): everything from here down to the GetOptions() call was lost
# when this file was extracted (all "<...>" spans were stripped).  The
# definitions below are reconstructed from how the remaining code uses them
# and from the markup names quoted in the usage text.  Confirm every value
# against the upstream latexdiff/latexrevise sources before release.
# ---------------------------------------------------------------------------

# Banner printed by --version (number taken from the change log above).
my $versionstring = "This is LATEXREVISE 1.3.2\n";

# Markup strings.  These are regular-expression fragments and must agree with
# the tokens written by latexdiff.
my $ADDMARKOPEN  = '\\\\DIFaddbegin(?:FL)? ';         # begin of appended block
my $ADDMARKCLOSE = '\\\\DIFaddend(?:FL)?(?: |\z)';    # end of appended block (\z: token right at end of body)
my $ADDOPEN      = '\\\\DIFadd(?:FL)?\{';             # begin of added text passage
my $ADDCLOSE     = '\}';                              # end of added text passage
my $ADDCOMMENT   = 'DIF > ';                          # marks an added comment line
my $DELMARKOPEN  = '\\\\DIFdelbegin(?:FL)? ';         # begin of discarded block
my $DELMARKCLOSE = '\\\\DIFdelend(?:FL)?(?: |\z)';    # end of discarded block
my $DELOPEN      = '\\\\DIFdel(?:FL)?\{';             # begin of deleted text passage
my $DELCLOSE     = '\}';                              # end of deleted text passage
my $DELCMDOPEN   = '%DIFDELCMD < ';                   # begin of a deleted command (a LaTeX comment)
my $DELCMDCLOSE  = "%%%\n";                           # end of a deleted command
my $AUXCMD       = '%DIFAUXCMD';                      # trails auxiliary commands inserted by latexdiff
my $DELCOMMENT   = 'DIF < ';                          # marks a deleted comment line

my $PREAMBLEXTBEG = '^%DIF PREAMBLE EXTENSION ADDED BY LATEXDIFF$';
my $PREAMBLEXTEND = '^%DIF END PREAMBLE EXTENSION ADDED BY LATEXDIFF$';

# $pat_n matches text with balanced braces (escaped \{ and \} are ignored),
# nested up to a fixed depth.  NOTE(review): depth 6 is an assumption.
my $pat_n = '(?:[^{}]|\\\\\{|\\\\\})*';
for ( 1 .. 6 ) {
    $pat_n = '(?:[^{}]|\\\\\{|\\\\\}|\{' . $pat_n . '\})*';
}
# $brat0 matches the inside of an optional [..] argument (no nesting).
my $brat0 = '(?:[^\[\]]|\\\\\[|\\\\\])*';

# A word unlikely ever to occur in a real LaTeX file; used as a placeholder.
my $someword = 'gobbledegooksygook';

my ( $input, $preamble, $body, $post );
my ( @matches, $cnt, $prematch, $postmatch );
my ( $help, $version );
my ( $verbose, $quiet, $accept, $decline, $simplify ) = ( 0, 0, 0, 0, 0 );
my ( $comment, $comenv, $markup, $markenv, $replace );

# NOTE(review): bundling presumed from the case-sensitive single-letter
# options (-h/-H, -V); confirm against upstream.
Getopt::Long::Configure('bundling');
GetOptions(
    'accept|a'              => \$accept,
    'decline|d'             => \$decline,
    'simplify|s'            => \$simplify,
    'comment|c=s'           => \$comment,
    'comment-environment|e=s' => \$comenv,
    'markup|m=s'            => \$markup,
    'markup-environment|n=s' => \$markenv,
    'replace|r=s'           => \$replace,
    'no-warnings|q'         => \$quiet,      # was wired to $verbose, so -q enabled verbose output
    'version'               => \$version,
    'verbose|V'             => \$verbose,
    'help|h|H'              => \$help
);

if ( $help ) {
    usage();
}

if ( $version ) {
    print STDERR $versionstring;
    exit 0;
}

if ( ( $accept && $decline ) || ( $accept && $simplify ) || ( $decline && $simplify ) ) {
    die '-a,-d and -s options are mutually exclusive. Type latexrevise -h to get more help.';
}

print STDERR "ACCEPT mode\n"  if $verbose && $accept;
print STDERR "DECLINE mode\n" if $verbose && $decline;
print STDERR "SIMPLIFY mode. WARNING: The output will not normally be valid latex,\n" if $verbose && $simplify;

# Slurp the input (named file or standard input) in one go
{
    local $/;    # locally set record operator to undefined, ie. enable whole-file mode
    $input = <>;
}
$input = '' unless defined $input;    # empty input: avoid "uninitialized" warnings below

# split into parts
( $preamble, $body, $post ) = splitdoc( $input, '\begin{document}', '\end{document}' );

if ( length $preamble && ( $accept || $decline ) ) {
    #
    # WORK ON PREAMBLE
    #
    # (compare subroutine linediff in latexdiff to make sure correct strings are used)

    # remove extra commands added to preamble by latexdiff
    $preamble =~ s/${PREAMBLEXTBEG}.*?${PREAMBLEXTEND}\n{0,1}//smg;

    if ( $accept ) {
        # delete mark up in appended lines
        $preamble =~ s/^(.*) %DIF > $/$1/mg;
    }
    elsif ( $decline ) {
        # delete appended lines (including their line break)
        $preamble =~ s/^(.*) %DIF > \n//mg;
        # delete markup in deleted lines
        $preamble =~ s/^%DIF < //mg;
    }
    # remove any remaining DIF markups
    $preamble =~ s/%DIF.*?\n//sg;
}

#
# WORK ON BODY
#
if ( $accept ) {
    # remove ADDMARKOPEN, ADDMARKCLOSE tokens (keep the enclosed text)
    @matches = $body =~ m/${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}/sg;
    checkpure(@matches);
    $body =~ s/${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}/$1/sg;
    # remove text flanked by DELMARKOPEN, DELMARKCLOSE tokens
    @matches = $body =~ m/${DELMARKOPEN}(.*?)${DELMARKCLOSE}/sg;
    checkpure(@matches);
    $body =~ s/${DELMARKOPEN}(.*?)${DELMARKCLOSE}//sg;
    # remove markup of added comments
    $body =~ s/%${ADDCOMMENT}(.*?)$/%$1/mg;
    # remove deleted comments (full line)
    $body =~ s/^%${DELCOMMENT}.*?\n//mg;
    # remove deleted comments (part of line)
    $body =~ s/%${DELCOMMENT}.*?$//mg;
}
elsif ( $decline ) {
    # remove DELMARKOPEN, DELMARKCLOSE tokens (keep the enclosed text)
    @matches = $body =~ m/${DELMARKOPEN}(.*?)${DELMARKCLOSE}/sg;
    checkpure(@matches);
    $body =~ s/${DELMARKOPEN}(.*?)${DELMARKCLOSE}/$1/sg;

    # remove text flanked by ADDMARKOPEN, ADDMARKCLOSE tokens
    # as latexdiff algorithm keeps the formatting and white spaces
    # of the new text, sometimes whitespace might be inserted or
    # removed inappropriately. We try to guess whether this has
    # happened

    # Mop up tokens. This must be done already now as otherwise
    # detection of white-space problems does not work
    $cnt = $body =~ s/${DELOPEN}($pat_n)${DELCLOSE}/$1/sg;
    # remove markup of deleted commands
    $cnt += $body =~ s/${DELCMDOPEN}(.*?)${DELCMDCLOSE}/$1/sg;
    $cnt += $body =~ s/${DELCMDOPEN}//g;
    # remove aux commands (whole line, via placeholder so the line break goes too)
    $cnt += $body =~ s/^.*${AUXCMD}$/${someword}/mg;
    $body =~ s/${someword}\n//g;

    while ( $body =~ m/${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}/s ) {
        $prematch  = $`;
        $postmatch = $';
        checkpure($1);
        if ( $prematch =~ /\w$/s && $postmatch =~ /^\w/ ) {
            # apparently no white-space between word=>Insert white space
            $body =~ s/${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}/ /s;
        }
        elsif ( $prematch =~ /\s$/s && $postmatch =~ /^[.,;:]/ ) {
            # space immediately before one of ".,:;" => remove this space
            $body =~ s/\s${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}//s;
        }
        else {
            # do not insert or remove any extras
            $body =~ s/${ADDMARKOPEN}(.*?)${ADDMARKCLOSE}//s;
        }
    }

    # remove markup of deleted comments
    $body =~ s/%${DELCOMMENT}(.*?)$/%$1/mg;
    # remove added comments (full line)
    $body =~ s/^%${ADDCOMMENT}.*?\n//mg;
    # remove added comments (part of line)
    $body =~ s/%${ADDCOMMENT}.*?$//mg;
}

# remove any remaining tokens
if ( $accept || $decline || $simplify ) {
    # first substitution command deals with special case of added paragraph
    $cnt = $body =~ s/${ADDOPEN}($pat_n)\n${ADDCLOSE}\n/$1\n/sg;
    $cnt += $body =~ s/${ADDOPEN}($pat_n)${ADDCLOSE}/$1/sg;
    # Only DECLINE mode should have no \DIFadd left at this point.
    # (The message used to be single-quoted, printing a literal '\n'.)
    warn "Remaining \\DIFadd tokens in DECLINE mode\n"
        if $cnt && !( $quiet || $accept || $simplify );
}

if ( $accept || $simplify ) {
    # Note: in decline mode these commands have already been removed above
    $cnt = $body =~ s/${DELOPEN}($pat_n)${DELCLOSE}/$1/sg;
    # remove markup of deleted commands
    $cnt += $body =~ s/${DELCMDOPEN}(.*?)${DELCMDCLOSE}/$1/sg;
    $cnt += $body =~ s/${DELCMDOPEN}//g;
    # remove aux commands; deliberately not added to $cnt so that they do not
    # trigger the warning below.
    # NOTE(review): the collapsed source is ambiguous about whether this
    # substitution was active or commented out; kept active, uncounted.
    $body =~ s/^.*${AUXCMD}$/${someword}/mg;
    $body =~ s/${someword}\n//g;
    warn "Remaining \\DIFdel or DIFDELCMD tokens in ACCEPT mode\n"
        if $cnt && !( $quiet || $simplify );
}

# Remove comment commands
if ( defined($comment) ) {
    my $comment_re = quotemeta $comment;    # treat the name literally inside patterns
    print STDERR "Removing \\$comment\{..\} sequences (incl. argument)..." if $verbose;
    # protect $comments in comments by making them look different
    $body =~ s/(%.*)${comment_re}(.*)$/$1${someword}$2/mg;
    # carry out the substitution
    $cnt = 0 + $body =~ s/\\${comment_re}(?:\[${brat0}\])?\{${pat_n}\}(?: *\n)?//sg;
    print STDERR "$cnt matches found and removed.\n" if $verbose;
    # and undo the protection substitution
    $body =~ s/(%.*)${someword}(.*)$/$1${comment}$2/mg;
}

# Remove comment environments (including their content)
if ( defined($comenv) ) {
    my $comenv_re = quotemeta $comenv;      # e.g. "figure*" must not act as a quantifier
    print STDERR "Removing $comenv environments ..." if $verbose;
    $body =~ s/(%.*)${comenv_re}/$1${someword}/mg;
    $cnt = 0 + $body =~ s/\\begin(?:\[${brat0}\])?\{${comenv_re}\}.*?\\end\{${comenv_re}\}\s*?\n//sg;
    print STDERR "$cnt matches found and removed.\n" if $verbose;
    $body =~ s/(%.*)${someword}/$1${comenv}/mg;
}

# Remove markup commands but keep their argument
if ( defined($markup) ) {
    my $markup_re = quotemeta $markup;
    print STDERR "Removing \\$markup\{..\} commands (leaving argument)..." if $verbose;
    # protect $markups in comments by making them look different
    $body =~ s/(%.*)${markup_re}(.*)$/$1${someword}$2/mg;
    # carry out the substitution
    $cnt = 0 + $body =~ s/\\${markup_re}(?:\[${brat0}\])?\{(${pat_n})\}/$1/sg;
    print STDERR "$cnt matches found and removed.\n" if $verbose;
    # and undo the protection substitution
    $body =~ s/(%.*)${someword}(.*)$/$1${markup}$2/mg;
}

# Remove \begin{..}/\end{..} of markup environments but keep their content
if ( defined($markenv) ) {
    my $markenv_re = quotemeta $markenv;
    print STDERR "Removing $markenv environments ..." if $verbose;
    $body =~ s/(%.*)${markenv_re}/$1${someword}/mg;
    $cnt = 0 + $body =~ s/\\begin(?:\[${brat0}\])?\{${markenv_re}\}\n?//sg;
    $cnt += 0 + $body =~ s/\\end\{${markenv_re}\}\n?//sg;
    # each environment contributes one \begin and one \end
    print STDERR $cnt / 2, " matches found and removed.\n" if $verbose;
    $body =~ s/(%.*)${someword}/$1${markenv}/mg;
}

# Reduce \cmd{new}{old} to new
if ( defined($replace) ) {
    my $replace_re = quotemeta $replace;
    print STDERR "Removing \\$replace\{..\}\{..\} commands (leaving 1st and discarding 2nd argument))..." if $verbose;
    # protect $markups in comments by making them look different
    $body =~ s/(%.*)${replace_re}(.*)$/$1${someword}$2/mg;
    # carry out the substitution
    $cnt = 0 + $body =~ s/\\${replace_re}(?:\[${brat0}\])?\{(${pat_n})\}\s?\{(${pat_n})\}/$1/sg;
    print STDERR "$cnt matches found and removed.\n" if $verbose;
    # and undo the protection substitution
    $body =~ s/(%.*)${someword}(.*)$/$1${replace}$2/mg;
}

if ( length $preamble ) {
    print "$preamble\\begin{document}${body}\\end{document}$post";
}
else {
    print $body;
}


# checkpure(@matches)
# checks whether any of the strings in matches contains
# $ADDMARKOPEN, $ADDMARKCLOSE, $DELMARKOPEN, or $DELMARKCLOSE
# If so, die reporting nesting problems, otherwise return to caller
sub checkpure {
    # A lexical loop variable is used so the caller's $_ is left untouched.
    for my $passage (@_) {
        last unless defined $passage;
        if (   $passage =~ /$ADDMARKOPEN/
            || $passage =~ /$ADDMARKCLOSE/
            || $passage =~ /$DELMARKOPEN/
            || $passage =~ /$DELMARKCLOSE/ )
        {
            # NOTE(review): original message text was lost in extraction.
            die "Nested latexdiff block markup detected in passage:\n"
              . "$passage\n"
              . "Please check the input file.\n";
        }
    }
    return;
}

# ($part1,$part2,$part3)=splitdoc($text,$word1,$word2)
# splits $text into the part before $word1, the part between $word1 and
# $word2, and the part after $word2 (the words themselves are dropped).
# If neither word occurs, $part1 and $part3 are empty and $part2 is $text.
# Dies if only one word is present or they occur in the wrong order.
# NOTE(review): the head of this sub (up to the second condition) was lost in
# extraction; the "neither word present" branch is inferred from the callers'
# "length $preamble" checks.
sub splitdoc {
    my ( $text, $word1, $word2 ) = @_;
    my $l1 = length $word1;
    my $l2 = length $word2;
    my $i  = index( $text, $word1 );
    my $j  = index( $text, $word2 );

    # Start from empty strings so that no part is ever returned undefined
    # (e.g. when $word2 is the very last thing in $text).
    my ( $part1, $part2, $part3 ) = ( '', '', '' );

    if ( $i < 0 && $j < 0 ) {
        # no $word1 or $word2: everything counts as body
        $part2 = $text;
    }
    elsif ( $i >= 0 && $j > $i ) {
        $part1 = substr( $text, 0, $i );
        $part2 = substr( $text, $i + $l1, $j - $i - $l1 );
        $part3 = substr( $text, $j + $l2 ) unless $j + $l2 >= length $text;
    }
    else {
        die "$word1 or $word2 not in the correct order or not present as a pair.";
    }
    return ( $part1, $part2, $part3 );
}


# usage()
# prints the help text to STDERR and exits with status 0
sub usage {
    print STDERR <<"EOF";
Usage: $0 [OPTIONS] [diff.tex] > revised.tex

Read a file diff.tex (output of latexdiff), and remove its markup.
If no filename is given read from standard input. The command can be used
in ACCEPT, DECLINE, or SIMPLIFY mode, and be used to remove user-defined
latex commands from the input (see options -c, -e, -m, -n below).
In ACCEPT mode, all appended text fragments (or preamble lines) are kept,
and all discarded text fragments (or preamble lines) are deleted.
In DECLINE mode, all discarded text fragments are kept, and all appended
text fragments are deleted.
If you wish to keep some changes, edit the diff.tex file in
advance, and manually remove those tokens which would otherwise be
deleted.  Note that latexrevise only pays attention to the \\DIFaddbegin,
\\DIFaddend, \\DIFdelbegin, and \\DIFdelend tokens and corresponding FL
varieties. All \\DIFadd and \\DIFdel commands (but not their content) are
simply deleted.   The commands added by latexdiff to the preamble are also
removed.
In SIMPLIFY mode all latexdiff markup is removed from the body of the text (after
\\begin{document}) except for \\DIFaddbegin, \\DIFaddend, \\DIFdelbegin, \\DIFdelend
tokens and the corresponding FL varieties of those commands. The result
will not in general be valid latex-code but might be easier to read and edit in
preparation for a subsequent run in ACCEPT or DECLINE mode.
In SIMPLIFY mode the preamble is left unmodified.

-a
--accept           Run in ACCEPT mode (delete all blocks marked by \\DIFdelbegin
                   and \\DIFdelend).

-d
--decline          Run in DECLINE mode (delete all blocks marked by \\DIFaddbegin
                   and \\DIFaddend).

-s
--simplify         Run in SIMPLIFY mode (Keep all \\DIFaddbegin, \\DIFaddend,
                   \\DIFdelbegin, \\DIFdelend tokens, but remove all other latexdiff
                   markup from body).

Note that the three mode options are mutually exclusive.  If no mode option is given,
latexrevise simply removes user annotations and markup according to the following four
options; these functions can be very useful outside the latexdiff context, too.

-c cmd
--comment=cmd      Remove \\cmd{...}.  cmd is supposed to mark some explicit
                   annotations which should be removed from the file before
                   release.

-e envir
--comment-environment=envir
                   Remove explicit annotation environments from the text, i.e. remove
                   \\begin{envir}
                   ...
                   \\end{envir}
                   blocks.

-m cmd
--markup=cmd       Remove the markup command cmd but leave its argument, i.e.
                   turn \\cmd{abc} into abc.

-n envir
--markup-environment=envir
                   Similarly, remove \\begin{envir} and \\end{envir} commands,
                   but leave content of the environment in the text.

-r cmd
--replace=cmd      For constructions \\cmd{..}{..}, remove the command, leave the
                   content of first argument, and delete second argument.

-q
--no-warnings      Do not warn users about \\DIFadd{..} or \\DIFdel statements
                   which should not be there anymore

-V
--verbose          Verbose output

EOF
    exit 0;
}

=head1 NAME

latexrevise - selectively remove markup and text from latexdiff output

=head1 SYNOPSIS

B<latexrevise> [ B<OPTIONS> ] [ F<diff.tex> ] > F<revised.tex>

=head1 DESCRIPTION

I<latexrevise> reads a file C<diff.tex> (output of I<latexdiff>), and remove the markup commands.
If no filename is given the input is read from standard input. The command can be used
in I<ACCEPT>, I<DECLINE>, or I<SIMPLIFY> mode, or can be used to remove user-defined
latex commands from the input (see B<-c>, B<-e>, B<-m>, and B<-n> below).
In I<ACCEPT> mode, all appended text fragments (or preamble lines) are kept,
and all discarded text fragments (or preamble lines) are deleted.
In I<DECLINE> mode, all discarded text fragments are kept, and all appended
text fragments are deleted.
If you wish to keep some changes, edit the diff.tex file in
advance, and manually remove those tokens which would otherwise be
deleted.  Note that I<latexrevise> only pays attention to the C<\DIFaddbegin>,
C<\DIFaddend>, C<\DIFdelbegin>, and C<\DIFdelend> tokens and corresponding FL
varieties. All C<\DIFadd> and C<\DIFdel> commands (but not their contents) are
simply deleted.   The commands added by latexdiff to the preamble are also
removed.
In I<SIMPLIFY> mode, C<\DIFaddbegin, \DIFaddend, \DIFdelbegin, \DIFdelend>
tokens and their corresponding C<FL> varieties are kept but all other markup
(e.g. C<\DIFadd> and C<\DIFdel>) is removed. The result
will not in general be valid latex-code but it will be easier to read and edit in
preparation for a subsequent run in I<ACCEPT> or I<DECLINE> mode.
In I<SIMPLIFY> mode the preamble is left unmodified.

=head1 OPTIONS

=over 4

=item B<-a> or B<--accept>

Run in I<ACCEPT> mode (delete all blocks marked by C<\DIFdelbegin> and C<\DIFdelend>).

=item B<-d> or B<--decline>

Run in I<DECLINE> mode (delete all blocks marked by C<\DIFaddbegin>
and C<\DIFaddend>).

=item B<-s> or B<--simplify>

Run in I<SIMPLIFY> mode (Keep all C<\DIFaddbegin>, C<\DIFaddend>,
C<\DIFdelbegin>, C<\DIFdelend> tokens, but remove all other latexdiff
markup from body).

=back

Note that the three mode options are mutually exclusive.  If no mode option is given,
I<latexrevise> simply removes user annotations and markup according to the following four
options. These functions can be very useful outside the latexdiff context, too

=over 4

=item B<-c cmd> or B<--comment=cmd>

Remove C<\cmd{...}> sequences.  C<cmd> is supposed to mark some explicit
annotations which should be removed from the file before
release.

=item B<-e envir> or B<--comment-environment=envir>

Remove explicit annotation environments from the text, i.e. remove

    \begin{envir}
    ...
    \end{envir}

blocks.

=item B<-m cmd> or B<--markup=cmd>

Remove the markup command C<\cmd> but leave its argument, i.e.
turn C<\cmd{abc}> into C<abc>.

=item B<-n envir> or B<--markup-environment=envir>

Similarly, remove C<\begin{envir}> and C<\end{envir}> commands but
leave content of the environment in the text.

=item B<-r cmd> or B<--replace=cmd>

For constructions C<\cmd{..}{..}>, remove the command, leave the
content of first argument, and delete second argument, i.e.
turn C<\cmd{abc}{def}> into C<abc>.

=item B<-V> or B<--verbose>

Verbose output

=item B<-q> or B<--no-warnings>

Do not warn users about C<\DIFadd{..}> or C<\DIFdel{..}> statements
which should have been removed already.

=back

=head1 BUGS

The current version is a beta version which has not yet been
extensively tested. It has not been actively maintained so might not
process output of newer versions of latexdiff entirely correctly.
Please submit bug reports using the issue tracker of the github repository page
I<https://github.com/ftilmann/latexdiff>, or send them to I<tilmann@gfz-potsdam.de>.
Include the serial number of I<latexrevise>
(Option --version). If you come across latexdiff
output which is not processed correctly by I<latexrevise> please include the
problem file as well as the old and new files on which it is based,
ideally edited to only contain the offending passage as long as that still
reproduces the problem.

Note that I<latexrevise> gets confused by commented C<\begin{document}> or
C<\end{document}> statements

=head1 SEE ALSO

L<latexdiff>

=head1 PORTABILITY

I<latexrevise> does not make use of external commands and thus should run
on any platform supporting PERL v5 or higher.

=head1 AUTHOR

Copyright (C) 2004 Frederik Tilmann

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License Version 3

=cut