Ziele Untersuchung
mit Columbo Integrität von
Datenbanken Interaktion und
Portierbarkeit Ergonomie der
Schnittstellen

Angebot Produkte Projekt Beratung

Mittel Analytik Modellierung Sprachen Algebra Logik Hardware Denken Kreativität

Zusammenhänge Gesellschaft Wirtschaft Branche Firma


products/sources/formale Sprachen/C/Lyx/lib/reLyX/ (Lyx Textverarbeitung ^©) Datei vom 25.0.1999 mit Größe 11 kB

Quelle CleanTeX.pm Sprache: unbekannt

# This file is part of reLyX
# Copyright (c) 1998-9 Amir Karger karger@post.harvard.edu
# You are free to use and modify this code under the terms of
# the GNU General Public Licence version 2 or later.

package CleanTeX;
# This package prepares a LaTeX file for translation to LyX
# - Translates some local commands (e.g., {\em blah} to {\emph{blah}})
# - Prepares math mode stuff for LyX. LyX reads LaTeX math mode directly,
#      so reLyX can basically copy all math mode exactly, but LyX is a
#      bit stricter than LaTeX. E.g., translate 'x^2' -> 'x^{2}
# - Removes optional arguments if LyX doesn't understand them, e.g. \sqrt

use strict;

use Verbatim;

######
# Global variables
my $last_eaten; # last token we ate

# List of commands for which LyX doesn't support the optional argument
my @DeleteOptArg = map {"\\$_"} qw(sqrt \\ \\*
              chapter section subsection subsubsection paragraph subparagraph
       );

my $debug_on; # was -d option given?

#########################   PARSER INVOCATION   ################################
sub call_parser {
# This subroutine opens the TeX parser and processes the file.
# Arg0 is the name of the input TeX file
# Arg1 is the name of the output "clean" file

    my ($InFileName, $OutFileName) = (shift,shift);

    $debug_on = (defined($main::opt_d) && $main::opt_d);
    my $zzz=$debug_on ? " TeX file ($InFileName --> $OutFileName)\n" :"... ";
    print STDERR "Cleaning$zzz";
    open (OUTFILE, ">$OutFileName") or die "problem opening $OutFileName: $!\n";

# Create the list of tokens for the parser
# Parts of the token list are swiped from TeX.pm
    my %MyTokens = ( '{' => $Text::TeX::Tokens{'{'},
       '}' => $Text::TeX::Tokens{'}'},
       '$' => $Text::TeX::Tokens{'$'},
       '$$' => $Text::TeX::Tokens{'$$'},
       '\begin' => $Text::TeX::Tokens{'\begin'},
       '\end' => $Text::TeX::Tokens{'\end'},
     );

    # Put local tokens, like \em, into %MyTokens
    #Note: \cal is "local", although it's found in math mode
    # (The "map" just puts a backslash in front of each word in the list)
    my @LocalTokens = qw (em rm bf tt sf sc sl it
    rmfamily ttfamily sffamily mdseries bfseries
   upshape itshape slshape scshape cal
   );
    foreach (@LocalTokens) {
$MyTokens{"\\$_"} = $Text::TeX::Tokens{'\em'}
    }
    # Now add any commands
    &ReadCommands::Merge(\%MyTokens);

# Create the fileobject
    my $file = new Text::TeX::OpenFile
    $InFileName,
    'defaultact' => \&clean_tex,
    'tokens' => \%MyTokens;

# Now actually process the file
    $file->process;
    close OUTFILE;
    #warn "Done cleaning TeX file\n";
} # end sub call_parser

#######################   MAIN TRANSLATING SUBROUTINE   ########################
# Routine called by the TeX-parser to perform token-processing.
sub clean_tex {
    my($eaten,$txt) = (shift,shift);
    my ($outstr, $type);

    # Sub translate is given a string and one of the translation tables below.
    # It returns the translation, or just the string if there's no translation
    # Translation table for TT::Begin::Group tokens
    my %begtranstbl = (
   '$' => '$', # LyX math mode doesn't
   '$$' => '\[', # understand \$ or $$
   );

    # Translation table for TT::End::Group tokens
    my %endtranstbl = (
      '$' => '$',
      '$$' => '\]',
         );

    # Translation table for TT::Token tokens whose translations should
    #    NOT have whitespace after them! See sub translate...
    #   Note that tokens of type TT::EndLocal are always translated to '}'. So,
    #   any token defined as a local token *must* be translated to something
    #   with a '{' (e.g., '\em' -> '\emph{') or we'll have mismatched braces
    my %no_ws_transtbl = (
   '\em' => '\emph{',
   '\rm' => '\textrm{',
   '\bf' => '\textbf{',
   '\tt' => '\texttt{',
   '\sf' => '\textsf{',
   '\sc' => '\textsc{',
   '\sl' => '\textsl{',
   '\it' => '\textit{',
   '\rmfamily' => '\textrm{',
   '\ttfamily' => '\texttt{',
   '\sffamily' => '\textsf{',
   '\mdseries' => '\textmd{',
   '\bfseries' => '\textbf{',
   '\upshape' => '\textup{',
   '\itshape' => '\textit{',
   '\slshape' => '\textsl{',
   '\scshape' => '\textsc{',
   '\cal' => '\mathcal{',
   );

    # a faux "switch" statement.  sets $_ for later use in pattern
    # matching.
    $type = ref($eaten);
    $type =~ s/^Text::TeX::// or die "Non-Text::TeX object";
    my $printstr = ""; # default for undefined printstrs etc.
    SWITCH: for ($type) {
    # Handle blank lines.
    if (/Paragraph/) {
        last SWITCH;
    }

    # Handle the end of a local font command - insert a '}'
    if (/EndLocal/) {
        # we could just say $printstr='}'
        $printstr = &translate('}', \%endtranstbl);
        last SWITCH;
    }

    # $eaten->exact_print is undefined for previous environments
    $outstr = $eaten->exact_print;
    if (! defined $outstr) { # comment at end of paragraph
        warn "Weird undefined token $eaten!" unless $eaten->comment;
        last SWITCH;
     }

    # Handle LaTeX tokens
    if (/^Token$/) {
        my $realtok = $eaten->print; # w/out whitespace
        # If a comment is its own paragraph, print nothing
        last SWITCH unless defined($realtok);
        # Special handling for \verb and \verb*
        if ($realtok =~ /^\\verb\*?/) {
            $printstr = &Verbatim::copy_verb($txt,$eaten);
     last SWITCH;
        }

        # Translate token if necessary, or just print it
        # "no_ws" is HACK to remove whitespace, so '\em ' -> '\emph{'
        $printstr = &translate($outstr, \%no_ws_transtbl, "no_ws");

        # Ignore optional argument(s) if necessary
        $printstr .= &handle_opt_args($eaten,$txt);

        last SWITCH;
    }

    # Tokens taking arguments, like '^'
    # ADD '{' if there isn't one before the argument!
    if (/^BegArgsToken$/) {
        $printstr = $outstr;

        # Ignore optional argument(s) if necessary
        $printstr .= &handle_opt_args($eaten,$txt);

        # Add beginning brace before the 1st argument if there isn't one
        my $tok = $txt->lookAheadToken;
        $printstr .= '{' unless ($tok =~ /\{/);
        last SWITCH;
    }

    # End of one argument, beginning of next
    # Note: by default ArgToken,EndArgsToken print nothing
    # ADD '}' if there isn't one after the last argument
    # Then read and print any optional arguments which may exist
    #    between this argument the next (we must do this here or we would
    #    add a '{' before an optional argument!)
    # ADD '{' if there isn't one before the next argument!
    # (just like we do in BegArgsToken and EndArgsToken)
    if (/^ArgToken$/) {
        $printstr = $outstr; # = ''

        # Add '}' after the argument that ended if necessary
        $printstr .= '}' unless $last_eaten->print eq "\}";

        # Eat and print any optional arguments
        $printstr .= &handle_opt_args($eaten,$txt);

        # Add '{' before the next argument if necessary
        my $tok = $txt->lookAheadToken;
        $printstr .= '{' unless ($tok =~ /\{/);
        last SWITCH;
    }

    # End of tokens taking arguments, like '^'
    #     ADD '}' if there isn't one after the last argument, i.e.,
    # if the previous token *wasn't* a '}'
    #     Kludge: for TeX style \input command ("\input foo" with no
    # braces) we need to read the whole filename, but parser will have
    # read only one char. So read in the rest of the filename before
    # printing the '}'.
    if (/^EndArgsToken$/) {
        $printstr = $outstr; # = ''

        unless ($last_eaten->print eq "\}") {
     my $s = $eaten->base_token;
     if ($s->print eq "\\input") {
         my $t = $txt->lookAheadToken;
         # For one-char filename (a.tex) do nothing
         if ($t =~ /^[\w.\-]/) {
      my $u = $txt->eatMultiToken;
      $t = $u->print;
      $t =~ s/\s+//g;
      $printstr .= $t;
   }
   # TeX \input always adds .tex ending
   $printstr .= ".tex";
      }

     $printstr .= '}';
  }

        # Don't bother eating optional args coming after the last
        # required arg: they'll just be copied as text
        last SWITCH;
    }

    # Handle opening groups, like '{' and '$'.
    if (/Begin::Group$/) {
        $printstr = &translate($outstr,\%begtranstbl);
        last SWITCH;
    }

    # Handle closing groups, like '}' and '$'.
    if (/End::Group$/) {
        $printstr = &translate($outstr, \%endtranstbl);
        last SWITCH;
    }

    if (/Begin::Group::Args/) {
        my $env = $eaten->environment;
        $printstr = $outstr;
        if ($env eq "verbatim" || $env eq "reLyXskip") {
     # copy everything up to "\end{foo}"
            $printstr .= &Verbatim::copy_verbatim($txt, $eaten);
        }
        last SWITCH;
    }

    if  (/End::Group::Args/) {
        $printstr = $outstr;
        last SWITCH;
    }

    if (/Text/) {
        $printstr = $outstr;
        last SWITCH;
    }

    # The default action - print the string.
    $printstr = $outstr;
    } # end SWITCH:for ($type)

    # Actually print the string
    if (defined $printstr) {
print OUTFILE $printstr;
$last_eaten = $eaten; #save for next time
    } else {warn "Undefined printstr";}

} # end sub clean_tex

####################   TRANSLATOR SUBROUTINES    ###############################
sub translate {
# Replace a string (possibly with whitespace around it) with another
# Arg0 is a string, Arg1 is a reference to a hash containing translations
# If a token not in the table is passed in, do nothing
# If Arg2 is defined AND the token is known, then remove whitespace from
#     the end of the translated token. This is a HACK to do '\em ' -> '\emph{'
# Return the string, possibly modified
    my ($tokstr, $transref) = (shift, shift);
    my $remove_ws = shift;
    my %transtable = %$transref;

    # remove whitespace from the string (since transtable doesn't have it)
    my $stripstr = $tokstr;
    $stripstr =~ s/^\s*(\S+)\s*$/$1/ or warn "couldn't strip token";
    if ( exists $transtable{$stripstr} ) {
         # use \Q or \, (, $, and [ will be misinterpreted
        $tokstr =~ s/\Q$stripstr\E/$transtable{$stripstr}/;

# remove whitespace?
if (defined $remove_ws) {
     $tokstr =~ s/\s*$//;
}
    }

    return $tokstr;
}

sub handle_opt_args {
# read and concatenate OR IGNORE optional arguments
# Arg0 is a BegArgsToken or ArgToken
    my ($eaten,$fileobject) = (shift,shift);
    my $outstr = "";

    # If at end of paragraph, don't bother looking for optArgs
    return "" unless $fileobject->lookAheadToken;

    # Get the next argument(s) expected for this token == /^o*[rR]?$/
    # If there are no args expected, just return
    my $curr_args = $eaten->next_args($fileobject) or return "";

    # Now print or ignore any optional arguments
    # If there's an 'r' in curr_args, we're done for now
    my $foo;
    my $token_name = $eaten->token_name; # (needed for EndArgsToken, e.g.)
    while ($curr_args =~ s/^o//) {
        my $opt = $fileobject->eatOptionalArgument;
# Print any initial space before the optional argument
        if ($foo = $opt->exact_print) {
     if ($foo =~ /^(\s+)/) {
         $outstr .= $1;
     }
}

# Print the argument or ignore it
        if ($opt->print) {
     if (grep /^\Q$token_name\E$/, @DeleteOptArg) {
         print "Optional argument '",$opt->print,
         "' to macro $token_name ignored\n";
     } else {
         $outstr .= "[" . $opt->print . "]";
     }
        } # Was an optional argument found?
    }

    return $outstr;
} # end sub handle_opt_args

1; # return true value to calling program