/* SchemeWEB -- WEB for Lisp.  John D. Ramsdell.
 * Simple support for literate programming in Lisp.
 */

/* 	$Id: sweb.c,v 2.1 94/07/21 11:30:36 ramsdell Exp $	 */

#ifndef lint
static char vcid[] = "$Id: sweb.c,v 2.1 94/07/21 11:30:36 ramsdell Exp $";
static char copyright[] = "Copyright 1994 by The MITRE Corporation.";
#endif /* lint */

#define VERSION "2.1"

/*
 * Copyright 1994 by The MITRE Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 1, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * For a copy of the GNU General Public License, write to the 
 * Free Software Foundation, Inc., 675 Mass Ave, 
 * Cambridge, MA 02139, USA.
 */

/*
This program processes SchemeWEB files.  A SchemeWEB file is a Lisp
source file which contains code sections and comment sections, but
each section is identified in a novel way.  A code section begins with
a line whose first character is a left parenthesis.  It continues
until a line is found which contains the parenthesis that matches the
one which started the code section.  The remaining lines of text in
the source file are treated as comments.  Several operations involving
SchemeWEB files are provided by the this program.  See the manual
page for a complete description of the various operations.
*/

/* SchemeWEB is currently set up for use with LaTeX. */

/* Define TANGLE to make a program which translates SchemeWEB source
into Scheme source by default. */

/* Define SAVE_LEADING_SEMICOLON if you want text lines to be copied 
with any leading semicolon while weaving. */

#include <stdio.h>

typedef enum {FALSE, TRUE} bool;

/* Runtime flags */
bool weaving;			/* Weaving or tangling? */
bool strip_comments;		/* Strip comments while tangling. */

/* Formatting commands added into weaved documents. */
char *begin_comment = "\\mbox{"; /* This pair is used */
char *end_comment = "}";	/* to surround comments in code. */
char *begin_code = "\\begin{flushleft}\n"; /* This pair is used */
char *end_code = "\\end{flushleft}\n"; /* to surround code. */
char *code_line_separator = "\\\\ ";
char *begin_code_line = "\\verb|"; /* This pair is used */
char *end_code_line = "|";	/* to surround code lines. */

/* Information for error messages. */
char *prog = NULL;		/* Name of program. */
char *src = NULL;		/* Name of input file. */
int lineno = 1;			/* Line number. */

/* Output occurs through putchar, putstring, and code_putchar. */

#define putstring(s) (fputs(s, stdout))

int				/* Used while printing */
code_putchar(c)			/* a code section. */
     int c;
{
  if (c == '|' && weaving) return putstring("|\\verb-|-\\verb|");
  else return putchar(c);
}

/* All input occurs in the following routines so that TAB characters
can be expanded while weaving. TeX treats TAB characters as a
space--not what is wanted. */

int ch_buf;			/* Used to implement */
bool buf_used = FALSE;		/* one character push back. */

int 
getchr()
{
  int c;
  static int spaces = 0;	/* Spaces left to print a TAB. */
  static int column = 0;	/* Current input column. */
  if (buf_used) {
    buf_used = FALSE;
    return ch_buf;
  }
  if (spaces > 0) {
    spaces--;
    return ' ';
  }
  switch (c = getc(stdin)) {
  case '\t':
    if (!weaving) return c;
    spaces = 7 - (7&column);	/* Maybe this should be 7&(~column). */
    column += spaces + 1;
    return ' ';
  case '\n':
    lineno++;
    column = 0;
    return c;
  default:
    column++;
    return c;
  }
}

void 
ungetchr(c)
     int c;
{
  buf_used = TRUE;
  ch_buf = c;
}

/* Error message for end of file found in code. */
bool 
report_eof_in_code()
{
  fprintf(stderr, "End of file within a code section.\n");
  return TRUE;
}

bool 
copy_text_saw_eof()		/* Copies a line of text out. */
{				/* Used while printing */
  int c;			/* a text section. */
  while (1) {
    c = getchr();
    if (c == EOF) return TRUE;
    if (c == '\n') return FALSE;
    putchar(c);
  }
}

bool 
strip_text_saw_eof()		/* Gobbles up a line of input. */
{
  int c;
  while (1) {
    c = getchr();
    if (c == EOF) return TRUE;
    if (c == '\n') return FALSE;
  }
}

bool				/* This copies comments */
copy_comment_saw_eof()		/* within code sections. */
{				
  if (weaving) putstring(begin_comment);
  putchar(';');
  if (copy_text_saw_eof()) return TRUE;
  if (weaving) putstring(end_comment);
  return FALSE;
}

bool				/* Copies a string found */
copy_string_saw_eof()		/* within a code section. */
{
  int c;
  while (1) {
    c = getchr();
    if (c == EOF) return TRUE;
    if (c == '\n') {		/* Found a string which continues on */
      putstring(end_code_line);	/* a new line. */
      putchar(c);		/* Close existing line, and then */
      putstring(code_line_separator); /* begin copying the rest of */
      putstring(begin_code_line); /* on the next line. */
      continue;
    }
    code_putchar(c);
    switch (c) {
    case '"': return FALSE;
    case '\\':
      c = getchr();
      if (c == EOF) return TRUE;
      code_putchar(c);
    }
  }
}

bool 
maybe_char_syntax_saw_eof()
{				/* Makes sure that the character */
  int c;			/* #\( does not get counted in */
  c = getchr();			/* balancing parentheses. */
  if (c == EOF) return TRUE;
  if (c != '\\') {
    ungetchr(c);
    return FALSE;
  }
  code_putchar(c);
  c = getchr();
  if (c == EOF) return TRUE;
  code_putchar(c);
  return FALSE;
}

bool				/* Copies a code section */
copy_code_failed()		/* containing S-exprs. */
{
  int parens = 1;		/* Used to balance parentheses. */
  int c;
  while (1) {			/* While parens are not balanced, */
    c = getchr();
    if (c == EOF)		/* Report failure on EOF. */
      return report_eof_in_code();
    if (c == '\n' && weaving)
      putstring(end_code_line);
    if (c == ';') { 		/* Report failure on EOF in a comment. */
      if (weaving) putstring(end_code_line);
      if (strip_comments
	  ? strip_text_saw_eof()
	  : copy_comment_saw_eof())
	return report_eof_in_code();
      else
	c = '\n';
    }
    code_putchar(c);		/* Write the character and then see */
    switch (c) {		/* if it requires special handling. */
    case '(':
      parens++;
      break;
    case ')':
      parens--;			
      if (parens < 0) {
	fprintf(stderr, "Too many right parentheses found.\n");
	return TRUE;
      }
      break;
    case '"':			/* Report failure on EOF in a string. */
      if (copy_string_saw_eof()) {
	fprintf(stderr, "End of file found within a string.\n");
	return TRUE;
      }
      break;
    case '#':			/* Report failure on EOF in a character. */
      if (maybe_char_syntax_saw_eof())
	return report_eof_in_code();
      break;
    case '\n':
      if (parens == 0) return FALSE;
      if (weaving) {
	putstring(code_line_separator);
	putstring(begin_code_line);
      }
    }
  }
}

int 
schemeweb()
{
  int c;
  while (1) {			/* At loop start it's in text mode */
    c = getchr();		/* and at the begining of a line. */
    if (c == '(') {		/* text mode changed to code mode. */
      if (weaving) putstring(begin_code);
      do {			/* Copy code. */
	if (weaving) putstring(begin_code_line);
	putchar(c);
	if (copy_code_failed()) {
	  fputs(prog, stderr);
	  if (src != NULL)
	    fprintf(stderr, ":%s:", src);
	  else
	    fputs(":<stdin>:", stderr);
	  fprintf(stderr,
		  "%d: Error in a code section.\n",
		  lineno);
	  return 1;
	}
	c = getchr();		/* Repeat when there is code */
      } while (c == '(');	/* immediately after some code. */
      if (weaving) putstring(end_code);
    }
    /* Found a text line--now in text mode. */
#if !defined SAVE_LEADING_SEMICOLON
    if (c == ';' && weaving)
      c = getchr();
#endif
    if (c == EOF) return 0;	/* Files that do not end with */
    ungetchr(c);		/* a newline are okay. */

    if (strip_comments) {
      if (strip_text_saw_eof()) return 0;
    }
    else {
      if (c != '\n' && !weaving) putchar(';');
      if (copy_text_saw_eof()) return 0; /* Copy a text line. */
      putchar('\n');
    }
  }
}

int				/* Removes any semicolons */
untangle()			/* than start a line of text. */
{
  int c;
  
  while (1) {			/* At a beginning of a line of text */
    c = getchar();		/* when at this point in the code. */
    if (c == EOF) return 0;
    if (c != ';') putchar(c);
    while (c != '\n') {
      c = getchar();
      if (c == EOF) return 0;
      putchar(c);
    }
  }
}

bool				/* Open the file arguments */
open_file_args_failed(argc, argv)
     int argc;
     char *argv[];
{
  switch (argc) {
  case 2:
  case 1:
    src = argv[0];		/* Save for error messages. */
    if (NULL == freopen(argv[0], "r", stdin)) {
      fprintf(stderr, "Cannot open %s for reading.\n", argv[0]);
      break;
    }
    if (argc == 2 && NULL == freopen(argv[1], "w", stdout)) {
      fprintf(stderr, "Cannot open %s for writing.\n", argv[1]);
      break;
    }
  case 0:
    return FALSE;
  }
  return TRUE;
}

int 
usage()
{
  fprintf(stderr, 
	  "Usage: %s [-stuvwx] [input_file [output_file]]\n%s%s%s%s%s%s",
	  prog,
	  "\t-s:  tangle input stripping comments\n",
	  "\t-t:  tangle input retaining comments\n",
	  "\t-u:  untangle input\n",
	  "\t-v:  print version information\n",
	  "\t-w:  weave input\n",
	  "\t-x:  weave input and exclude line breaks in code sections\n");
  fprintf(stderr, "The default option is %s.\n",
#if defined TANGLE
	  "-t"
#else
	  "-w"
#endif
	  );
  return 1;
}

int 
main (argc, argv)
     int argc;
     char *argv[];
{
  bool untangling = FALSE;
#if defined TANGLE
  weaving = FALSE;
#else
  weaving = TRUE;
#endif
  strip_comments = FALSE;

  prog = argv[0];		/* Save program name for error messages. */

  /* Option processing.  Note only one option can be requested at a time. */
  /* -s: tangle input stripping comments. */
  /* -t: tangle input retaining comments. */
  /* -u: untangle input. */
  /* -v: print version information. */
  /* -w: weave input. */
  /* -x: weave input and exclude line breaks in code sections. */
  if (argc > 1 && argv[1][0] == '-') {
    switch (argv[1][1]) {
    case 's': weaving = FALSE; strip_comments = TRUE; break;
    case 't': weaving = FALSE; break;
    case 'u': untangling = TRUE; break;
    case 'v':
      fprintf(stderr, "This is SchemeWEB version %s.\n", VERSION);
      return 0;
    case 'w': weaving = TRUE; break;
    case 'x': weaving = TRUE; code_line_separator = "\\\\* "; break;
    default:
      fprintf(stderr, "Bad option: -%c.\n", argv[1][1]);
      return usage();
    }
    if (argv[1][2] != '\0') {
      fprintf(stderr, "Only one option allowed.\n");
      return usage();
    }
    argc--; argv++;
  }

  if (open_file_args_failed(argc - 1, argv + 1)) return usage();

  if (untangling) return untangle();
  return schemeweb();
}