%{
/*
 * html2setext.y - HTML parser and setext converter
 * Created 15/12/94 by avatar@sericyb.com.au (Andrew Pam)
 * Copyright (c) 1994-1997 Serious Cybernetics
 *
 * NOTE(review): the HTML tag names in the entries below are missing from
 * this copy of the file (they appear to have been stripped as markup);
 * "<?>" marks each gap.  Restore them from an intact copy if available.
 *
 * Date     Who Description
 * -------- --- -----------
 * 16/12/94 ADP Added <?> and <?>
 * 17/01/95 ADP Added word wrap
 * 18/01/95 ADP Added setext headings
 * 18/02/95 ADP Fixed line spacing and indentation
 * 24/02/95 ADP Added nested unordered lists
 * 02/03/95 ADP Added nested ordered lists
 * 28/04/96 ADP Added <?> including ALT parameter
 * 01/05/96 ADP Added SGML quote entity and <?> support
 * 05/05/96 ADP Added <?>, <?> and <?>
 * 25/07/97 ADP Added <?> support
 *
 * TODO: <?> (hyperlink) and <?> support
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef __GNUC__
void yyerror(const char *msg);
int yylex(void);
#endif

#define FALSE	(0)
#define TRUE	(!FALSE)

#define MARGIN	(68)	/* Lines longer than this are wrapped */
#define MAXLINE	(128)	/* Size of the output line buffer */

/*
 * Output markers.
 * NOTE(review): whitespace in this part of the file looks collapsed; the
 * leading blank of S_HRULE and the value of S_INDENT may originally have
 * been two spaces each - confirm against an intact copy.
 */
#define S_HRULE	" ____________________________________________________________\n"
#define S_BOLD	"**"
#define S_EOF	"$$"
#define S_INDENT	" "
#define S_LIST	"* "
#define S_QUOTE	"> "

#define CH_EMPH	'_'
#define CH_ITAL	'~'
#define CH_H1	'='
#define CH_H2	'-'

typedef int BOOL;

typedef struct listn
{
	struct listn *next;	/* Pointer to next list node */
	int num;		/* Ordered list numbering, 0 for unordered */
} LISTN;

BOOL	body = FALSE;		/* TRUE inside the document body */
BOOL	center = FALSE;		/* TRUE between CENTER and END_CENTER */
BOOL	flush = FALSE;		/* TRUE if flush left */
BOOL	pre = FALSE;		/* TRUE between PRE and END_PRE */
BOOL	quote = FALSE;		/* TRUE between BQUOTE and END_BQUOTE */
BOOL	sentence = FALSE;	/* TRUE at end of sentence */
int	blanks = 0;		/* Number of blank lines skipped */
int	depth = 0;		/* Indentation depth */
char	heading = (char) 0;	/* Current heading level */
char	line[MAXLINE] = "";	/* Output line buffer */
LISTN	*listp = NULL;		/* Head of list numbering list */
/*
 * Parser trace flag, switched on by default.
 * NOTE(review): presumably only has an effect when the generated parser
 * is built with tracing support - confirm.
 */
int	yydebug = 1;

/* Print a row of n copies of ch followed by a newline */
static void underline(char ch, int n)
{
	while (n-- > 0)
		putchar(ch);
	putchar('\n');
}

/*
 * Trim trailing spaces and output the current line.
 *
 * The line is centred when center is set and no level 1 or 2 heading is
 * active.  A level 1 or 2 heading is followed by an underline as long as
 * the trimmed text.  Afterwards the buffer is empty, the sentence flag is
 * cleared and blanks counts consecutive empty lines.
 */
void output(void)
{
	int len = (int) strlen(line);
	int pad;

	/* Find the last non-space character and truncate after it */
	while ((len > 0) && (line[len - 1] == ' '))
		len--;
	line[len] = (char) 0;

	if (center && len && (heading != '1') && (heading != '2'))
		for (pad = (MARGIN - len) / 2; pad > 0; --pad)
			putchar(' ');
	puts(line);

	if (*line)
		blanks = 0;
	else
		blanks++;
	*line = (char) 0;
	sentence = FALSE;

	if (heading == '1')
		underline(CH_H1, len);
	else if (heading == '2')
		underline(CH_H2, len);
}

/*
 * Statement-like macros: wrapped in do/while so that "coutput();" is a
 * single statement wherever it is used.
 */
/* Output the current line only if it is not empty */
#define coutput()	do { if (*line) output(); } while (0)
/* Flush the current line, then emit blank lines until count were skipped */
#define skip(count)	do { coutput(); while (blanks < (count)) output(); } while (0)

/*
 * Append s to the line buffer without ever writing past its end.  If the
 * buffer fills up, the line is output and the rest of s starts a new one.
 * s must not point into the line buffer itself.
 */
static void line_append(const char *s)
{
	size_t used = strlen(line);

	while (*s)
	{
		if (used >= MAXLINE - 1)
		{
			output();
			used = 0;
		}
		line[used++] = *s++;
		line[used] = (char) 0;
	}
}

/*
 * Append the extra indentation for nested lists: one S_INDENT for every
 * level of depth beyond the first.  Does nothing for depth <= 1.
 */
void indent(void)
{
	int i;

	for (i = depth; i > 1; --i)
		line_append(S_INDENT);
}

/* Width of the marker/indentation that line_begin() puts on a new line */
static size_t line_prefix_width(void)
{
	if (quote)
		return strlen(S_QUOTE);
	if (flush)
		return 0;
	return strlen(S_INDENT) * (size_t) (depth > 1 ? depth : 1);
}

/* Start an empty line with its quote marker or indentation */
static void line_begin(void)
{
	if (*line)
		return;

	if (quote)
		strcpy(line, S_QUOTE);
	else if (!flush)
	{
		line_append(S_INDENT);
		indent();
	}
}

/*
 * Append the string s to the current line, wrapping if necessary.
 *
 * Outside preformatted text, once the line is longer than MARGIN it is
 * broken at its last space: the text before the space is output and the
 * text after it starts the next line.  If the line has no usable space
 * (none at all, or only the one inside the line prefix) it is output
 * whole.
 */
void wraps(char *s)
{
	char tail[MAXLINE];
	char *brk;

	/* Punctuation inside a word is not the end of a sentence */
	if (*s != '"')
		sentence = FALSE;

	line_begin();
	line_append(s);

	if (pre || (strlen(line) <= MARGIN))
		return;

	/* Look for a space to break at */
	brk = strrchr(line, ' ');
	if ((brk == NULL) || ((size_t) (brk - line) < line_prefix_width()))
	{
		output();
		return;
	}

	/*
	 * Save the tail before the buffer is reused: output() empties the
	 * line and line_begin() writes the prefix over its start, so the
	 * tail cannot be copied from inside the buffer afterwards.
	 */
	*brk = (char) 0;
	strcpy(tail, brk + 1);
	output();
	line_begin();
	line_append(tail);
}

/* Append the single character c to the current line, wrapping if necessary */
void wrap(char c)
{
	char s[2];

	s[0] = c;
	s[1] = (char) 0;
	wraps(s);
}

/*
 * Handle inter-word white space: literal in preformatted text, two
 * spaces after the end of a sentence, otherwise a single space unless the
 * line is empty or already ends in one.  A line that has reached the
 * margin is output instead of being extended.
 */
void space(void)
{
	size_t len = strlen(line);

	/* Spaces are literal inside preformatted text */
	if (pre)
	{
		line_append(" ");
		return;
	}

	/* Two spaces at the end of a sentence */
	if (sentence)
	{
		if ((len + 1) >= MARGIN)
			output();
		else
		{
			line_append("  ");
			sentence = FALSE;
		}
		return;
	}

	/* Otherwise check for existing characters */
	if ((len > 0) && (line[len - 1] != ' '))
	{
		if (len >= MARGIN)
			output();
		else
			line_append(" ");
	}
}

/*
 * Push a new node onto the head of the list numbering list.
 * num is the first item number for an ordered list, or 0 for an
 * unordered list.  Exits with an error message if memory runs out.
 */
void newlist(int num)
{
	LISTN *newp = malloc(sizeof *newp);

	if (newp == NULL)
	{
		fprintf(stderr, "Out of memory.\n");
		exit(EXIT_FAILURE);
	}

	newp->next = listp;
	newp->num = num;
	listp = newp;
}

/*
 * Pop the head node off the list numbering list and release it.
 * Does nothing when the list is empty.
 */
void freelist(void)
{
	LISTN *head = listp;

	if (head == NULL)
		return;

	listp = head->next;
	free(head);
}

%}

/* Semantic value carried by tokens: either one character or a string */
%union
{
	char c;		/* Single character value */
	char *s;	/* String value */
}

/* Tokens without a semantic value */
%token		NEWLINE SPACE SGML_AMP SGML_GT SGML_LT SGML_QUOT
%token		ADDRESS END_ADDRESS END_ANCHOR BASE BODY END_BODY BOLD END_BOLD
%token		BQUOTE END_BQUOTE BREAK CENTER END_CENTER CITE END_CITE
%token		DDEF DLIST END_DLIST DTERM EMPH END_EMPH FONT END_FONT
%token		HEAD END_HEAD HRULE HTML END_HTML ITAL END_ITAL LINK LIST
%token		PARA END_PARA PRE END_PRE OLIST END_OLIST TITLE END_TITLE
%token		ULIST END_ULIST
/*
 * Tokens whose value is a single character (union member c): the grammar
 * passes them to wrap() or compares them with the heading level.
 */
%token <c>	SENTENCE TEXT SGML_UML HEADING END_HEADING
/*
 * Tokens whose value is a string (union member s): the grammar passes
 * them to atoi(), wraps(), strlen() and free().
 */
%token <s>	SGML_NUMERIC SGML_ENTITY ANCHOR HTML_TAG IMAGE

%%
/*
 * Start symbol.  Four document shapes are accepted: bare data, a title
 * followed by data, a head and a body, or head and body wrapped in
 * HTML ... END_HTML.  In the first two shapes there is no BODY token, so
 * the body flag is set here before the data is parsed.
 */
file:	{ body = TRUE; } data
  |	title { body = TRUE; } data
  |	head data body data
  |	HTML data head data body data END_HTML data
  ;

/* Document head: HEAD, any number of head items, END_HEAD */
head:	HEAD heads END_HEAD
  ;

/* Zero or more head items: text, a BASE or LINK token, or a title */
heads:	/* empty */
  |	heads text
  |	heads BASE
  |	heads title
  |	heads LINK
  ;

/* Document title: data between TITLE and END_TITLE; no action of its own */
title:	TITLE data END_TITLE
  ;

/*
 * Document body.  BODY sets the body flag and clears flush before the
 * data is parsed; END_BODY clears the body flag and sets flush.
 */
body:	BODY { body = TRUE; flush = FALSE; } data
	END_BODY { body = FALSE; flush = TRUE; }
  ;

/* Zero or more text items */
data:	/* empty */
  |	data text
  ;

/*
 * One item of running text.  Characters and white space are only written
 * while the body flag is set.  A NEWLINE ends the line in preformatted
 * text and otherwise counts as a space.  SENTENCE sets the sentence flag
 * whether or not the body flag is set.
 */
text:	NEWLINE		{ if (body) if (pre) output(); else space(); }
  |	SPACE		{ if (body) space(); }
  |	SENTENCE	{ if (body) wrap($1); sentence = TRUE; }
  |	TEXT		{ if (body) wrap($1); }
  |	sgml
  |	html
  ;

/*
 * SGML entities.  Like plain text, their replacement is only written
 * while the body flag is set, so an entity in the head or title does not
 * leak into the output.  SGML_UML writes its character followed by 'e'.
 * An unknown entity is written as '&' plus its name and always reported.
 * NOTE(review): the string values of SGML_NUMERIC and SGML_ENTITY are not
 * freed here, although the IMAGE and HTML_TAG strings are freed elsewhere
 * in the grammar - confirm who owns them.
 */
sgml:	SGML_AMP	{ if (body) wrap('&'); }
  |	SGML_GT		{ if (body) wrap('>'); }
  |	SGML_LT		{ if (body) wrap('<'); }
  |	SGML_QUOT	{ if (body) wrap('"'); }
  |	SGML_NUMERIC	{ if (body) wrap(atoi($1)); }
  |	SGML_UML	{ if (body) { wrap($1); wrap('e'); } }
  |	SGML_ENTITY	{ if (body) { wrap('&'); wraps($1); }
			  yyerror("Unknown SGML entity"); }
  ;

/*
 * HTML elements.  Block-level elements flush the pending line before
 * changing the formatting state; inline emphasis writes its setext
 * marker before and after the enclosed data.  CITE and FONT pass their
 * data through unchanged.
 */
html:	anchor
  |	pre
  |	ADDRESS		{ coutput(); } data END_ADDRESS
  |	BQUOTE		{ coutput(); quote = TRUE; } data
	END_BQUOTE	{ coutput(); quote = FALSE; }
  |	CENTER		{ coutput(); center = TRUE; } data
	END_CENTER	{ coutput(); center = FALSE; }
  |	CITE data END_CITE
  |	BREAK		{ output(); }
  |	PARA		{ skip(1); }
  |	END_PARA
  |	HRULE		{ coutput(); puts(S_HRULE); }
	/* Headings are flush left and preceded by two blank lines */
  |	HEADING		{ flush = TRUE; skip(2); heading = $1; }
  |	END_HEADING	{ coutput();
			  if ($1 != heading) yyerror("Heading level mismatch");
			  heading = (char) 0; output(); flush = FALSE; }
  |	DLIST		{ coutput(); } dlist
	END_DLIST	{ skip(1); }
	/* Lists push a numbering node: 1 for ordered, 0 for unordered */
  |	OLIST		{ coutput(); newlist(1); depth++; } list
	END_OLIST	{ depth--; freelist(); skip(1); }
  |	ULIST		{ coutput(); newlist(0); depth++; } list
	END_ULIST	{ depth--; freelist(); skip(1); }
  |	BOLD		{ wraps(S_BOLD); } data END_BOLD { wraps(S_BOLD); }
  |	EMPH		{ wrap(CH_EMPH); } data END_EMPH { wrap(CH_EMPH); }
  |	ITAL		{ wrap(CH_ITAL); } data END_ITAL { wrap(CH_ITAL); }
  |	FONT data END_FONT
	/* An image is replaced by its string value, or a placeholder if none */
  |	IMAGE		{ space();
			  if ($1) { wraps($1); free($1); }
			  else wraps("[IMAGE]");
			  space();
			}
	/* Room for the 18-character prefix, the tag, ">" and the terminator */
  |	HTML_TAG	{ char *errmsg = malloc(strlen($1) + 20);
			  if (errmsg)
			  {
				strcpy(errmsg, "Unknown HTML tag <");
				strcat(errmsg, $1); strcat(errmsg, ">");
				yyerror(errmsg); free(errmsg);
			  }
			  else	/* Out of memory: report without the name */
				yyerror("Unknown HTML tag");
			  free($1); }
  ;

/*
 * Anchor: the enclosed data is processed as usual and the anchor itself
 * produces no output (the only statement in the action is commented out).
 * NOTE(review): the ANCHOR string value is not freed here, although the
 * IMAGE and HTML_TAG strings are freed elsewhere - confirm who owns it.
 */
anchor: ANCHOR data END_ANCHOR	{ /* yyerror($1); */ }
  ;

/*
 * One item inside a definition list.  DTERM flushes the pending line and
 * sets flush; DDEF clears flush and then flushes the pending line.
 */
dbody:	text
  |	DTERM		{ coutput(); flush = TRUE; }
  |	DDEF		{ flush = FALSE; coutput(); }
  ;

/* Zero or more definition list items */
dlist:	/* empty */
  |	dlist dbody
  ;

/*
 * One item inside an ordered or unordered list.  LIST flushes the pending
 * line and starts a new one with the item marker: the current number of
 * the innermost list (which is then incremented) when that number is
 * non-zero, otherwise S_LIST.  indent() is then called when depth is
 * non-zero.
 */
lbody:	text
  |	LIST		{ coutput();
			  if (listp->num) sprintf(line, "%d ", listp->num++);
			  else strcpy(line, S_LIST);
			  if (depth) indent(); }
  ;

/* Zero or more list items */
list:	/* empty */
  |	list lbody
  ;

/*
 * Preformatted text: the pre and flush flags are both set while the
 * enclosed data is parsed and both cleared at END_PRE.
 */
pre:	PRE		{ pre = flush = TRUE; } data
	END_PRE		{ pre = flush = FALSE; }
  ;

%%

/*
 * Run the parser.  On success the pending line is flushed and the S_EOF
 * marker is written.  On failure a message goes to stderr and the exit
 * status is non-zero so that callers can detect the error.
 */
int main(void)
{
	if (yyparse())
	{
		fprintf(stderr, "Parse failed.\n");
		return EXIT_FAILURE;
	}

	coutput();
	puts(S_EOF);
	return 0;
}