view ueda/sverp/lexer.c @ 82:20c6f84c75e7

bomstruct.h factored out
author Mychaela Falconia <falcon@freecalypso.org>
date Thu, 23 Feb 2017 19:18:16 +0000
parents 7b4f78fcca08
children
line wrap: on
line source

/*
 * This module contains the lexer underlying the parser for the minimal
 * subset of Verilog we grok; the same lexer will be used to read and parse
 * the primitives definition file.
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include "lexer.h"

/* Name of the file being lexed; printed as the prefix of error messages. */
char *parser_filename;
/* Input stream that my_getchar() reads characters from. */
FILE *parser_readF;
/* Current line counter; bumped on each newline consumed, shown in errors. */
int parser_lineno;
/* Text of the most recent WORD or QSTRING token, NUL-terminated. */
char parser_read_word[MAXWORD+1];
/* Decimal value of the most recent NUMBER token. */
int parser_read_number;
/*
 * One-token pushback slot: when nonzero, get_token() returns this value
 * (and clears the slot) instead of reading new input.
 */
int pushback_token;

/*
 * Report a fatal lexing/parsing error and terminate the program.
 *
 * The message is written to stderr, prefixed with the current input
 * file name and line number, and the process exits with status 1.
 * This function does not return.
 */
void
parse_error(msg)
	char *msg;
{
	FILE *errout = stderr;

	fprintf(errout, "%s line %d: %s\n",
		parser_filename, parser_lineno, msg);
	exit(1);
}

/*
 * Fetch the next character from parser_readF, validating it.
 *
 * A negative value from getc() (end of input) is passed straight back
 * to the caller.  Any non-ASCII character, or any control character
 * other than newline and tab, is a fatal error via parse_error().
 * Otherwise the character itself is returned.
 */
static
my_getchar()
{
	int ch = getc(parser_readF);

	if (ch >= 0) {
		if (!isascii(ch))
			parse_error("non-ASCII character");
		switch (ch) {
		case '\n':
		case '\t':
			/* the only control characters we accept */
			break;
		default:
			if (iscntrl(ch))
				parse_error("invalid control character");
		}
	}
	return(ch);
}

/*
 * Skip the body of a traditional C-style comment.  Called after the
 * opening slash-star has been consumed; returns once the closing
 * star-slash has been read.  Newlines inside the comment advance
 * parser_lineno.  Hitting end of input first is a fatal error.
 */
static void
handle_trad_comment()
{
	int ch;
	int prev_was_star = 0;

	while ((ch = my_getchar()) >= 0) {
		if (ch == '\n')
			parser_lineno++;
		else if (ch == '/' && prev_was_star)
			return;
		/* remember whether a closing slash may follow */
		prev_was_star = (ch == '*');
	}
	parse_error("/* comment ends in EOF");
}

/*
 * Skip the remainder of a double-slash comment.  Consumes characters up
 * to and including the terminating newline and counts that newline in
 * parser_lineno.  End of input before the newline is a fatal error.
 */
static void
handle_line_comment()
{
	int ch;

	do {
		ch = my_getchar();
		if (ch < 0)
			parse_error("// comment ends in EOF");
	} while (ch != '\n');
	parser_lineno++;
}

/*
 * Called after a '/' has been read: the following character decides
 * which comment style we are in.  A '*' starts a traditional C comment,
 * a second '/' starts a to-end-of-line comment; anything else is a
 * fatal error, since a lone '/' is not a valid token here.
 */
static void
handle_comment()
{
	int next = my_getchar();

	if (next == '*') {
		/* traditional C comment style */
		handle_trad_comment();
	} else if (next == '/') {
		/* double slash comment style */
		handle_line_comment();
	} else {
		parse_error("character after '/' is not '*' or '/'");
		exit(1);
	}
}

/*
 * Read the rest of a decimal number token whose first digit has already
 * been consumed and is passed in as first_digit (a character code).
 * The resulting value is left in parser_read_number.
 *
 * A number of more than MAXDIGITS digits is a fatal error, as is a
 * digit string immediately followed by a letter or underscore.  The
 * first non-digit character read is pushed back onto the input stream
 * unless it was end of input.
 */
static void
handle_num_token(first_digit)
{
	register int c, n;

	parser_read_number = first_digit - '0';
	/* n is the count of digits accumulated so far */
	for (n = 1; ; n++) {
		c = my_getchar();
		if (!isdigit(c))
			break;
		/*
		 * Enforce the length limit BEFORE folding in the next digit:
		 * checking only after the loop would let an over-long digit
		 * string overflow the signed int accumulator (undefined
		 * behaviour) before it gets rejected.
		 * NOTE(review): this assumes MAXDIGITS is small enough that
		 * any MAXDIGITS-digit value fits in an int -- confirm against
		 * the definition in lexer.h.
		 */
		if (n >= MAXDIGITS)
			parse_error("number is too long (MAXDIGITS exceeded)");
		parser_read_number *= 10;
		parser_read_number += c - '0';
	}
	if (c >= 0) {
		if (isalpha(c) || c == '_')
			parse_error(
			"digits followed by letters: neither word nor number");
		/* the terminating character belongs to the next token */
		ungetc(c, parser_readF);
	}
}

/*
 * Read the rest of a word token whose first character has already been
 * consumed and is passed in as first_char.  Subsequent characters may
 * be alphanumerics, '_' or '$'.  The NUL-terminated text is stored in
 * parser_read_word; a word longer than MAXWORD characters is a fatal
 * error.  The character that ends the word is pushed back onto the
 * input stream unless it was end of input.
 */
static void
handle_word_token(first_char)
{
	register int ch;
	register int count;

	parser_read_word[0] = first_char;
	count = 1;
	for (;;) {
		ch = my_getchar();
		if (!(isalnum(ch) || ch == '_' || ch == '$'))
			break;
		if (count >= MAXWORD)
			parse_error("text token is too long");
		parser_read_word[count++] = ch;
	}
	parser_read_word[count] = '\0';
	if (ch >= 0)
		ungetc(ch, parser_readF);
}

/*
 * Read a double-quoted string; the opening quote has already been
 * consumed.  The string body (without the quotes) is stored,
 * NUL-terminated, in parser_read_word.  A backslash causes the
 * character after it to be taken literally (so an escaped quote does
 * not end the string); the backslash itself is not stored.
 *
 * End of input or a newline before the closing quote -- including
 * right after a backslash -- is a fatal error, as is a string body
 * longer than MAXWORD characters.
 */
static void
handle_qstr()
{
	register int ch;
	register int count = 0;

	for (;;) {
		ch = my_getchar();
		if (ch == '"')
			break;
		if (ch == '\\')
			ch = my_getchar();
		/* applies to both plain and backslash-escaped characters */
		if (ch == EOF || ch == '\n')
			parse_error("unterminated quoted string");
		if (count >= MAXWORD)
			parse_error("quoted string is too long");
		parser_read_word[count++] = ch;
	}
	parser_read_word[count] = '\0';
}

/*
 * Return the next token from the input.
 *
 * If a token has been pushed back (pushback_token is nonzero), that
 * token is returned and the pushback slot is cleared.  Otherwise
 * blanks, tabs, newlines and comments are skipped and the next token
 * is classified:
 *   - 0 at end of input;
 *   - the character itself for the punctuation ( ) , . : ; [ ];
 *   - QSTRING for a double-quoted string (text in parser_read_word);
 *   - NUMBER for a decimal number (value in parser_read_number);
 *   - WORD for an identifier starting with a letter or underscore
 *     (text in parser_read_word).
 * Any other character is reported on stderr and the program exits
 * with status 1.
 */
get_token()
{
	register int ch;

	ch = pushback_token;
	if (ch != 0) {
		pushback_token = 0;
		return(ch);
	}

	/* skip whitespace and comments until something significant */
	for (;;) {
		ch = my_getchar();
		if (ch == EOF)
			return(0);
		if (ch == ' ' || ch == '\t')
			continue;
		if (ch == '\n') {
			parser_lineno++;
			continue;
		}
		if (ch == '/') {
			handle_comment();
			continue;
		}
		break;
	}

	switch (ch) {
	case '(':
	case ')':
	case ',':
	case '.':
	case ':':
	case ';':
	case '[':
	case ']':
		/* single-character punctuation tokens stand for themselves */
		return(ch);
	case '"':
		handle_qstr();
		return(QSTRING);
	default:
		break;
	}

	if (isdigit(ch)) {
		handle_num_token(ch);
		return(NUMBER);
	}
	if (isalpha(ch) || ch == '_') {
		handle_word_token(ch);
		return(WORD);
	}

	fprintf(stderr, "%s line %d: bad character \'%c\'\n", parser_filename,
		parser_lineno, ch);
	exit(1);
}