/*
 * look.c
 *
 * Compute LL(1) lookahead sets for SORCERER code-generator
 *
 * SOFTWARE RIGHTS
 *
 * We reserve no LEGAL rights to SORCERER -- SORCERER is in the public
 * domain.  An individual or company may do whatever they wish with
 * source code distributed with SORCERER or the code generated by
 * SORCERER, including the incorporation of SORCERER, or its output, into
 * commerical software.
 * 
 * We encourage users to develop software with SORCERER.  However, we do
 * ask that credit is given to us for developing SORCERER.  By "credit",
 * we mean that if you incorporate our source code into one of your
 * programs (commercial product, research project, or otherwise) that you
 * acknowledge this fact somewhere in the documentation, research report,
 * etc...  If you like SORCERER and have developed a nice tool with the
 * output, please mention that you developed it using SORCERER.  In
 * addition, we ask that this header remain intact in our source code.
 * As long as these guidelines are kept, we expect to continue enhancing
 * this system and expect to make other tools available as they are
 * completed.
 *
 * SORCERER 1.00B
 * Terence Parr
 * AHPCRC, University of Minnesota
 * 1992-1994
 */
#include "stdpccts.h"
#include "sym.h"
#include "proto.h"

/* Node that terminates the whole GLA.  build_GLA() allocates it and
 * build_follow_links() points the end of every rule that has no reference
 * list at it through an end_of_input edge.
 */
static GLA *end_node;

/* Allocates a zero-filled GLA node tagged with CurRule (defined at the
 * bottom of this file).
 */
static GLA *newGLANode();

/*
 * Compute the set of labels that can be seen first when walking the GLA
 * from node 'p'.
 *
 * Edge 1 contributes its own label when that label is a token (plus every
 * member of the token range when 'upper_range' is nonzero), wild_card or
 * end_of_input.  Any other label is treated as an epsilon edge and is
 * followed recursively.  Edge 2 is always followed recursively.
 *
 * The 'visited' flag is set while a node is on the current walk and is
 * cleared again before returning, so a cycle yields the empty set instead
 * of unbounded recursion.
 *
 * Returns 'empty' for a NULL node or for a node already on the current
 * walk.  The temporary sets obtained from the recursive calls are released
 * here with set_free().
 */
set
#ifdef __STDC__
Lookahead( GLA *p )
#else
Lookahead( p )
GLA *p;
#endif
{
	set r, rv;

	if ( p == NULL ) return empty;
	r = rv = empty;

	/* have we been here before for this k? Detect cycles */
	if ( p->visited )
	{
		return empty;
	}
	p->visited = 1;
	
	/* Does edge 1 have a token label? */
	if ( p->label1 <= last_valid_token )
	{
		rv = set_of(p->label1);
		if ( p->upper_range!=0 )
		{
			int i;
			/* put all elements of range into lookahead set */
			for (i=p->label1+1; i<=p->upper_range; i++)
			{
				set_orel(i, &rv);
			}
		}
	}
	else if ( p->label1 == wild_card || p->label1 == end_of_input )
	{
		rv = set_of(p->label1);
	}
	else								/* must be an epsilon transfer */
	{
		int trouble = 0;
		if ( p->p1!=NULL )
		{
			/* check for infinite-recursion */
			/* o->o->o->o-A->o..		Rule def (p->p1 points to far left node)
			 *       |					for 'a : A | B;'
			 *       o->o-B->o..
			 *
			 * the visited flag is set on the start of the alts.
			 */
			if ( p->is_rule_ref && p->p1->is_rule_entry )
			{
				require(p->p1->p1!=NULL, "Lookahead: invalid GLA");
				require(p->p1->p1->p1!=NULL, "Lookahead: invalid GLA");
				if ( p->p1->p1->p1->visited ) {
					errNoFL(eMsg2("infinite recursion from rule %s to rule %s",
								  p->in_rule,p->p1->in_rule));
					/* report once, then predict "anything" for this edge */
					rv = set_of(wild_card);
					trouble = 1;
				}
			}
			if ( !trouble )
			{
				/* rv is still empty here, so this copies r into rv */
				r = Lookahead(p->p1);
				rv = set_or(r, rv);
				set_free(r);
			}
		}
	}

	/* handle edge 2 */
	r = Lookahead(p->p2);					/* tokens can't be on e2 edges */
	set_orin(&rv, r);
	set_free(r);

	/* this node is no longer visited */
	p->visited = 0;

	return rv;
}

/*
 * Build one big, intertwined lookahead GLA out of the per-rule GLAs.
 * Once it exists, lookahead computation is a plain graph walk.
 *
 * [1]	Create the GLA start state for each nonterminal.  Each nonterminal
 *		looks like "o-->o" where the 2nd 'o' is the start of the GLA list
 *		of alts for the grammar block.  Alternatives are connected like this:
 *
 *		o-->o-->alt1 -->o
 *		    |           ^
 *		    o-->alt2 ---|
 *		    ...         |
 *		    |           |
 *		    o-->altn ---|
 *
 *		The BLOCK building function actually makes the ALT list.  This
 *		function only builds the first 'o' node (upper left).
 *
 * [2]	Build the GLA for the block of each rule and hook it under the
 *		rule's start state.
 *
 * [3]	Link the end of each rule to the nodes that follow every reference
 *		to that rule.
 *
 * 'rules' is the sibling list of rule trees; it must not be NULL and every
 * rule name must already be in the symbol table.
 */
void
#ifdef __STDC__
build_GLA( AST *rules )
#else
build_GLA( rules )
AST *rules;
#endif
{
	AST *rule;
	SymEntry *sym;
	GLA *rule_tail, *block_entry;

	require(rules!=NULL, "build_GLA: NULL rule list");

	/* [1] entry node per rule; CurRule is set first so the node is tagged */
	rule = rules;
	while ( rule!=NULL )
	{
		sym = (SymEntry *) hash_get(symbols, rule->text);
		require(sym!=NULL, "build_GLA: sym tab broken");
		CurRule = sym->str;
		sym->start_state = newGLANode();
		sym->start_state->is_rule_entry = 1;
		rule = rule->right;
	}

	end_node = newGLANode();

	/* [2] block GLA per rule, reached by an epsilon edge from the entry */
	rule = rules;
	while ( rule!=NULL )
	{
		sym = (SymEntry *) hash_get(symbols, rule->text);
		require(sym!=NULL, "build_GLA: sym tab broken");
		CurRule = sym->str;

		block_entry = newGLANode();
		block_entry->p1 = build_GLA_for_block(rule->down, &rule_tail);
		block_entry->label1 = epsilon;

		sym->start_state->p1 = block_entry;
		sym->start_state->label1 = epsilon;
		sym->end_rule = rule_tail;

		/* the rule's BLOCK tree node remembers its GLA entry */
		rule->down->start_state = sym->start_state->p1;
		rule = rule->right;
	}

	/* [3] follow links from each rule end */
	rule = rules;
	while ( rule!=NULL )
	{
		sym = (SymEntry *) hash_get(symbols, rule->text);
		require(sym!=NULL, "build_GLA: sym tab broken");
		build_follow_links(sym->end_rule, sym->refs);
		rule = rule->right;
	}
}

/*
 * Tree looks like ( BLOCK ( ALT alpha ) ... ( ALT beta ) ).
 *
 * For each ALT of BLOCK, build a new alternative into the GLA.
 *
 * Build a block that looks like:
 *
 *		o-->alt1 -->o
 *		|           ^
 *		o-->alt2 ---|
 *		...         |
 *		|           |
 *		o-->altn ---|
 *
 * Returns the node heading the first alternative; the alternatives are
 * chained downward through their 'p2' edges.  The shared tail node is
 * stored in *tail.  If the BLOCK has no ALT children the result is NULL
 * (the tail node is still created and returned through *tail).
 */
GLA *
#ifdef __STDC__
build_GLA_for_block( AST *block, GLA **tail )
#else
build_GLA_for_block( block, tail )
AST *block;
GLA **tail;
#endif
{
	/* 'first' starts out NULL so an empty BLOCK never returns garbage */
	GLA *p, *alt_tail, *blk_tail, *prev=NULL, *first=NULL;
	AST *alt;
	require(block!=NULL, "build_GLA_for_block: NULL tree pointer");

	blk_tail = newGLANode();
	for (alt=block->down; alt!=NULL; alt=alt->right)
	{
		p = newGLANode();
		p->p1 = build_GLA_for_ALT(alt, &alt_tail);
		p->label1 = epsilon;
		/* connect new alt into downward link */
		if ( prev!=NULL )
		{
			prev->p2 = p;
			prev->label2 = epsilon;
		}
		else
		{
			first = p;
		}
		prev = p;
		/* link alt to block tail node */
		alt_tail->p1 = blk_tail;
		alt_tail->label1 = epsilon;
	}
	*tail = blk_tail;
	return first;
}

/*
 * Tree looks like ( ALT elem1 ... elemn ).  The element list under the
 * ALT is handed to build_GLA_for_tree, which builds a GLA for each
 * element; the first node of the result is returned and the last node
 * is stored in *alt_tail.
 *
 * Tree patterns such as #(A b C) must be included because they may
 * invoke other rules or have subrules in them for which lookahead
 * must be computed.  The '#(' and ')' are basically ignored and #(A b C)
 * turns into 'A b C' for GLA construction purposes.  Note that this would
 * NOT work for LL(k>1).
 */
GLA *
#ifdef __STDC__
build_GLA_for_ALT( AST *alt, GLA **alt_tail )
#else
build_GLA_for_ALT( alt, alt_tail )
AST *alt;
GLA **alt_tail;
#endif
{
	require(alt!=NULL, "build_GLA_for_alt: NULL tree pointer");

	/* build_GLA_for_tree always stores a tail, so it can write the
	 * caller's slot directly.
	 */
	return build_GLA_for_tree(alt->down, alt_tail);
}

/*
 * Build the GLA for a sibling list of tree elements starting at 'q'.
 *
 * Every element that yields a GLA fragment is appended to a chain joined
 * by epsilon edges.  When a Token or WILD element has children (a tree
 * pattern), the GLA of the child list is appended too and is closed off
 * with an end_of_input edge.
 *
 * Returns the first node of the chain and stores the last one in
 * *tree_tail.  When nothing was built, a single fresh node serves as both.
 */
GLA *
#ifdef __STDC__
build_GLA_for_tree( AST *q, GLA **tree_tail )
#else
build_GLA_for_tree( q, tree_tail )
AST *q;
GLA **tree_tail;
#endif
{
	AST *node;
	GLA *piece, *piece_tail, *chain_tail, *head=NULL;

	for (node=q; node!=NULL; node=node->right)
	{
		piece = build_GLA_for_element(node, &piece_tail);
		if ( piece!=NULL )
		{
			if ( head!=NULL )
			{
				/* hang the new fragment off the end of the chain */
				chain_tail->p1 = piece;
				chain_tail->label1 = epsilon;
			}
			else
			{
				head = piece;
			}
			chain_tail = piece_tail;
		}

		/* only Token/WILD roots with children describe a subtree */
		if ( node->down == NULL ) continue;
		if ( node->token != Token && node->token != WILD ) continue;

		piece = build_GLA_for_tree(node->down, &piece_tail);
		if ( piece!=NULL )
		{
			chain_tail->p1 = piece;
			chain_tail->label1 = epsilon;

			/* put an end-of-input node at the end of each sibling list so
			 * that lookahead computation doesn't take '#(A b) D' to be the
			 * same as 'A b D'; i.e., 'D' CANNOT follow a reference to a 'b'
			 * in this context as the 'b' is at a lower level.  Only a NULL
			 * ptr can follow a ref to 'b'.  For example,
			 *
			 *		a : #(A b) D;
			 *		b : E
			 *		  |
			 *		  ;
			 *
			 * The lookahead for 'b' is {E} for alt1 and {end_of_input} of
			 * alt2 because for the second alt to match, the input subtree
			 * must be NULL.
			 */
			piece_tail->p1 = newGLANode();
			piece_tail->label1 = end_of_input;
			chain_tail = piece_tail->p1;
		}
	}

	if ( head!=NULL )
	{
		*tree_tail = chain_tail;
		return head;
	}

	/* nothing contributed: one lone node is both start and tail */
	head = newGLANode();
	*tree_tail = head;
	return head;
}

/*
 * Build the GLA fragment for a single grammar element.
 *
 * Returns the first node of the fragment, or NULL when the element adds
 * nothing to the lookahead graph (Action, PRED_OP, or a token type that
 * the switch does not handle).  The last node of the fragment is stored
 * in *elem_tail; it is set to NULL for Action and PRED_OP and is left
 * untouched for token types the switch does not handle.
 */
GLA *
#ifdef __STDC__
build_GLA_for_element( AST *elem, GLA **elem_tail )
#else
build_GLA_for_element( elem, elem_tail )
AST *elem;
GLA **elem_tail;
#endif
{
	SymEntry *p;
	GLA *blkstart, *blkend, *blkloopback, *start=NULL;
	require(elem!=NULL, "build_GLA_for_element: NULL tree pointer");

	/* NOTE(review): only reachable if require() returns on failure -- confirm */
	if ( elem == NULL ) return NULL;
	switch ( elem->token )
	{
	    case Token :
			/* o --token--> o, carrying the token range (if any) from the AST */
			p = (SymEntry *) hash_get(symbols, elem->text);
			require(p!=NULL, "build_GLA_for_element: token not in sym tab");
			start = newGLANode();
			start->p1 = newGLANode();
			start->label1 = p->token_type;
			start->upper_range = elem->upper_range;
			*elem_tail = start->p1;
			break;
		case NonTerm :
			/* edge 1 points to the start of the GLA for the referenced rule.
			 * Make a new node as if it
			 * were connected, however, so that this node can be connected to the end
			 * of block node by the build BLOCK routine.
			 */
			p = (SymEntry *) hash_get(symbols, elem->text);
			if ( p==NULL || !p->defined ) {
				/* undefined rule: report it and stand in a wild_card edge */
				errNoFL(eMsg1("rule not defined: '%s'", elem->text));
				start = newGLANode();
				start->p1 = newGLANode();
				start->label1 = wild_card;
				*elem_tail = start->p1;
				break;
			}
			start = newGLANode();
			start->is_rule_ref = 1;
			start->p1 = p->start_state;
			start->label1 = epsilon;
			/* 'next' is the node that follows the rule reference */
			start->next = newGLANode();
			*elem_tail = start->next;
			/* maintain reference list for this nonterminal */
			list_add(&p->refs, start->next);
			break;
		case WILD :
			/* o --wild_card--> o */
			start = newGLANode();
			start->p1 = newGLANode();
			start->label1 = wild_card;
			*elem_tail = start->p1;
			break;
		case Action :
			/* actions build no GLA; 'start' stays NULL */
			*elem_tail = NULL;
			break;
		case PRED_OP :
			/* return o->blk */
			start = newGLANode();
			start->p1 = build_GLA_for_block(elem->down, elem_tail);
			start->label1 = epsilon;
			elem->down->start_state = start;
			/* DO NOT RETURN the block ptr because we don't want the lookahead
			 * analysis to see it.  However, the AST BLOCK node will pt to it.
			 */
			start = NULL;
			*elem_tail = NULL;
			break;
		case CLOSURE :
			/* Make a blk like this:
			 *     v---------|
			 * o-->o-->blk-->o-->o
			 *     |-------------^
			 * where the farthest left node is the start node passed in,
			 * the 2nd from the right is created here, and the 2nd from the left
			 * is a node created here.  The farthest right node is created
			 * here as the end_blk node for the CLOSURE block.  The 'p2' ptr
			 * of the blkloopback node goes back to the blkstart node.
			 */
			blkstart = newGLANode();
			blkloopback = newGLANode();
			blkend = newGLANode();
			blkstart->p1 = build_GLA_for_block(elem->down, elem_tail);
			blkstart->label1 = epsilon;
			/* bypass edge: the loop may be skipped entirely */
			blkstart->p2 = blkend;
			blkstart->label2 = epsilon;
			blkloopback->p1 = blkend;
			blkloopback->label1 = epsilon;
			blkloopback->p2 = blkstart;
			blkloopback->label2 = epsilon;
			/* *elem_tail is still the block's tail here; wire it before
			 * it is replaced by blkend.
			 */
			(*elem_tail)->p1 = blkloopback;
			(*elem_tail)->label1 = epsilon;
			*elem_tail = blkend;
			elem->down->start_state = blkstart;
			start = newGLANode();
			start->p1 = blkstart;
			start->label1 = epsilon;
			break;
		case POS_CLOSURE :
			/* Make a blk like this:
			 * o-->o-->blk-->o-->o
			 *     ^---------|
			 * where the farthest left node is the start node passed in.
			 * The 'next' ptr of the blkstart node points to the endblk node.
			 */
			blkstart = newGLANode();
			blkloopback = newGLANode();
			blkend = newGLANode();
			blkstart->p1 = build_GLA_for_block(elem->down, elem_tail);
			blkstart->label1 = epsilon;
			/* record the end of loop for "follow" computation */
			blkstart->next = blkend;
			blkloopback->p1 = blkend;
			blkloopback->label1 = epsilon;
			blkloopback->p2 = blkstart;
			blkloopback->label2 = epsilon;
			/* wire the block's tail before *elem_tail is replaced by blkend */
			(*elem_tail)->p1 = blkloopback;
			(*elem_tail)->label1 = epsilon;
			*elem_tail = blkend;
			elem->down->start_state = blkstart;
			start = newGLANode();
			start->p1 = blkstart;
			start->label1 = epsilon;
			break;
		case OPT :
			/* Make a blk like this:
			 * o-->o-->blk-->o
			 *     |---------^
			 * where the farthest left node is the start node passed in.
			 */
			blkstart = newGLANode();
			blkend = newGLANode();
			blkstart->p1 = build_GLA_for_block(elem->down, elem_tail);
			blkstart->label1 = epsilon;
			/* bypass edge for the empty alternative */
			blkstart->p2 = blkend;
			blkstart->label2 = epsilon;
			(*elem_tail)->p1 = blkend;
			(*elem_tail)->label1 = epsilon;
			/* the tail's p1 was just set to blkend, so this yields blkend */
			*elem_tail = (*elem_tail)->p1;
			elem->down->start_state = blkstart;
			start = newGLANode();
			start->p1 = blkstart;
			start->label1 = epsilon;
			break;
		case BLOCK :
			/* return o->blk */
			start = newGLANode();
			start->p1 = build_GLA_for_block(elem, elem_tail);
			start->label1 = epsilon;
			elem->start_state = start;
			break;
	}
	return start;
}

/*
 * Connect the end-of-rule node to whatever can follow the rule.
 *
 * With no reference list, the rule end gets an end_of_input edge to the
 * shared end_node.  Otherwise one new node is created per list entry;
 * each points (epsilon) at the GLA node recorded in that entry, the new
 * nodes are chained downward through 'p2', and the rule end gets an
 * epsilon edge to the first of them.
 *
 * NOTE(review): the walk starts at refs->next, so the first ListNode is
 * skipped -- presumably a list header; confirm against list_add().
 */
void
#ifdef __STDC__
build_follow_links( GLA *end_of_rule, ListNode *refs )
#else
build_follow_links( end_of_rule, refs )
GLA *end_of_rule;
ListNode *refs;
#endif
{
	ListNode *p;
	/* 'first' starts out NULL so that a list with nothing after its first
	 * node leaves end_of_rule->p1 NULL rather than indeterminate.
	 */
	GLA *f, *prev, *first=NULL;
	require(end_of_rule!=NULL, "build_follow_links: NULL tree pointer");

	if ( refs == NULL )	/* no ref list, must be start symbol */
	{
		/* append a '$' link to the end_input node */
		end_of_rule->p1 = end_node;
		end_of_rule->label1 = end_of_input;
		return;
	}

	/* the refs list is a list of GLA nodes that follow references to
	 * the rule associated with 'end_of_rule'.
	 */
	prev = NULL;
	for (p = refs->next; p!=NULL; p=p->next)
	{
		f = newGLANode();
		f->p1 = (GLA *)p->elem;
		f->label1 = epsilon;
		/* connect new follow link into downward list */
		if ( prev!=NULL )
		{
			prev->p2 = f;
			prev->label2 = epsilon;
		}
		else
		{
			first = f;
		}
		prev = f;
	}

	/* connect end of rule to follow list */
	end_of_rule->p1 = first;
	end_of_rule->label1 = epsilon;
}

/*
 * Dump the GLA of every rule in the sibling list 'rules' to stderr, one
 * rule after another, by calling dump_GLA() on each rule's start state.
 * 'rules' must not be NULL and every rule name must be in the symbol table.
 */
void
#ifdef __STDC__
dump_GLAs( AST *rules )
#else
dump_GLAs( rules )
AST *rules;
#endif
{
	AST *r;
	SymEntry *s;
	require(rules!=NULL, "dump_GLAs: NULL rules pointer");

	fprintf(stderr,"\n");
	for (r=rules; r!=NULL; r=r->right)
	{
		s = (SymEntry *) hash_get(symbols, r->text);
		/* name this function in the diagnostic, not build_GLA */
		require(s!=NULL, "dump_GLAs: sym tab broken");
		dump_GLA( s->start_state );
		fprintf(stderr,"\n");
	}
}

/*
 * Print a rough picture of the GLA hanging off rule entry node 'q' on
 * stderr.  Can only dump GLA's for BNF for the moment.
 *
 * The walk marks nodes with 'visited' so that it stops on cycles.  Those
 * marks are removed again before returning: Lookahead() treats a marked
 * node as "already on the current walk" and returns the empty set for it,
 * so marks left behind would corrupt later lookahead computation.
 *
 * Assumes no node reachable from 'q' is marked on entry.
 */
void
#ifdef __STDC__
dump_GLA( GLA *q )
#else
dump_GLA( q )
GLA *q;
#endif
{
    GLA *prod, *p;

    fprintf(stderr,"o-->");
    for (prod=q->p1; prod!=NULL; prod=prod->p2)
    {
        fprintf(stderr,"o");

        for (p = prod; p->p1!=NULL;)
        {
            if ( p->visited ) break;
            p->visited = 1;

            if ( p->label1 > 0 && p->label1!=epsilon )
            {
                fprintf(stderr,"--%s-->o", token_dict[p->label1]);
                p = p->p1;
                if ( p->label1 == end_of_input ) break;
            }
            else if ( p->next!=NULL )
            {
                /* step over a construct via its recorded 'next' node */
                fprintf(stderr,"---^  o");
                p = p->next;
            }
            else
            {
                fprintf(stderr,"----->o");
                p = p->p1;
            }
        }
        fprintf(stderr,"\n");
        if ( prod->p2!=NULL ) fprintf(stderr,"    |\n    ");
    }

    /* Second pass: retrace exactly the same steps and clear the marks.
     * Each walk stops at the first unmarked node, which is where the
     * printing walk stopped (a node it had reached before, or the end).
     */
    for (prod=q->p1; prod!=NULL; prod=prod->p2)
    {
        for (p = prod; p->p1!=NULL;)
        {
            if ( !p->visited ) break;
            p->visited = 0;

            if ( p->label1 > 0 && p->label1!=epsilon )
            {
                p = p->p1;
                if ( p->label1 == end_of_input ) break;
            }
            else if ( p->next!=NULL )
            {
                p = p->next;
            }
            else
            {
                p = p->p1;
            }
        }
    }
}

/*
 * Check a BLOCK for LL(1) conflicts and warn on stderr about each one.
 *
 * Every pair of alternatives is intersected; a non-empty intersection
 * means some token predicts both alternatives.  For OPT and CLOSURE
 * blocks each alternative is also intersected with the lookahead stored
 * on the block's start state (the optional branch).
 *
 * 'blk' must be a BLOCK tree node with at least one ALT child and an
 * attached GLA whose alternatives already carry non-empty lookahead sets.
 */
void
#ifdef __STDC__
test_block_consistency( AST *blk, int block_type )
#else
test_block_consistency( blk, block_type )
AST *blk;
int block_type;
#endif
{
	AST *alt_ast;
	GLA *lhs, *rhs;
	set overlap;
	int lhs_no, rhs_no;

	require(blk!=NULL&&blk->token==BLOCK,
			"test_BLOCK_consistency: NULL or invalid block");

	alt_ast = blk->down;
	require(alt_ast!=NULL&&alt_ast->token==ALT,
			"test_BLOCK_consistency: invalid AST structure");
	require(blk->start_state!=NULL,
			"test_BLOCK_consistency: no GLA structure for block");

	/* pairwise comparison; alt_ast tracks the tree node of the left alt */
	lhs = blk->start_state->p1;
	lhs_no = 1;
	while ( lhs!=NULL )
	{
		require(!set_nil(lhs->lookahead), "test_BLOCK_consistency: invalid lookahead set");

		rhs = lhs->p2;
		rhs_no = lhs_no+1;
		while ( rhs!=NULL )
		{
			overlap = set_and(lhs->lookahead, rhs->lookahead);
			if ( !set_nil(overlap) )
			{
				fprintf(stderr, ErrHdr, FileStr[alt_ast->file], alt_ast->line);
				fprintf(stderr, " warning: alts %d and %d of (...) nondeterministic upon ",
						lhs_no, rhs_no);
				set_fprint(stderr, overlap);
				fprintf(stderr, "\n");
			}
			set_free(overlap);
			rhs = rhs->p2;
			rhs_no++;
		}

		lhs = lhs->p2;
		lhs_no++;
		alt_ast = alt_ast->right;
	}

	/* only these block types have an optional branch to test */
	if ( block_type != OPT && block_type != CLOSURE ) return;

	/* test optional alt against all regular alts */
	alt_ast = blk->down;
	lhs = blk->start_state->p1;
	lhs_no = 1;
	while ( lhs!=NULL )
	{
		overlap = set_and(lhs->lookahead, blk->start_state->lookahead);
		if ( !set_nil(overlap) )
		{
			fprintf(stderr, ErrHdr, FileStr[alt_ast->file], alt_ast->line);
			fprintf(stderr, " warning: alt %d and optional branch of %s nondeterministic upon ",
					lhs_no, block_type==OPT?"{...}":"(...)*");
			set_fprint(stderr, overlap);
			fprintf(stderr, "\n");
		}
		set_free(overlap);

		lhs = lhs->p2;
		lhs_no++;
		alt_ast = alt_ast->right;
	}
}

/*
 * Allocate one zero-filled GLA node and tag it with the rule currently
 * being processed (CurRule).  Allocation failure is reported through
 * require().
 */
static GLA *
newGLANode()
{
	GLA *node = (GLA *) calloc(1, sizeof(GLA));

	require(node!=NULL, "newGLANode: can't alloc memory");
	node->in_rule = CurRule;
	return node;
}
