#include <string>
#include <vector>
#include <iostream>
using namespace std;

#include "category.h"
#include "parser.h"
#include "addressing.h"
#include "globals.h"
#include "utility.h"
#include "errors.h"
#include "files.h"

// Parse the DII file found at `address`.
//
// The file is first loaded with readFile(..). When that fails a message is
// written to stdout and 0 is returned. Otherwise the result of
// processDIIFile(..) is handed back unchanged: a pointer to the root
// category, or 0 when processing ended with a critical error.

Category *parseDIIFile( string address )
{
    if( readFile( address ) )
        return processDIIFile( address );

    cout << "cannot open file " << address << endl;
    return 0;
}

// The actual processing of a file. Used by loadDIIFile(..) in the navigation
// module and by parseDIIFile(..).
//
// Resets the global parser state (macrosv, preprov, linenrv, parsingheader,
// parsingfile, linenr), runs preprocess() and then parses the result
// recursively into a freshly allocated root category.
//
// Returns the root category. When criticalerror was raised during parsing,
// the category and sourcev are deleted and 0 is returned instead.

Category *processDIIFile( string address )
{
    // start from a clean parser state for this file
    macrosv.clear();
    preprov = new vector<string>;
    linenrv = new vector<int>;
    parsingheader = FALSE;
    parsingfile = address;
    linenr = 0;

    preprocess();

    Category *root = new Category();
    string firstLine = getLine();

    criticalerror = FALSE;
    parseRecursive( -1, root, firstLine );

    // the preprocessed buffers are released as soon as parsing is done
    delete preprov;
    delete linenrv;

    if( !criticalerror )
        return root;

    // NOTE(review): preprov, linenrv and sourcev keep their old (now
    // deleted) addresses -- confirm no caller touches them afterwards
    delete root;
    delete sourcev;

    return 0;
}

// Parse a header file and insert the resulting information into `category`.
//
// `address` is split with parseAddress(..). If the header is already in the
// cache, its stored information is copied with insertHeaderInfo(..).
// Otherwise the file is read and preprocessed, its lines are run through
// checkOptions(..) with parsingheader set, and the owner / email / home
// fields that ended up non-empty are copied into `category`. The header
// category is then appended to the cache vectors.
//
// While the header is parsed the globals sourcev, preprov, linenrv, linenr
// and parsingfile belong to the header; they are saved first and restored
// before returning.
//
// Returns TRUE on success, FALSE when the address cannot be parsed or the
// file cannot be read (the latter also adds a parse warning).

int parseHeader( string address, Category *category )
{
    // the address exactly as given, kept for the warning text below
    const string requested = address;

    int p_dii;
    string p_extdir, p_file, p_intdir;

    if( !parseAddress( address, p_dii, p_extdir, p_file, p_intdir ) )
        return FALSE;

    address = slashConcat( p_extdir, p_file );

    int entry;
    if( cached( p_extdir, p_file, entry ) )
    {
#ifndef GUI
        cout << "( header file " << address << " already parsed )" << endl;
#endif

        insertHeaderInfo( cacheCatv[ entry ], category );
        return TRUE;
    }

    // readFile(..) is expected to replace sourcev; remember the outer one
    vector<string> *outerSourcev = sourcev;

    if( !readFile( address ) )
    {
        addParseWarning( "cannot open header file \"" + requested + "\"" );

        sourcev = outerSourcev;
        return FALSE;
    }

    Category *headerCat = new Category();

    // save the state of the file that includes this header
    const int outerLinenr = linenr;
    const string outerParsingfile = parsingfile;
    vector<string> *outerPreprov = preprov;
    vector<int> *outerLinenrv = linenrv;

    // switch the parser over to the header
    preprov = new vector<string>;
    linenrv = new vector<int>;
    parsingfile = address;
    parsingheader = TRUE;
    linenr = 0;

    preprocess();

    string currentLine = getLine();
    while( !closingLine( currentLine ) )
        checkOptions( currentLine, headerCat, -1 );

    // Copy the owner information by hand. insertHeaderInfo(..) is not used
    // here because it would also push the header's macros onto macrosv, and
    // checkOptions(..) has already done that.
    if( headerCat->owner != "" )
        category->owner = headerCat->owner;
    if( headerCat->email != "" )
        category->email = headerCat->email;
    if( headerCat->home != "" )
        category->home = headerCat->home;

    // remember the parsed header for the next inclusion
    // NOTE(review): cacheAddressv receives three entries per header, with
    // p_extdir pushed twice -- confirm this matches the layout cached(..)
    // expects
    cacheCatv.push_back( headerCat );
    cacheAddressv.push_back( p_extdir );
    cacheAddressv.push_back( p_extdir );
    cacheAddressv.push_back( p_file );
    cacheSourcev.push_back( 0 );

    // drop the header's buffers ..
    delete sourcev;
    delete preprov;
    delete linenrv;

    // .. and hand the parser back to the including file
    linenr = outerLinenr;
    parsingfile = outerParsingfile;
    parsingheader = FALSE;
    sourcev = outerSourcev;
    preprov = outerPreprov;
    linenrv = outerLinenrv;

    return TRUE;
}

// pre-process stage. take global vector<string> *sourcev, preprocess the
// vector and place the result in global vector<string> *preprov.
// pre-processing involves tasks such as removing comments and joining lines.

void preprocess()
{
        int i, closed = FALSE, unfinishedcomment = FALSE;
        string line;
        unsigned int n;
        string ctrlchars = ";:#,=|&^<>";

        for( n = 0; n < sourcev->size(); n++ )
        {
                line = (*sourcev)[n];

                // CR/LF's eruit
                while( ( i = line.find_first_of( "\n\r" ) ) >= 0 )
                        line.erase( i, 1 );

		(*sourcev)[n] = line; // exp

                // tabs eruit
                while( ( i = line.find( '\t' ) ) >= 0 )
                        line.replace( i, 1, string( 8 - i % 8, ' ' ) );
		
                // megacomments eruit
                if( ( i = line.find( "(*" ) ) != -1 )
                {
		    line = line.substr( 0, i );

		    while( ( (*sourcev)[++n].find( "*)" ) == -1 ) &&
		 	   ( n != sourcev->size()-1 ) );

		    if( n == sourcev->size()-1 )
			    unfinishedcomment = TRUE;
               	}

                // comments eruit
                if( ( i = line.find( "**" ) ) != -1 )
                    line = line.substr( 0, i );

		// only finishing comment
		if( line.find( "*)" ) != -1 )
			addParseWarning( "illegal character sequence: *)", n+1 );
		
                // empty line ?
                if( trim( line ) == "" )
                        continue;

                // join lines beginning/ending with control chars

                if( preprov->size() > 0 )
                {
                        string &prev = (*preprov)[ preprov->size()-1 ];

                        if( ( ctrlchars.find( line[ line.find_first_not_of( ' ' ) ] ) != -1 ) ||
                            ( ctrlchars.find( prev[ prev.find_last_not_of( ' ' ) ] ) != -1 ) ||
                            ( line[ line.find_first_not_of( ' ' ) ] == ')' ) ||
                            ( prev[ prev.find_last_not_of( ' ' ) ] == '(' ) )
                        {
                                prev += trim( line );
                                continue;
                        }

                        if( prev[ i = prev.find_last_not_of( ' ' ) ] == '\\' )
                        {
                                string attach = trim( line );
                                if( attach != "" )
                                    prev = prev.substr( 0, i ) + attach;

                                continue;
                        }

                        if( ( i = prev.length()-2 ) >= 0 )
                        {
                                if( ( prev.rfind( "->" ) == i ) ||
                                    ( prev.rfind( "<-" ) == i ) ||
                                    ( prev.rfind( "+>" ) == i ) ||
                                    ( prev.rfind( "!=" ) == i ) ||
                                    ( prev.rfind( "<=" ) == i ) ||
                                    ( prev.rfind( ">=" ) == i ) ||
                                    ( prev.rfind( "||" ) == i ) ||
                                    ( prev.rfind( "&&" ) == i ) )
                                {
                                        prev += trim( line );
                                        continue;
                                }
                        }
                        if( ( i = line.find_first_not_of( ' ' ) ) != -1 )
                        {
                                if( ( line.find( "->" ) == i ) ||
                                    ( line.find( "<-" ) == i ) ||
                                    ( line.find( "+>" ) == i ) ||
                                    ( prev.find( "!=" ) == i ) ||
                                    ( prev.find( "<=" ) == i ) ||
                                    ( prev.find( "&&" ) == i ) ||
                                    ( prev.find( "||" ) == i ) ||
                                    ( prev.find( ">=" ) == i ) )
                                {
                                        prev += trim( line );
                                        continue;
                                }
                        }
                }

                preprov->push_back( rtrim( line ) );
                linenrv->push_back( n+1 );

                if( closingLine( line ) )
                {
                        closed = TRUE;
                        break;
                }
        }

        if( !closed )
        {
                preprov->push_back( "%" );
                linenrv->push_back( n-1 );
                linenr = linenrv->size();
		if( unfinishedcomment )
			addParseWarning( "unfinished multi-line comment" );
		else
                	addParseWarning( "missing end-of-file sign ( '%' )" );
                linenr = 0;
        }

    	for( unsigned int n = 0; n < preprov->size(); n++ )
    	{
                string &s = (*preprov)[n];

                if( s[ s.length()-1 ] == '}' )
                        s = s.substr( 0, s.length()-1 );

//                cout << "^" << s << endl;
	}	
}

// The central recursive parsing function.
//
// Reads lines (starting with `currentLine`, which is updated in place) and
// attaches every line indented deeper than `depth` as a new subcategory of
// `currentCategory`, recursing to collect that subcategory's own contents.
// Options in front of a category line are consumed by checkOptions(..).
//
// Returns FALSE when a line at or above `depth` is met; that line stays in
// `currentLine` for the caller to handle. Returns TRUE when parsing is over:
// either the closing line was reached or a critical error occurred (in which
// case the global criticalerror is set).

int parseRecursive( int depth, Category *currentCategory, string &currentLine )
{
    // column at which category names start on this level
    const int rdepth = ( depth < 0 ) ? 0 : depth;

    while( !closingLine( currentLine ) )
    {
        if( !checkOptions( currentLine, currentCategory, depth ) )
        {
            criticalerror = TRUE;
            return TRUE;
        }

        if( closingLine( currentLine ) )
            break;

        const int indent = currentLine.find_first_not_of( ' ' );

        // not indented deeper than this level: the line is the caller's
        if( indent <= depth )
            return FALSE;

        Category *newCat = new Category();
        newCat->parent = currentCategory;

        // the name runs up to the first slash, or to the end of the line
        const int slash = currentLine.find( '/', rdepth );
        if( slash != -1 )
            newCat->name = trim( currentLine.substr( rdepth, slash - rdepth ) );
        else
            newCat->name = trim( currentLine.substr( rdepth ) );

        if( !setCategoryFlags( newCat ) )
        {
            criticalerror = TRUE;
            return TRUE;
        }

        if( newCat->realsubcat )
            newCat->inheritProperties();

        // a reference: parse the target, keep the part of the name in front
        // of the arrow and move on to the next line
        int k;
        int tocontinue = FALSE;

        if( ( ( k = newCat->name.find( "->" ) ) != -1 ) ||
            ( ( k = newCat->name.find( "+>" ) ) != -1 ) ||
            ( ( k = newCat->name.find( "<-" ) ) != -1 ) )
        {
            if( parseTarget( currentLine, newCat ) )
            {
                newCat->name = trim( newCat->name.substr( 0, k ) );

                currentLine = getLine();
                tocontinue = TRUE;
            }
        }

        // warn about dots that are not written as "\."
        int m;
        string temp = newCat->name;
        while( ( m = temp.find( "\\." ) ) != -1 )
            temp.erase( m, 2 );
        if( temp.find( "." ) != -1 )
            addParseWarning( "use \"\\.\" instead of \".\" in category name" );

        currentCategory->addSubcat( newCat );
        if( tocontinue )
            continue;

        const int l = currentLine.find( '/', rdepth );
        if( ( l != -1 ) &&
            ( l != currentLine.find_last_not_of( ' ' ) ) )
        {
            // more text follows the slash on this line: blank out the name
            // and let the recursion parse the remainder
            currentLine.replace( rdepth, l - rdepth,
                                 string( l - rdepth, ' ' ) );
        }
        else
            currentLine = getLine();

        if( parseRecursive( indent, newCat, currentLine ) )
            return TRUE;
    }

    return TRUE; // end recursion
}

// Set category flags, such as invisibility or real subcategory. Used by
// parseRecursive(..).
//
// Leading flag characters are removed from category->name and recorded on
// the category:
//   '!'  invisible
//   '@'  realsubcat
//   '$'  transsummation
//   '+'  forcevisible
// Blanks in front of and directly after the flags are removed as well. A
// name that is empty, all blanks, or made up of flags only ends up as "".
//
// Always returns TRUE.

int setCategoryFlags( Category *category )
{
    // erase() up to the first non-blank; when there is none,
    // find_first_not_of(..) yields npos and the whole name is erased
    category->name.erase( 0, category->name.find_first_not_of( ' ' ) );

    while( category->name.length() &&
           ( string( "!@+$" ).find( category->name[0] ) != string::npos ) )
    {
        switch( category->name[0] )
        {
            case '!':
                category->invisible = TRUE;
                break;

            case '@':
                category->realsubcat = TRUE;
                break;

            case '$':
                category->transsummation = TRUE;
                break;

            default: // '+'
                category->forcevisible = TRUE;
                break;
        }

        category->name.erase( 0, 1 );
    }

    category->name.erase( 0, category->name.find_first_not_of( ' ' ) );

    return TRUE;
}

// if the string contains a reference, parse it, adjust the category and
// remove the reference from the string. used by parseRecursive(..)

int parseTarget( string &line, Category *category )
{
    string target, rest;
    int k;

    // parse mask, selection and simple reference

    if( line.find( "->" ) != -1 )
    {
        int n =
            line.find_first_not_of( ' ', line.find( "->" )+2 );

        if( n == -1 )
            addParseWarning( "no reference target specified" ); 
        else
        {
            target = line.substr( n, line.length() - n );

            // process mask

            if( ( n = target.find_first_of( '#' ) ) != -1 )
            {
                // extract target and mask expressions

                rest = trim( target.substr( n+1, target.length()-n-1 ) );
                target = trim( target.substr( 0, n ) );

                if( target == "" )
                    addParseWarning( "no reference target specified" ); 
                if( rest == "" )
                    addParseWarning( "no mask expressions specified" ); 

                if( ( target != "" ) &&
                    ( rest != "" ) )
                {
                    category->data1v = new vector<string>;
                    dirToVec( rest, *(category->data1v), ',', FALSE );
                    category->mask = TRUE;
                }
            }

            // process selection

            else
            {
		int i, j;

                rest = trim( target.substr( n+1, target.length()-n-1 ) );

		string temp = rest;
		if( ( j = rest.find( ";" ) ) != -1 )
			temp = rest.substr( 0, j );
		if( ( i = temp.find_last_of( ',' ) ) != -1 )
			rest = trim( rest.substr( i+1, rest.length()-i-1 ) );
		
                // skip colons in protocol/drive indication..

                if( ( rest.find( ":/" ) == 1 ) &&
                    ( rest[0] >= 'A' ) &&
                    ( rest[0] <= 'z' ) )
                    i = 3;
                else if( rest.find( "http://" ) == 0 )
                    i = 7;
                else if( rest.find( "ftp://" ) == 0 )
                    i = 6;
		else
		    i = 0;

                // encountered selection

                if( ( i = rest.find( ':', i ) ) != -1 )
                {
                    // extract target and selection name

                    rest = rest.substr( i+1, rest.length()-i-1 );
                    target = target.substr( 0, target.find_last_of( ':' ) );

		    if( ( j = rest.find( ";" ) ) != -1 )
		    {
			    target += rest.substr( j, rest.length()-j );
			    rest = rest.substr( 0, j );
		    }
		    
		    rest = trim( rest );
		    target = trim( target );

                    if( target == "" )
                        addParseWarning( "no reference target specified" ); 
                    if( rest == "" )
			addParseWarning( "no selection specified" );

                    if( ( target != "" ) &&
                        ( rest != "" ) )
                    {
                        category->data1v = new vector<string>;
                        category->data1v->push_back( trim( rest ) );
                        category->selection = TRUE;
                    }
                }
            }

            if( target != "" )
                category->target = target; 
        }

        k = line.find_last_not_of( ' ', line.find( "->" )-1 );
    }

    else if( line.find( "+>" ) != -1 )
    {
        int n = line.find_first_not_of( ' ', line.find( "+>" )+2 );
	if( n == -1 )
		target = "";
	else
		target = line.substr( n, line.length() - n );

	if( target == "" ) 
		addParseWarning( "no reference target specified" );
	
	int i;
	
        if( ( i = target.find_first_of( ";#" ) ) != -1 )
        {
                string masks, criteria;
				
		rest = trim( target.substr( i, target.length()-i ) );
		target = trim( target.substr( 0, i ) );

		if( target == "" ) 
			addParseWarning( "no reference target specified" );

		if( ( i = rest.find( "#" ) ) != -1 ) 
		{
			masks = rest.substr( i+1, rest.length()-i-1 );
			if( ( i = masks.find( ";" ) ) != -1 )
				masks = masks.substr( 0, i );

			if( trim( masks ) != "" )
			{
                    		category->data1v = new vector<string>;
                    		dirToVec( masks, (*category->data1v), ',', FALSE );
			}	
			else
				addParseWarning( "no mask expressions specified" );
		}
		if( ( i = rest.find( ";" ) ) != -1 )
		{
			criteria = rest.substr( i+1, rest.length()-i-1 );
			if( ( i = criteria.find( "#" ) ) != -1 )
				criteria = criteria.substr( 0, i );
                    		
			if( trim( criteria ) != "" )
			{
				category->data2v = new vector<string>;
                    		dirToVec( criteria, (*category->data2v), ',', FALSE );
			}
			else
				addParseWarning( "no sort criteria specified" );
		}
	}

    	if( target != "" ) 	
	{	
	        category->target = target;
       	        category->summation = TRUE;
        }

        k = line.find_last_not_of( ' ', line.find( "+>" )-1 );
    }

    else if( line.find( "<-" ) != -1 )
    {
        int n =
            line.find_first_not_of( ' ', line.find( "<-" )+2 );

        if( n == -1 )
            addParseWarning( "no reference target specified" );
        else
        {
            category->data1v = new vector<string>;

            if( ( line[n] >= '0' ) &&
                ( line[n] <= '9' ) )
            {
                string s = " ";
                s[0] = line[n];
                category->data1v->push_back( s );
                n = line.find_first_not_of( ' ', n+1 );
            }
            else
                category->data1v->push_back( "0" );

            if( n == -1 )
		addParseWarning( "no reference target specified" );
            else
            {
                target = line.substr( n, line.length() - n );
                category->target = target; 
                category->generalization = TRUE;
            }
        }

        k = line.find_last_not_of( ' ', line.find( "<-" )-1 );
    }
    else
        return FALSE;

    line = line.substr( 0, k+1 );
    return TRUE;
}

// Used by parseHeader(..) to insert information from a cached header file
// into a category.
//
// Every non-empty one of owner, email and home in `headerCat` replaces the
// corresponding field of `category`. The strings stored in
// headerCat->data1v, if any, are appended to the global macrosv.

void insertHeaderInfo( Category *headerCat, Category *category )
{
    if( headerCat->owner != "" )
        category->owner = headerCat->owner;

    if( headerCat->email != "" )
        category->email = headerCat->email;

    if( headerCat->home != "" )
        category->home = headerCat->home;

    const vector<string> *headerMacros = headerCat->data1v;
    if( headerMacros == 0 )
        return;

    for( vector<string>::const_iterator it = headerMacros->begin();
         it != headerMacros->end(); ++it )
        macrosv.push_back( *it );
}

// Parse attributes. Used by parseRecursive(..) and parseHeader(..).
//
// Consumes consecutive option lines of the form "<letter>: <text>", starting
// with `currentLine`, and applies them to `category`. `currentLine` is
// advanced with getLine() past every line that was consumed. The function
// returns as soon as `currentLine` is the closing line or is not an option
// line, i.e. its second non-blank character is not ':'.
//
// While the global parsingheader is set only "o:" and "@:" are accepted.
// Every other line is skipped with a warning, and after skipping a line
// that is not an option line the function returns.
//
// `depth` is not used. The return value is always TRUE.

int checkOptions( string &currentLine, Category *category, int depth )
{
    const string specials = "bcdfhilmnwopt@";
    const string hspecials = "o@";
    string val1, val2, val3, val4;
    string line;

    char firstchar, secondchar;

    do
    {
        if( closingLine( currentLine ) )
            return TRUE;

        checkMultipleSlashes( currentLine );
        line = currentLine;

        // first two non-blank characters; '\0' stands in for a character
        // that does not exist (e.g. a one-character line), so the string is
        // never indexed out of range
        const string::size_type i = line.find_first_not_of( ' ' );
        firstchar = ( i != string::npos ) ? line[i] : '\0';

        const string::size_type j = line.find_first_not_of( ' ', i + 1 );
        secondchar = ( j != string::npos ) ? line[j] : '\0';

        if( secondchar != ':' )
        {
            if( parsingheader )
            {
                addParseWarning( "header file may only contain o: and @: attributes" );

                line = currentLine = getLine();
                continue;
            }
        }
        else
        {
            // drop the "<letter>:" prefix
            line.erase( 0, j + 1 );

            // only subset of options allowed when parsing header

            if( parsingheader && ( hspecials.find( firstchar ) == string::npos ) )
            {
                addParseWarning( "header file may only contain o: and @: attributes" );

                line = currentLine = getLine();
                continue;
            }

            // a reference inside the option text is applied to the category
            // and cut off the text
            parseTarget( line, category );

            // check for unknown option

            if( specials.find( firstchar ) == string::npos )
            {
                string message;
                takeTo( "", line, message );
                message = "unknown option ( " + trim( ctos( firstchar ) + ": " + message ) + " )";
                addParseWarning( message );

                line = currentLine = getLine();
                continue;
            }

            switch( firstchar )
            {
                case '@':
                    if( category->parent != 0 )
                    {
                        addParseWarning( "macro definition outside root category" );
                        break;
                    }

                    if( !takeTo( "=", line, val1 ) )
                    {
                        addParseWarning( "syntax error in macro definition" );
                        break;
                    }

                    takeTo( "", line, val2 );
                    val1 = "_" + val1;

                    // macros from a header are stored on the header category
                    // as well as in macrosv
                    if( parsingheader )
                    {
                        if( category->data1v == 0 )
                            category->data1v = new vector<string>;
                        category->data1v->push_back( val1 );
                        category->data1v->push_back( val2 );
                    }

                    macrosv.push_back( val1 );
                    macrosv.push_back( val2 );

                    break;

                case 'b':
                    takeTo( "", line, val1 );
                    category->addBase( val1 );
                    break;

                case 'c':
                    takeTo( "", line, val1 );
                    category->addChat( val1 );
                    break;

                case 'd':
                    takeTo( "", line, val1 );
                    category->addDiscuss( val1 );
                    break;

                case 'f':
                    takeTo( "", line, val1 );
                    category->addFaq( val1 );
                    break;

                case 'h':
                    if( category->parent != 0 )
                    {
                        addParseWarning( "header file inclusion outside root category" );
                        break;
                    }

                    takeTo( "", line, val1 );
                    parseHeader( val1, category );
                    break;

                case 'i':
                    takeTo( "", line, val1 );
                    category->addParagraph( category->info, val1 );
                    break;

                case 'w':
                    takeTo( "", line, val1 );
                    category->addParagraph( category->warning, val1 );
                    break;

                case 'l':
                    takeTo( "", line, val1 );
                    category->addLink( val1 );
                    break;

                case 'm':
                    takeTo( "", line, val1 );
                    category->addMlist( val1 );
                    break;

                case 'n':
                    takeTo( "", line, val1 );
                    category->addParagraph( category->news, val1 );
                    break;

                case 'o':
                    // up to four comma separated terms; all four are reset
                    // so nothing is left over from an earlier o: line
                    val1 = val2 = val3 = val4 = "";
                    category->home = category->owner = category->email = "";

                    if( takeTo( ",", line, val1 ) &&
                        ( removed == ',' ) )
                        if( takeTo( ",", line, val2 ) &&
                            ( removed == ',' ) )
                            if( takeTo( ",", line, val3 ) &&
                                ( removed == ',' ) )
                                takeTo( ",", line, val4 );

                    sortOwnerSpec( category, val1 );
                    sortOwnerSpec( category, val2 );
                    sortOwnerSpec( category, val3 );
                    sortOwnerSpec( category, val4 );
                    break;

                case 'p':
                {
                    if( !category->parent )
                    {
                        addParseWarning( "property definition in root category" );
                        break;
                    }

                    // "name", "name=value" items separated by commas
                    int next;
                    do
                    {
                        next = FALSE;
                        if( !takeTo( "=,", line, val1 ) )
                            category->addProperty( trim( val1 ), "" );
                        else if( removed == ',' )
                        {
                            category->addProperty( trim( val1 ), "" );
                            next = TRUE;
                        }
                        else
                        {
                            if( takeTo( ",", line, val2 ) )
                                next = TRUE;
                            category->addProperty( trim( val1 ), trim( val2 ) );
                        }
                    } while( next );
                    break;
                }

                case 't':
                    takeTo( "", line, category->title );
                    break;
            }

            line = currentLine = getLine();
        }
    } while( secondchar == ':' );

    return TRUE;
}

// used by checkOptions(..) 

int takeTo( const string &special, string &line, string &result )
{
	removed = ' '; // special char that we find first

	if( line.find_first_not_of( ' ' ) == -1 )
	{
		result = "";
		return FALSE;
	}

	int i = line.find_first_not_of( ' ' );
	int j;
	if( ( j = line.find_first_of( special, i ) ) == -1 )
	{
		result = line.substr( i, line.length() - i );
		line.erase( i, line.length() - i );
		return FALSE;
	}	

	removed = line[j];

        result = trim( line.substr( i, j - i ) );

        line.erase( i, j - i + 1 );
	if( line.find_first_not_of( ' ' ) == -1 )
                line = getLine();

        return TRUE;
}
			
// Used by checkOptions(..).
//
// Files one term of an "o:" option under the right field of `category`:
// a term containing '@' is stored as email, a term starting with "http://"
// as home, and anything else as owner. An empty term is ignored.

void sortOwnerSpec( Category *category, string term )
{
    if( term.empty() )
        return;

    if( term.find( '@' ) != string::npos )
    {
        category->email = term;
        return;
    }

    if( term.compare( 0, 7, "http://" ) == 0 )
    {
        category->home = term;
        return;
    }

    category->owner = term;
}

// Check to see if we're at the last line: true when `line`, once trimmed,
// is exactly the end-of-file sign "%".

int closingLine( string &line )
{
    const bool atEnd = ( trim( line ) == "%" );

    return atEnd;
}

// Get the next line from vector<string> *preprov and advance the global
// linenr past it. The line is returned with its macros expanded by
// macroExpand(..).
//
// When linenr does not refer to an entry of *preprov (all lines have been
// handed out already), the end-of-file sign "%" is returned and linenr is
// left as it is. Callers stop at closingLine(..), so their loops end
// instead of reading past the end of the vector.

string getLine()
{
    if( ( linenr < 0 ) ||
        ( static_cast<vector<string>::size_type>( linenr ) >= preprov->size() ) )
        return "%";

    string line = (*preprov)[ linenr++ ];

    return macroExpand( line );
}

// Show warning in case of ill-placed slashes. Used by checkOptions(..).
//
// A line made up of nothing but slashes and blanks is replaced by the next
// line from getLine(), with a warning for every line skipped this way. On
// the resulting line every slash in front of the first other non-blank
// character is overwritten with a blank; more than one such slash gives a
// warning.

void checkMultipleSlashes( string &line )
{
    while( line.find_first_not_of( "/ " ) == string::npos )
    {
        addParseWarning( "ill-placed forward slash(es)" );
        line = getLine();
    }

    // the loop above guarantees a non-blank, non-slash character, so the
    // search below always finds a position
    int blanked = 0;
    for( string::size_type at = line.find_first_not_of( ' ' );
         line[ at ] == '/';
         at = line.find_first_not_of( ' ' ) )
    {
        line[ at ] = ' ';
        ++blanked;
    }

    if( blanked > 1 )
        addParseWarning( "ill-placed forward slash(es)" );
}

// Expand macros in target.
//
// The global macrosv holds name/value pairs: the name at an even index, its
// value at the index after it. Every occurrence of a name in `target` is
// replaced by the value, pair by pair in the order of macrosv, and the
// result is returned.
//
// After a replacement the search resumes behind the inserted value, so a
// value that contains its own macro name is inserted once per occurrence
// instead of being expanded over and over without end. A value may still
// use macros that come later in macrosv; those are expanded when their own
// pair is processed.
//
// When KASCADE_EDITOR is defined, `target` is returned unchanged.

string macroExpand( string target )
{
#ifdef KASCADE_EDITOR
    return target;
#else
    // "n + 1 <" also guards against a name without a value at the end
    for( string::size_type n = 0; n + 1 < macrosv.size(); n += 2 )
    {
        const string &name = macrosv[ n ];
        const string &value = macrosv[ n + 1 ];

        // an empty name would match at every position
        if( name.empty() )
            continue;

        string::size_type pos = 0;
        while( ( pos = target.find( name, pos ) ) != string::npos )
        {
            target.replace( pos, name.length(), value );
            pos += value.length();
        }
    }

    return target;
#endif
}

