/* catdvi - get text from DVI files
   Copyright (C) 1999 Antti-Juhani Kaijanaho <gaia@iki.fi>
   Copyright (C) 2000-01 Bjoern Brill <brill@fs.math.uni-frankfurt.de>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <assert.h>
#include <stdlib.h>
#include "fontinfo.h"
#include "page.h"
#include "outenc.h"
#include "util.h"
#include "glyphops.h"
#include "linebuf.h"
#include "density.h"
#include <errno.h>

/* A page is modeled as an ordered doubly-linked list of boxes (x, y,
   glyph), where the ordering is left-to-right top-to-bottom. */

/* Attribute bits of a box; they are OR-ed together in box_t.flags. */
enum box_flag_t {
    BF_SWIMMING =   1 << 0,
    	/* (x, y) != (left border, baseline). We need to
    	 * move this one to the "logically right" place.
	 */
    BF_HAS_AXIS =   1 << 1,
    	/* box_t.axis is valid, i.e. we know where the math
    	 * axis passes through this box.
	 */
    BF_ON_AXIS =    1 << 2,
    	/* This box is centered on the math axis (e.g. the
    	 * big operators). Implies BF_SWIMMING.
    	 */
    BF_RADICAL =    1 << 3,
    	/* This is a radical sign (and hence "hanging down"
    	 * from the y coordinate). Implies BF_SWIMMING.
	 */
    BF_DIACRITIC =  1 << 4 
    	/* This is a diacritical mark. Often its baseline is vertically
	 * displaced against the baseline of the glyph it should accent
    	 * (because the combination looks better this way).
	 * Implies BF_SWIMMING.
	 */
};
   
/* One glyph placed on the page, together with its metrics.
 * Boxes are ordered by increasing y first and increasing x second
 * (see compare_boxes()), i.e. y grows towards the bottom of the page.
 */
struct box_t {
        sint32 x;   /* left border (unless BF_SWIMMING is set) */
        sint32 y;   /* baseline (unless BF_SWIMMING is set) */
	sint32 axis; /* y position of the math axis; valid iff BF_HAS_AXIS */
	
        sint32 width;
        sint32 height; /* extent above y: the top of the box is y - height */
	sint32 depth;  /* extent below y */
	sint32 axis_height; /* The TeX font parameter. <= 0 if unknown. */
	
        glyph_t glyph;
        font_t font;
	enum box_flag_t flags; /* OR-ed BF_* bits */
};

/* Three-way comparison of two boxes in page order: the y coordinate
   decides first, the x coordinate breaks ties.  The result is
   negative, zero or positive, like with strcmp and friends. */
static int compare_boxes(struct box_t a, struct box_t b)
{
        if (a.y != b.y) {
                return (a.y > b.y) ? 1 : -1;
        }
        if (a.x != b.x) {
                return (a.x > b.x) ? 1 : -1;
        }
        /* same y, same x */
        return 0;
}

/* A position on the page, in the same coordinates as box_t.x / box_t.y. */
struct point_t {
    sint32 x;
    sint32 y;
};

static int point_in_box(const struct point_t * point, const struct box_t * box)
{
    assert(box);
    assert(point);
    return(
    	box->x      	    	<= point->x &&
	(box->x + box->width) 	>= point->x &&
	(box->y - box->height) 	<= point->y &&
	box->y      	    	>= point->y
    );
}

/* Node of the doubly-linked page list. prev is 0 for the list head,
 * next is 0 for the list tail.
 */
struct list_node_t {
        struct box_t b;             /* the box stored in this node */
        struct list_node_t * prev;
        struct list_node_t * next;
};

/* Set to 1 when catdvi is invoked with the --sequential option.
 * In this mode insert_list() keeps boxes in insertion order instead
 * of sorting them by position.
 */
int page_sequential = 0;

/* Set to 1 when catdvi is invoked with the --list-page-numbers option.
 * page_end() then only prints the page reference, not the page text.
 */
int page_list_numbers = 0;

/* List support variables.  Note that most insertions happen near the
   previous insertion, so it's beneficial to keep a pointer to the
   last inserted node.  All three are 0 exactly when the list is empty. */
static struct list_node_t * list_head;
static struct list_node_t * list_tail;
static struct list_node_t * list_latest; /* Node that was inserted last. */

/* left and right margins - set by page_end(), also used by
 * decide_space_count() and page_print().
 */
static sint32 left_margin, right_margin;

/* flags to avoid the page_adjust_*() loops over the whole page when
 * there's nothing to adjust. Reset by page_begin(), set by
 * page_set_glyph().
 */
static int page_has_diacritics;
static int page_has_texmext;
static int page_has_radicals;

/* Exchange two adjacent list nodes. one must be the direct predecessor
 * of other; afterwards other is the direct predecessor of one.
 * list_head and list_tail are updated if the pair sits at an end of
 * the list.
 */
static void swap_nodes(struct list_node_t * one, struct list_node_t * other)
{
        struct list_node_t * before;    /* neighbour left of the pair */
        struct list_node_t * after;     /* neighbour right of the pair */

        assert(one != 0);
        assert(other != 0);
        assert(one->next == other);
        assert(one == other->prev);

        before = one->prev;
        after = other->next;

        /* relink the pair itself */
        other->prev = before;
        other->next = one;
        one->prev = other;
        one->next = after;

        /* relink the surroundings */
        if (after != 0) {
                after->prev = one;
        } else {
                list_tail = one;
        }
        if (before != 0) {
                before->next = other;
        } else {
                list_head = other;
        }

        assert(other->next == one);
        assert(other == one->prev);
}

/* move list_latest into its correct position in the list */
static void bubble(void)
{
        while (1) {
                assert(list_latest != 0);

                /* If it is in correct position now, end here. */
                if ((list_latest->prev == 0
                     || compare_boxes(list_latest->prev->b, list_latest->b) <= 0)
                    && (list_latest->next == 0
                        || compare_boxes(list_latest->b, list_latest->next->b) <= 0)) {
                        return;
                }

                /* If it is too left, we must bubble it one position to the right. */
                if (list_latest->next != 0
                    && compare_boxes(list_latest->b, list_latest->next->b) > 0) {
                        swap_nodes(list_latest, list_latest->next);
                        continue;
                }

                /* If it is too right, we must bubble it one position to the left. */
                if (list_latest->prev != 0
                    && compare_boxes(list_latest->prev->b, list_latest->b) > 0) {
                        assert(list_latest->prev->next == list_latest);
                        swap_nodes(list_latest->prev, list_latest);
                        continue;
                }
                NOTREACHED;
        }
        NOTREACHED;
}

/* Add a copy of box to the page list. The new node is linked in right
 * behind the previously inserted one and becomes list_latest; unless
 * page_sequential is set it is then bubbled to its sorted position.
 */
static void insert_list(struct box_t box)
{
        struct list_node_t * node;
        struct list_node_t * after;

        node = malloc(sizeof *node);
        if (node == 0) enomem();
        node->b = box;

        /* First node of an empty list */
        if (list_head == 0 || list_tail == 0 || list_latest == 0) {
                assert(list_head == 0);
                assert(list_tail == 0);
                assert(list_latest == 0);

                node->prev = 0;
                node->next = 0;
                list_head = node;
                list_tail = node;
                list_latest = node;
                return;
        }

        /* From here on the list is known to be nonempty. */
        assert(list_head != 0);
        assert(list_tail != 0);
        assert(list_latest != 0);

        /* Link the node in directly behind the latest insertion ... */
        after = list_latest->next;
        node->prev = list_latest;
        node->next = after;
        list_latest->next = node;
        if (after != 0) {
                after->prev = node;
        } else {
                list_tail = node;
        }
        list_latest = node;

        assert(list_latest->prev->next == list_latest);
        assert(list_latest->next == 0 || list_latest->next->prev == list_latest);

        /* ... and, except in sequential mode, sort it into place. */
        if (page_sequential) return;
        bubble();
}


/* These two are set by command line options. page_end() skips every
 * page that compares (via pageref_cmp()) before page_start_output or
 * after page_last_output.
 */
struct pageref_t page_start_output = {0, 0, 0, PRF_PHYSICAL};
struct pageref_t page_last_output = {SINT32_MAX, 0, 0, PRF_PHYSICAL};

/* Reference of the page currently being processed; maintained by
 * page_begin(). The flavour stays PRF_INVALID until the first page
 * has been seen.
 */
static struct pageref_t current_page = {0, 0, 0, PRF_INVALID};


/* Start a new page.
 *
 * Frees the box list of the previous page (if any), resets the per page
 * flags and advances the page numbering kept in current_page.
 *
 * count0 is the count0 value of the page that begins. Whenever it does
 * not increase from one page to the next (as judged by
 * pageref_count0_cmp()), a new chapter is started.
 */
void page_begin(sint32 count0)
{
        /* If necessary, delete the previous page's data structure. */
        if (list_head != 0 || list_tail != 0 || list_latest != 0) {
                struct list_node_t * p;

                assert(list_head != 0);
                assert(list_tail != 0);
                assert(list_latest != 0);

                p = list_head;
                while (p != 0) {
                        struct list_node_t * next = p->next;

                        free(p);
                        p = next;
                }
                list_head = 0;
                list_tail = 0;
                list_latest = 0;
        }
        assert(list_head == 0);
        assert(list_tail == 0);
        assert(list_latest == 0);

        /* reset per page flags */
        page_has_diacritics = 0;
        page_has_texmext = 0;
        page_has_radicals = 0;

        /* keep track of page numbering */
        if (current_page.flavour == PRF_INVALID) {
                /* here comes the very first page of the document */
                current_page.physical = 1;
                current_page.count0 = count0;
                /* Number chapters from 0 to accommodate frontmatter:
                 * a document that opens with a negative count0 starts
                 * in chapter 0, any other document in chapter 1.
                 */
                if (count0 < 0) current_page.chapter = 0;
                else current_page.chapter = 1;
                current_page.flavour = PRF_COMPLETE;
        }
        else {
                /* just another ordinary page */
                current_page.physical += 1;
                if (pageref_count0_cmp(current_page.count0, count0) >= 0) {
                        /* count0 has not increased, so start new chapter */
                        current_page.chapter += 1;
                }
                current_page.count0 = count0;
        }
}


/* Put one glyph on the current page.
 *
 * A box is built from the given font, glyph, metrics and position,
 * classified according to the hints glyph_get_hint() reports for the
 * glyph, and handed to insert_list(). The per page flags
 * page_has_diacritics, page_has_texmext and page_has_radicals are
 * raised as a side effect.
 */
void page_set_glyph(
    font_t font, glyph_t glyph,
    sint32 width, sint32 height, sint32 depth, sint32 axis_height,
    sint32 x, sint32 y
)
{
        struct box_t box;

        /* what and where */
        box.font = font;
        box.glyph = glyph;
        box.x = x;
        box.y = y;

        /* metrics */
        box.width = width;
        box.height = height;
        box.depth = depth;
        box.axis_height = axis_height;

        /* nothing special known yet */
        box.axis = 0;
        box.flags = 0;

        /* Being a diacritic is independent of the classes below. */
        if (glyph_get_hint(glyph) & GH_DIACRITIC) {
                box.flags |= BF_SWIMMING | BF_DIACRITIC;
                page_has_diacritics = 1;
        }

        /* The remaining classes exclude each other; first match wins. */
        if (glyph_get_hint(glyph) & GH_EXTENSIBLE_RECIPE) {
                box.flags |= BF_SWIMMING;
                page_has_texmext = 1;
        }
        else if (glyph_get_hint(glyph) & GH_ON_AXIS) {
                /* the axis runs through the vertical middle of the box */
                box.flags |= BF_SWIMMING | BF_ON_AXIS | BF_HAS_AXIS;
                box.axis = y + (-height + depth)/2;
                page_has_texmext = 1;
        }
        else if (glyph_get_hint(glyph) & GH_RADICAL) {
                box.flags |= BF_SWIMMING | BF_RADICAL;
                page_has_radicals = 1;
        }
        else if (axis_height > 0) {
                /* ordinary glyph from a font with known axis height */
                box.flags |= BF_HAS_AXIS;
                box.axis = y - axis_height;
        }

        insert_list(box);
}

/* Estimate how many spaces fit into the horizontal gap between two
 * boxes.
 *
 * prev_p is the box left of the gap; if it is NULL or sits at another
 * y than p, the gap starts at the left margin instead. p is the box
 * right of the gap; if it is NULL the gap ends at the right margin.
 * At least one of them must be non-NULL.
 *
 * The result is the rounded mean of what font_w_to_space() reports
 * for the fonts on both sides of the gap.
 */
static int decide_space_count(
    struct list_node_t * prev_p,
    struct list_node_t * p
)
{
        font_t left_font, right_font;
        sint32 gap_start, gap_end, gap;
        int by_right_font, by_left_font;

        assert((prev_p != 0) || (p != 0));

        /* A box on another line is no left neighbour. */
        if (prev_p != NULL && p != NULL && prev_p->b.y != p->b.y) {
                prev_p = NULL;
        }

        /* left end of the gap */
        if (prev_p != NULL) {
                left_font = prev_p->b.font;
                gap_start = prev_p->b.x + prev_p->b.width;
        } else {
                left_font = p->b.font;
                gap_start = left_margin;
        }

        /* right end of the gap */
        if (p != NULL) {
                right_font = p->b.font;
                gap_end = p->b.x;
        } else {
                right_font = left_font;
                gap_end = right_margin;
        }

        gap = gap_end - gap_start;

        by_right_font = font_w_to_space(right_font, gap);
        by_left_font = font_w_to_space(left_font, gap);

        return (1 + by_right_font + by_left_font) / 2;
}

/* Forward declarations of the helpers used by page_end(). */

/* Output of the page in --sequential mode. */
static void page_end_sequential(void);

/* Pair every diacritic on the page with its base glyph. */
static void page_adjust_diacritics(void);

/* Merge one diacritic with the base glyph found for it; direction is
 * -1 if the base precedes the diacritic in the list and 1 if it follows.
 */
static void page_pair_accenting(
    struct list_node_t * pbase,
    struct list_node_t * pdiacritic,
    int direction
);

static void page_adjust_texmext(void);
static void page_adjust_radicals(void);

/* Format and write out the lines collected by page_end(). */
static void page_print(
    struct linebuf_t lines[],
    sint32 lines_ypos[],
    int nlines,
    struct scdf_t * col_density
);

/* Finish the current page and write it to stdout.
 *
 * Nothing is printed if the page lies outside the range given by
 * page_start_output / page_last_output. With page_list_numbers set,
 * only the page reference is printed. In sequential mode the work is
 * delegated to page_end_sequential().
 *
 * Otherwise the sorted box list is cut into lines (one per distinct
 * y value) and words. Each word beginning becomes a "tab stop" and the
 * column density function is forced to provide enough printout columns
 * between consecutive tab stops. The collected lines are finally
 * formatted and printed by page_print().
 *
 * Everything allocated here is released again before returning.
 */
void page_end(void)
{
        struct list_node_t * p, * prev_p;
        sint32 prev_y;
        sint32 prev_tab, this_tab;
        int linecount = 0;
        int last_word_width = 0;
        int space_count, space_reserve;
        struct linebuf_t * lbs, * lb;
        struct linebuf_t last_word;
        struct scdf_t col_density;
        sint32 * lines_ypos, * this_ypos;

        pmesg(50, "BEGIN page_end\n");

        if (
            pageref_cmp(&page_start_output, &current_page) > 0 ||
            pageref_cmp(&page_last_output, &current_page) < 0
        ) {
                if(msglevel >= 80) {
                        pmesg(80, "skipping page by user request:\n  ");
                        pageref_print(&current_page, stderr);
                }
                pmesg(50, "END page_end\n");
                return;
        }

        if(page_list_numbers) {
                pageref_print(&current_page, stdout);
                pmesg(50, "END page_end\n");
                return;
        }

        page_adjust_diacritics();

        if(page_sequential) {
                page_end_sequential();
                pmesg(50, "END page_end\n");
                return;
        }

        /* Life is much easier if we don't have to care for the case
         * of an empty page all the time.
         */
        if(list_head == NULL) {
                puts("\f\n");
                pmesg(80, "This page is empty.\n");
                pmesg(50, "END page_end\n");
                return;
        }

        page_adjust_texmext();
        page_adjust_radicals();

        /* Count lines, find left and right margins */
        left_margin = SINT32_MAX;
        right_margin = SINT32_MIN;
            /* highest and lowest value guaranteed to fit into an sint32 */
        prev_y = list_head->b.y;
        for (p = list_head; p != 0; p = p->next) {
                left_margin = min(left_margin, p->b.x);
                right_margin = max(right_margin, p->b.x + p->b.width);
                if (p->b.y > prev_y) {
                        prev_y = p->b.y;
                        linecount++;
                }
        }
        ++linecount; /* The last line hasn't been counted */
        pmesg(80, "left margin: %ld, right margin: %ld.\n",
            left_margin, right_margin);

        /* col_density measures the column density at every point between
         * left and right margin which is required to do proper positioning
         * of every tab stop (see below).
         */
        scdf_init(&col_density, left_margin, right_margin, 0);

        /* The text contained in the page is stored in an array of linebufs,
         * one for each line. Printout is done in one go by page_print()
         * after we have parsed the complete page and collected the
         * necessary layout data.
         */
        lbs = malloc(linecount * sizeof(struct linebuf_t));
        if(lbs == NULL) enomem();
        lb = lbs;
        linebuf_init(lb, 0);

        /* The y position of every line is stored in an array so that
         * page_print() can do some vertical formatting.
         */
        lines_ypos = malloc(linecount * sizeof(sint32));
        if(lines_ypos == NULL) enomem();
        this_ypos = lines_ypos;
        *this_ypos = list_head->b.y;

        /* We collect the current run of not space-separated glyphs
         * in last_word
         */
        linebuf_init(&last_word, 0);

        prev_y = list_head->b.y;
        prev_tab = left_margin;
        prev_p = NULL;
        for (p = list_head; p != 0; prev_p = p, p = p->next) {

                pmesg(80, "node: X = %li, Y = %li, glyph = %li (%c), font = %li\n",
                      p->b.x, p->b.y, p->b.glyph, (unsigned char) p->b.glyph, p->b.font);

                if (p->b.y > prev_y) {
                        /* The previous box was the last one of its line:
                         * flush the pending word, open the next line
                         * buffer and account for the space up to the
                         * right margin.
                         */
                        pmesg(80, "end of line\n");
                        linebuf_append(lb, &last_word);
                        linebuf_init(++lb, 0);
                        *(++this_ypos) = p->b.y;
                        prev_y = p->b.y;

                        space_count = decide_space_count(prev_p, NULL);
                        space_reserve = (space_count + 1) / 2;
                        last_word_width = outenc_get_width(&last_word);
                        scdf_force_min_integral(
                            &col_density,
                            prev_tab,
                            right_margin,
                            last_word_width + space_reserve
                        );
                        scdf_normalize(&col_density);
                        linebuf_clear(&last_word);
                        prev_tab = left_margin;
                }

                space_count = decide_space_count(prev_p, p);

                if((space_count > 0) && (p->b.x != prev_tab)) {
                        /* Word beginnings are used as "tab stops" where
                         * physical position and printout column position
                         * get synchronized.
                         * We make sure there are enough columns to hold
                         * all characters since the last tab stop by requiring
                         * that the column density in between those tabs
                         * be high enough.
                         */
                        last_word_width = outenc_get_width(&last_word);
                        this_tab = p->b.x;
                        space_reserve = (space_count + 1) / 2;
                            /* This formula is a compromise. On one hand,
                             * we don't want to waste too much space with
                             * spaces :) but on the other, we'd like if
                             * larger portions of whitespace (e.g between
                             * columns) would show up in the printout as well.
                             */
                        pmesg(
                            80,
                            "prev_tab is %ld, this_tab is %ld, space_count %d\n",
                            prev_tab,
                            this_tab,
                            space_count
                        );
                        scdf_force_min_integral(
                            &col_density,
                            prev_tab,
                            this_tab,
                            last_word_width + space_reserve
                        );
                        prev_tab = this_tab;

                        linebuf_append(lb, &last_word);
                        linebuf_clear(&last_word);

                        /* insert one space (just to make really sure we'll
                         * see one in the printout) and a tab stop in the
                         * line buffer. A tab stop is the three glyph
                         * sequence '\t', column advance, physical position;
                         * page_print() decodes it again.
                         */
                        linebuf_putg(lb, ' ');
                        linebuf_putg(lb, '\t');
                        linebuf_putg(lb, last_word_width + 1);
                            /* +1 for the space */
                        linebuf_putg(lb, this_tab);
                }

                linebuf_putg(&last_word, p->b.glyph);
        }

        /* End of page. We still have to end the last line */
        space_count = decide_space_count(prev_p, NULL);
        space_reserve = (space_count + 1) / 2;
        last_word_width = outenc_get_width(&last_word);
        scdf_force_min_integral(
            &col_density,
            prev_tab,
            right_margin,
            last_word_width + space_reserve
        );
        scdf_normalize(&col_density);

        linebuf_append(lb, &last_word);

        /* debugging blurb */
        if(msglevel >= 100) {
                scdf_dump(&col_density);
                fprintf(
                    stderr,
                    "Estimated number of required columns is %10.9g\n",
                    scdf_integral(&col_density, left_margin, right_margin)
                );
        }

        page_print(lbs, lines_ypos, linecount, &col_density);
        puts("\f\n");

        /* clean up */
        linebuf_done(&last_word);
        for(lb = lbs; lb - lbs < linecount ; ++lb) linebuf_done(lb);
        free(lbs);
        free(lines_ypos);
            /* the array of line positions is ours as well */
        scdf_done(&col_density);

        pmesg(50, "END page_end\n");
}

/* Format the lines collected by page_end() and write them to stdout.
 *
 * lines        - the line buffers, nlines of them. Besides glyphs they
 *                contain tab stops, each encoded as the three entries
 *                '\t', column advance since the previous tab stop,
 *                physical position of the tab stop.
 * lines_ypos   - the y position of every line, used to approximate
 *                vertical distances by line feeds.
 * nlines       - number of entries in lines[] and lines_ypos[].
 * col_density  - column density function; printout column boundaries
 *                are placed so that the density integrates to 1
 *                between neighbouring boundaries.
 *
 * Uses the file-scope left_margin and right_margin.
 */
static void page_print(
    struct linebuf_t lines[],
    sint32 lines_ypos[],
    int nlines,
    struct scdf_t * col_density
)
{
    struct linebuf_t * curr_line;
    struct linebuf_t formatted;
    int col, ncols;
    sint32 pos;
    sint32 * colpos;
    sint32 y_span;
    int i, j;
    int nlfs;
    float line_density;

    pmesg(50, "BEGIN page_print\n");

    /* Find the positions of the printout columns */
    ncols = 3 + 2 * (int) scdf_integral(col_density, left_margin, right_margin);
        /*
         * +1 for the left margin, +1 for the fractional part, +1 since we'll
         * have to look one column to the right even from the last real column
         * at the right margin and *2 to be on the very safe side wrt extra
         * columns produced by rounding errors in the column-cutting algorithm.
         *
         * I haven't seen any extra columns yet but their number should be
         * proportional to the really required number of columns.
         */
    colpos = malloc(ncols * sizeof(sint32));
    if(colpos == NULL) enomem();

    pos = colpos[0] = left_margin;
    col = 1;
    /* Never write past the array; the last slot is kept free for the
     * "one column to the right" look-ahead entry.
     */
    while(pos < right_margin && col < ncols - 1) {
        errno = 0;
        pos = scdf_solve_integral_for_x1(col_density, pos, 1);
        if(errno == EDOM) pos = right_margin;
            /* EDOM means not enough density right from pos to
             * give integral 1, i.e. we're nearly at the right margin.
             */
        colpos[col++] = pos;
    }
    for(j = col; j < ncols; ++j) colpos[j] = right_margin;
        /* The rest gets filled with position = right margin to avoid
         * bombing out if rounding or other problems lead to a few
         * more printout columns than expected. */
    ncols = col;
    pmesg(80, "ncols is %d\n", ncols);

    /* Lines per unit of y. Only needed (and only well defined) when
     * the page has lines at different y positions.
     */
    y_span = lines_ypos[nlines-1] - lines_ypos[0];
    line_density = (y_span != 0) ? ((float) nlines) / y_span : 0;

    /* hardcore debugging stuff */
    if(msglevel >= 100) {
        fputs("Dumping column positions:\n", stderr);
        for(j = 0; j < ncols - 1 ; ++j) {
            fprintf(
                stderr,
                "  col %3d pos = %10ld integral to next col = %10.9g\n",
                j,
                colpos[j],
                scdf_integral(col_density, colpos[j], colpos[j+1])
            );
        }
    }

    /* now reformat the lines and print them out */
    linebuf_init(&formatted, 0);

    for(i = 0; i < nlines; ++i) {

        curr_line = lines + i;
        linebuf_clear(&formatted);
        col = 0;
        for(j = 0; j < curr_line->size_curr; ) {
            glyph_t g;

            if(col >= ncols) {
                pmesg(10, "Oops: col = %d >= ncols = %d\n", col, ncols);
            }

            g = curr_line->gstring[j++];
            if(g == '\t') {
                /* tab stop: advance by the width of the text printed
                 * since the last stop, then pad with spaces until the
                 * column containing the physical position is reached.
                 */
                col += curr_line->gstring[j++];
                pos = curr_line->gstring[j++];
                pmesg(80, "col %3d: tab to pos %10ld.\n", col, pos);
                if(pos < colpos[col]) {
                    pmesg(10,
                    "Oops: tab to %10ld < pos = %10ld, difference %10ld\n",
                    pos,
                    colpos[col],
                    colpos[col] - pos);
                }
                while(colpos[col+1] < pos) {
                    linebuf_putg(&formatted, ' ');
                    pmesg(
                        80,
                        "  Skipping col %3d at pos %10ld\n",
                        col,
                        colpos[col]
                    );
                    ++col;
                }
                pmesg(
                    80,
                    "col %3d: pos = %10ld, next pos = %10ld\n\n",
                    col,
                    colpos[col],
                    colpos[col+1]
                );
            }
            else {
                linebuf_putg(&formatted, g);
                pmesg(
                    80,
                    "col >= %3d: putting glyph U+%04lX [%c]\n",
                    col,
                    g,
                    (unsigned char) g &0xff
                );
            }
        }

        outenc_write(stdout, &formatted);

        /* Approximate vertical distance to next line by line feeds */
        if(i+1 < nlines) {
            nlfs = (int) ((lines_ypos[i+1] - lines_ypos[i]) * line_density
                + 0.5);
            nlfs = max(nlfs, 1);
        }
        else nlfs = 1;
        pmesg(80, "\nputting %3d linefeeds\n", nlfs);
        for (j = 0; j < nlfs; ++j) putc('\n', stdout);
    }

    linebuf_done(&formatted);
    free(colpos);
    pmesg(50, "END page_print\n");
}


/* Look up the base glyph of every diacritic on the page.
 *
 * A box counts as base candidate if it contains the center of the
 * diacritic's box. The list is scanned backwards and forwards from the
 * diacritic until boxes get vertically too far away. A diacritic with
 * exactly one candidate is merged with it by page_pair_accenting(),
 * one without any candidate is turned into an ordinary glyph, and one
 * with several candidates is left alone.
 */
static void page_adjust_diacritics(void)
{
    struct list_node_t *dia, *upcoming, *cand, *base;
    struct box_t dia_box;
    struct point_t dia_center;
    int ambiguous = 0;
    int direction = 0;

    pmesg(50, "BEGIN page_adjust_diacritics\n");

    if(!page_has_diacritics) {
    	pmesg(80, "nothing to do\n");
    	pmesg(50, "END page_adjust_diacritics\n");
	return;
    }

    for (dia = list_head; dia != 0; dia = upcoming) {
	/* The node may get moved within the list below, so its
	 * successor has to be remembered right now.
	 */
	upcoming = dia->next;

	if (!(dia->b.flags & BF_DIACRITIC)) continue;

	ambiguous = 0;
	base = 0;

	dia_box = dia->b;
	dia_center.x = dia_box.x + dia_box.width / 2;
	dia_center.y = dia_box.y - dia_box.height / 2;

	/* There are complex overlayed constructions we can't handle.
	 * Therefore all possible candidates are looked for, and the
	 * search is given up as soon as a second one turns up.
	 */

	/* candidates preceding the diacritic */
	cand = dia->prev;
	while (
	    cand != 0 &&
	    abs(dia_box.y - cand->b.y) <= 3*(dia_box.height + cand->b.height)
	) {
	    if (point_in_box(&dia_center, &cand->b)) {
		if (base != 0) {
		    /* a second candidate */
		    ambiguous = 1;
		    break;
		}
		base = cand;
		direction = -1;
	    }
	    cand = cand->prev;
	}

	/* candidates following the diacritic */
	if (!ambiguous) {
	    cand = dia->next;
	    while (
		cand != 0 &&
		abs(dia_box.y - cand->b.y) <= 3*(dia_box.height + cand->b.height)
	    ) {
		if (point_in_box(&dia_center, &cand->b)) {
		    if (base != 0) {
			/* a second candidate */
			ambiguous = 1;
			break;
		    }
		    base = cand;
		    direction = 1;
		}
		cand = cand->next;
	    }
	}

	if (ambiguous) {
	    pmesg(
		80,
		"page_adjust_diacritics: trouble with diacritic %#lx\n",
		dia_box.glyph
	    );
	    continue;
	}

	if (base == 0) {
	    pmesg(
		80,
		"page_adjust_diacritics: no base glyph for diacritic %#lx\n",
		dia_box.glyph
	    );
	    /* Nobody to accent: treat it as a glyph of its own that
	     * already is where it belongs.
	     */
	    dia->b.flags &= ~(BF_SWIMMING | BF_DIACRITIC);
	    continue;
	}

	/* exactly one candidate */
	page_pair_accenting(base, dia, direction);
    }

    pmesg(50, "END page_adjust_diacritics\n");
}


static void page_pair_accenting(
    struct list_node_t * pbase,
    struct list_node_t * pdiacritic,
    int direction
)
{
    glyph_t dcv;

    assert(pdiacritic->b.flags & BF_DIACRITIC);
    assert(abs(direction) == 1);
    
    pmesg(
	80,
	"detected accenting: base=%#lx, diacritic=%#lx\n",
	pbase->b.glyph,
	pdiacritic->b.glyph
    );

    dcv = diacritic_combining_variant(pdiacritic->b.glyph);
    
    if(dcv) {
    	/* OK this one can be handeled within the unicode framework */
	
	/* move diacritic right after base glyph */
	if(direction == -1) {
	    /* base before diacritic */
	    while(pdiacritic->prev != pbase) {
		swap_nodes(pdiacritic->prev, pdiacritic);
	    }
	}
	else {
	    /* base after diacritic */
	    while(pdiacritic->prev != pbase) {
		swap_nodes(pdiacritic, pdiacritic->next);
		    /* note swap_nodes() is order sensitive */
	    }
	}
	
    	/* Make the combining diacritic occupy the same space as the
	 * accented glyph would. That's the only way to statisfy all
	 * ordering and spacing assumptions in this program. And nearly
	 * consequent.
	 */
	pdiacritic->b.width = pbase->b.width;
	pdiacritic->b.height = max(
	    pbase->b.height,
	    pdiacritic->b.height + pbase->b.y - pdiacritic->b.y
	);
	pdiacritic->b.x = pbase->b.x;
	pdiacritic->b.y = pbase->b.y;
	pdiacritic->b.glyph = dcv;
	pdiacritic->b.flags &= ~(BF_SWIMMING | BF_DIACRITIC);
    }
    else {
    	/* A strange guy. Won't combine. Just put him in line so he can't
	 * clobber up things. Like in real life.
	 */
	pdiacritic->b.height =
	    pdiacritic->b.height + pbase->b.y - pdiacritic->b.y;
	pdiacritic->b.y = pbase->b.y;
	pdiacritic->b.flags &= ~(BF_SWIMMING | BF_DIACRITIC);
	if(!page_sequential) {
	    /* keep the list ordered */
	    list_latest = pdiacritic;
	    	/* hope this won't fsck up. Better change bubble()s
		 * parameter list instead */
	    bubble();
	}
    }

}


static void page_end_sequential(void)
{
    struct list_node_t * p;
    struct box_t prev_box, curr_box;

    sint32 delta_x = 0, delta_y = 0;
    sint32 dist_x = 0;
    sint32 epsilon_x = 0, epsilon_y = 0;

    /* begin-of-page flag */
    int bop = 1;
    /* begin-of-line flag */
    int bol = 1;
        
    int spaces, spaces_by_prev, spaces_by_curr;
    struct linebuf_t lb;    


    pmesg(50, "BEGIN page_end_sequential\n");

    linebuf_init(&lb, 0);
    prev_box.width = 1;
    prev_box.height = 1;
    
    for (p = list_head; p != 0; p = p->next) {

        pmesg(80, "node: X = %li, Y = %li, glyph = %li (%c), font = %li\n",
              p->b.x, p->b.y, p->b.glyph, (unsigned char) p->b.glyph, p->b.font);

    	curr_box = p->b;
        if (curr_box.width == 0) curr_box.width = prev_box.width;
        if (curr_box.height == 0) curr_box.height = prev_box.height;

	if (!bop) {
    	    delta_x = curr_box.x - prev_box.x;
	    delta_y = curr_box.y - prev_box.y;
	    dist_x = delta_x - prev_box.width;

    	    /* (arithmetic mean of current and previous) / 20 */
	    epsilon_x = (curr_box.width + prev_box.width + 39) / 40;
	    epsilon_y = (curr_box.height + prev_box.height + 39) / 40;

            /* check for new line */
	    if (
		/* new line */
		delta_y >= 22*epsilon_y ||
		/* new column */
		delta_y <= -60*epsilon_y ||
		/* weird step back */
		dist_x <= -100*epsilon_x
	    ) {
        	pmesg(80, "end of line\n");
		outenc_write(stdout, &lb);
		linebuf_clear(&lb);
		puts("");
		bol = 1;
            }
	    
	    /* check for word breaks, but not at the beginning of a line */
	    if ((dist_x > 0) && (!bol)) {
		spaces_by_prev =
		    font_w_to_space(prev_box.font, dist_x);
		spaces_by_curr =
		    font_w_to_space(curr_box.font, dist_x);
    		spaces = (spaces_by_prev + spaces_by_curr + 1) / 2;

        	if (spaces > 0) {
                    pmesg(80, "setting space\n");
                    linebuf_putg(&lb, ' ');
        	}
	    }
		
	} /* end if (!bop) */

        linebuf_putg(&lb, curr_box.glyph);
	prev_box = curr_box;
	bop = 0;
	bol = 0;
    }
    /* flush line buffer and end page */
    outenc_write(stdout, &lb);
    puts("\n\f\n");
    
    linebuf_done(&lb);
    
    pmesg(50, "END page_end_sequential\n");
}


/************************************************************************
 * the hairy texmext stuff
 ************************************************************************/

/* The closed interval [a, b] of sint32 values (both ends included,
 * see interval32_contains()).
 */
typedef struct interval32_t interval32_t;
struct interval32_t {
    sint32 a;   /* lower end */
    sint32 b;   /* upper end */
};

/* Return 1 if x lies in the closed interval *this, 0 otherwise.
 * The outcome is also reported at message level 150.
 */
static int interval32_contains(const interval32_t * this, sint32 x)
{
    int inside = 1;

    if (x < this->a || this->b < x) inside = 0;

    pmesg(
    	150,
	"interval32_contains: %ld %s [%ld, %ld]\n",
	x,
	inside ? "in" : "not in",
	this->a,
	this->b
    );

    return inside;
}


/* Description of a search for boxes on the page, as evaluated by
 * page_match_box() and page_grb_unique_y().
 */
typedef struct searchinfo_t searchinfo_t;
struct searchinfo_t {
    interval32_t searched_y;
    	/* list nodes are scanned as long as their y lies in here */
    interval32_t x;
    	/* a matching box has its x in here */
    interval32_t y;
    	/* a matching box has its y in here */
    interval32_t top;
    	/* a matching box has its top (y - height) in here */
    interval32_t axis;
    	/* a matching box with known math axis has its axis in here */
    int require_axis;
    	/* if nonzero, boxes without known math axis don't match */
    int test_start;
    	/* if nonzero, the start node itself is tested as well */
    struct list_node_t * start;
    	/* the scan proceeds from here in both directions */
    struct list_node_t * first_found;
    	/* result: first matching node found, NULL if there is none */
};

/* Return 1 if the box in candidate meets the criteria in quest,
 * 0 otherwise.
 *
 * The candidate's x, y and top ( == y - height) have to lie in the
 * respective intervals of quest. If the candidate has a known math
 * axis, that has to lie in quest->axis as well; if it hasn't, the
 * candidate is only acceptable when quest->require_axis is zero.
 */
static int page_match_box(
    const struct searchinfo_t * quest,
    const struct list_node_t * candidate
)
{
    const struct box_t * box = &(candidate->b);

    if (!interval32_contains(&(quest->x), box->x)) return 0;
    if (!interval32_contains(&(quest->y), box->y)) return 0;
    if (!interval32_contains(&(quest->top), box->y - box->height)) return 0;

    if (box->flags & BF_HAS_AXIS) {
	return interval32_contains(&(quest->axis), box->axis);
    }

    return !quest->require_axis;
}

/* Scan the list around quest->start for boxes matching quest.
 *
 * The scan runs from the start node (or its predecessor, if
 * quest->test_start is zero) towards the list head, then from the
 * successor of the start node towards the list tail. Each direction
 * is given up at the first node whose y lies outside
 * quest->searched_y. Boxes flagged BF_SWIMMING are ignored.
 *
 * Return values:
 * 0 - no box matches the given criteria
 * 1 - some boxes match the given criteria, and all of them have the same y
 * 2 - some boxes match the given criteria, but different y values occur.
 *
 * quest->first_found is set to the first matching node encountered,
 * or to NULL if there is none.
 */
static int page_grb_unique_y(
    struct searchinfo_t * quest
)
{
    interval32_t * range = &(quest->searched_y);
    struct list_node_t * begin[2];
    struct list_node_t * node;
    sint32 common_y = 0;
    int found = 0;
    int pass;

    assert(interval32_contains(range, quest->start->b.y));

    /* pass 0 walks towards the head, pass 1 towards the tail */
    begin[0] = quest->test_start ? quest->start : quest->start->prev;
    begin[1] = quest->start->next;

    quest->first_found = NULL;

    for (pass = 0; pass < 2; ++pass) {
	node = begin[pass];
	while (node != NULL && interval32_contains(range, node->b.y)) {
	    if (!(node->b.flags & BF_SWIMMING) && page_match_box(quest, node)) {
		if (!found) {
		    /* the first match fixes the y all others must share */
		    quest->first_found = node;
		    common_y = node->b.y;
		    found = 1;
		}
		else if (node->b.y != common_y) {
		    return 2;
		}
	    }
	    node = (pass == 0) ? node->prev : node->next;
	}
    }

    return found;
}


/* The search restrictions that can be imposed. The values between
 * SRS_MIN_VAL and SRS_MAX_VAL (both exclusive) serve as index into
 * srestrict_conflicts[] and as bit number in sets of restrictions.
 */
enum srestrict_t {
    SRS_MIN_VAL = -1,	/* always first */
    SRS_REQUIRE_AXIS,
    SRS_NARROW,
    SRS_VERY_NARROW,
    SRS_MOREMATH_SIDE,
    SRS_LESSMATH_SIDE,
    SRS_MAX_VAL     	/* always last */
};

/* For every restriction, the bit set of those restrictions it must not
 * be combined with. The relation is symmetric: the two "narrow"
 * restrictions exclude each other, and so do the two "side"
 * restrictions.
 */
static int srestrict_conflicts[SRS_MAX_VAL] = {
    0,      	    	    /* SRS_REQUIRE_AXIS */
    1 << SRS_VERY_NARROW,   /* SRS_NARROW */
    1 << SRS_NARROW,	    /* SRS_VERY_NARROW */
    1 << SRS_LESSMATH_SIDE, /* SRS_MOREMATH_SIDE */
    1 << SRS_MOREMATH_SIDE  /* SRS_LESSMATH_SIDE */
};


/* Recurse through all subsets of the set of search restrictions.
 * Return values:
 * 0 - No box conforms to the given set of restrictions. Cut that branch.
 * 1 - The given restrictions or a further refinement have lead to a unique
 *     y value.
 * 2 - There were boxes conforming to the given set of restrictions, but
 *     the refinements I've tried haven't lead to a unique y value.
 */
static int page_grb_recursion(
    struct searchinfo_t * quest,
    struct list_node_t * swimmer,
    enum srestrict_t try_restrict,
    int srestricts
)
{
    struct searchinfo_t newquest;
    int res;
    struct box_t * pb;
    sint32 d;
    
    static char indents[] = "                 ";
    char * indent;
    
    indent = indents + lengthof(indents) + try_restrict - SRS_MAX_VAL;
    
    pmesg(130, "%sBEGIN page_grb_recursion\n", indent);
    pmesg(150, "%spage_grb_recursion: try_restrict=%d\n", indent, try_restrict);

    if(try_restrict < 0) {
    	/* The set of applicable search restrictions is already settled.
	 * Really DO something.
	 */
	res = page_grb_unique_y(quest);
        pmesg(
	    150,
	    "%spage_grb_recursion: srestricts=%#4x, unique_y result %d\n",
	    indent,
	    srestricts,
	    res
	);
    	pmesg(130, "%sEND page_grb_recursion\n", indent);
	return res;
    }

    /* Try without imposing a new restriction first */
    res = page_grb_recursion(quest, swimmer, try_restrict - 1, srestricts);
    if(res <= 1) {
    	pmesg(130, "%sEND page_grb_recursion\n", indent);
    	return res;
    }
    
    /* This hasn't been restrictive enough, try adding "our" restriction. */
    
    if(srestrict_conflicts[try_restrict] & srestricts) {
    	/* We can't add "our" restriction - conflict. But our caller could
	 * try others.
	 */
    	pmesg(150, "%spage_grb_recursion: tried conflicting restrictions\n", indent);
    	pmesg(130, "%sEND page_grb_recursion\n", indent);
	return(2);
    }

    pb = &(swimmer->b);
    d = ((pb->height + pb->depth) + pb->width) / 20;
    	/* (arithmetic mean of total height and width of swimmer)/10 */
    newquest = *quest;
    switch(try_restrict) {
    	case SRS_REQUIRE_AXIS:
	    newquest.require_axis = 1;
	    break;
	case SRS_NARROW:
	    newquest.x.a = max(newquest.x.a, pb->x - 30*d);
	    newquest.x.b = min(newquest.x.b, pb->x + 40*d);
	    	/* 10 for the swimmer itself */
	    break;
	case SRS_MOREMATH_SIDE:
	    if(glyph_get_hint(pb->glyph) & GH_MOREMATH_LEFT) {
	    	newquest.x.b = pb->x + pb->width;
	    }
	    else {
	    	newquest.x.a = pb->x;
	    }
	    break;
	case SRS_LESSMATH_SIDE:
	    if(glyph_get_hint(pb->glyph) & GH_MOREMATH_LEFT) {
	    	newquest.x.a = pb->x;
	    }
	    else {
	    	newquest.x.b = pb->x + pb->width;
	    }
	    break;
	case SRS_VERY_NARROW:
	    newquest.x.a = max(newquest.x.a, pb->x - 15*d);
	    newquest.x.b = min(newquest.x.b, pb->x + 25*d);
	    	/* 10 for the swimmer itself */
	    break;
	default:
	    NOTREACHED;
    }

    res = page_grb_recursion(
    	&newquest,
	swimmer,
	try_restrict - 1,
	srestricts | (1 << try_restrict)
    );

    quest->first_found = newquest.first_found;
    pmesg(130, "%sEND page_grb_recursion\n", indent);
    if(res == 1) return 1;
    else return 2;

}


/* The fonts in "TeX math extension" encoding are somewhat different from
 * the others. The glyphs are centered on the "math axis" (about the height
 * of a minus sign above the baseline of the surrounding text/formula). The
 * y coordinate of the glyph's reference point is mostly meaningless (and in
 * Knuth's cmex fonts the glyphs are "hanging down" from the reference
 * point). We know the math axis: it's the arithmetic mean of (y - height)
 * and (y + depth). But we have to move the glyph to the _baseline_ for
 * proper text formatting, hence we need to deduce the baseline somehow.
 *
 * TeX gets the height of the math axis above the baseline from a parameter
 * named axis_height in the font metrics of the currently used math symbol
 * font. This parameter is only present in TFM files for fonts with "TeX math
 * symbols" encoding and it is not easy to be sure which axis height was
 * in effect when the math extension glyph was set, given only the DVI file.
 *
 * Therefore, we try another approach first: we look at the surrounding
 * boxes. If every box in some reasonable neighbourhood that is dissected
 * by the math axis has the same baseline, this must be the baseline of
 * the formula. Only if this approach fails do we resort to guessing
 * axis_height by looking at the loaded math symbol fonts.
 *
 * This function implements the neighbourhood search. It returns a box
 * sitting on the deduced baseline, or NULL if no unique baseline could be
 * established. swimmer must be flagged BF_SWIMMING and BF_HAS_AXIS.
 */
static struct list_node_t * page_guess_reference_box(
    struct list_node_t * swimmer
)
{
    struct box_t * const sb = &(swimmer->b);
    struct searchinfo_t quest;
    sint32 extent;
    int unique;

    pmesg(50, "BEGIN page_guess_reference_box\n");
    pmesg(
        80,
        "page_guess_reference_box: y=%ld, axis=%ld\n",
        sb->y,
        sb->axis
    );

    assert(sb->flags & BF_SWIMMING);
    assert(sb->flags & BF_HAS_AXIS);

    /* Start next to the swimmer, without testing the swimmer itself. */
    quest.start = swimmer;
    quest.test_start = 0;
    quest.require_axis = 0;

    /* No horizontal limits to begin with. */
    quest.x.a = SINT32_MIN;
    quest.x.b = SINT32_MAX;

    /* Candidates are dissected by the swimmer's axis: their baseline is
     * between the axis and the swimmer's bottom edge, their top edge at
     * or above the axis.
     */
    quest.y.a = sb->axis;
    quest.y.b = sb->y + sb->depth;
    quest.top.a = SINT32_MIN;
    quest.top.b = sb->axis;

    /* A candidate with a known axis must share ours, up to small rounding
     * errors.
     */
    quest.axis.a = sb->axis - 2;
    quest.axis.b = sb->axis + 2;

    /* The range of y values the list scan will walk through. */
    if(page_sequential) {
        extent = sb->height + sb->depth;
        quest.searched_y.a = sb->axis - 3*extent;
        quest.searched_y.b = sb->axis + 3*extent;
    }
    else {
        quest.searched_y.a = min(sb->axis, sb->y);
        quest.searched_y.b = sb->y + sb->depth;
    }

    unique = (page_grb_recursion(&quest, swimmer, SRS_MAX_VAL - 1, 0) == 1);

    pmesg(50, "END page_guess_reference_box\n");
    return unique ? quest.first_found : NULL;
}


static void page_adjust_texmext(void)
{
    struct list_node_t * p, * s, * ref;
    sint32 new_y, delta;
        
    pmesg(50, "BEGIN page_adjust_texmext\n");

    if(!page_has_texmext) {
    	pmesg(80, "nothing to do\n");
    	pmesg(50, "END page_adjust_texmext\n");
	return;
    }

    for (p = list_head; p != 0; p = s) {
    	s = p->next;
	    /* p-> is a moving target */
	
	if (!(p->b.flags & BF_ON_AXIS)) continue;
	assert(p->b.flags & BF_HAS_AXIS);
	assert(p->b.flags & BF_SWIMMING);

    	pmesg(
	    80,
	    "page_adjust_texmext: glyph=%#6lx, y=%ld\n",
	    p->b.glyph,
	    p->b.y
	);
	
	if(p->b.axis_height > 0) {
	    ref = NULL;
	    pmesg(
	    	80,
		"page_adjust_texmext: known axis_height=%ld\n",
		p->b.axis_height
	    );
	}
	else {
    	    ref = page_guess_reference_box(p);
	    pmesg(
	    	80,
		"page_adjust_texmext:%s reference box found\n",
		ref ? "" : " no"
	    );
	}

	if(ref != NULL)  new_y = ref->b.y;
	else new_y = p->b.axis + abs(p->b.axis_height);
	pmesg(80, "page_adjust_texmext: new_y=%ld\n", new_y);
	
	delta = new_y - p->b.y;
	p->b.y = new_y;
	p->b.height += delta;
	p->b.depth -= delta;
	p->b.axis_height = p->b.y - p->b.axis;
	p->b.flags &= ~(BF_SWIMMING | BF_ON_AXIS);
	
	if(!page_sequential) {
	    list_latest = p;
	    bubble();
	}

    }

    pmesg(50, "END page_adjust_texmext\n");
}


/* Among the non-swimming boxes near quest->start that are accepted by
 * page_match_box(), find the one with the largest y (i.e. the lowest one
 * on the page). Of several boxes sharing that y, the one encountered
 * first wins.
 *
 * The list is walked backwards first (beginning with quest->start itself
 * if quest->test_start is set, with its predecessor otherwise), then
 * forwards from the successor of quest->start. Either walk stops at the
 * end of the list or at the first box whose y is outside
 * quest->searched_y.
 *
 * The result (NULL if nothing matches) is returned and also stored in
 * quest->first_found.
 */
static struct list_node_t * page_find_lowest_box(struct searchinfo_t * quest)
{
    interval32_t * window;
    struct list_node_t * origin[2];
    struct list_node_t * node;
    struct list_node_t * best = NULL;
    int pass;

    window = &(quest->searched_y);
    assert(interval32_contains(window, quest->start->b.y));

    origin[0] = quest->test_start ? quest->start : quest->start->prev;
    origin[1] = quest->start->next;

    quest->first_found = NULL;

    /* pass 0 walks towards the list head, pass 1 towards the list tail */
    for(pass = 0; pass < 2; ++pass) {
        node = origin[pass];
        while(node != NULL && interval32_contains(window, node->b.y)) {
            if(
                !(node->b.flags & BF_SWIMMING) &&
                page_match_box(quest, node) &&
                (best == NULL || node->b.y > best->b.y)
            ) {
                best = node;
            }
            node = (pass == 0) ? node->prev : node->next;
        }
    }

    quest->first_found = best;
    return best;
}


/* TeX's radical signs are hanging down from their reference point. We move
 * them to the baseline of the lowest glyph in the radicant (that's where
 * the bottom tip ought to be) or, if there's no radicant, to the y position
 * of the bottom tip.
 */
static void page_adjust_radicals(void)
{
    struct list_node_t * p, * s, * lowest;
    sint32 new_y, delta;
    struct searchinfo_t quest;
        
    pmesg(50, "BEGIN page_adjust_radicals\n");

    if(!page_has_radicals) {
    	pmesg(80, "nothing to do\n");
    	pmesg(50, "END page_adjust_radicals\n");
	return;
    }

    for (p = list_head; p != 0; p = s) {
    	s = p->next;
	    /* p-> is a moving target */
	
	if (!(p->b.flags & BF_RADICAL)) continue;
	assert(p->b.flags & BF_SWIMMING);

    	pmesg(
	    80,
	    "page_adjust_radicals: glyph=%#6lx, y=%ld\n",
	    p->b.glyph,
	    p->b.y
	);

    	/* put the radical on the baseline of the lowest box in the radicant */
	quest.x.a = p->b.x;
	quest.x.b =  SINT32_MAX;
	    /* FIXME: Once we have rudimentary rules support, we can search
	     * for the rule forming the radicals top and take its length as
	     * the width of the radicant.
	     */
	quest.y.a = p->b.y - p->b.height;
	quest.y.b = p->b.y + p->b.depth;
	quest.top = quest.y;
	quest.axis.a = SINT32_MIN;
	quest.axis.b = SINT32_MAX;
	quest.require_axis = 0;
	quest.test_start = 0;
	quest.start = p;

	if(page_sequential) {
	    sint32 h;

    	    h = p->b.height + p->b.depth;
	    quest.searched_y.a = p->b.y - h;
	    quest.searched_y.b = p->b.y + 2*h;
	}
	else {
    	    quest.searched_y = quest.y;
	}

    	lowest = page_find_lowest_box(&quest);
	pmesg(
	    80,
	    "page_adjust_radicals:%s lowest box found\n",
	    lowest ? "" : " no"
	);

	if(lowest != NULL)  new_y = lowest->b.y;
	else new_y = p->b.y + p->b.depth;
	pmesg(80, "page_adjust_radicals: new_y=%ld\n", new_y);
	
	delta = new_y - p->b.y;
	p->b.y = new_y;
	p->b.height += delta;
	p->b.depth -= delta;
	p->b.flags &= ~(BF_SWIMMING | BF_RADICAL);
	
	if(!page_sequential) {
	    list_latest = p;
	    bubble();
	}

    }

    pmesg(50, "END page_adjust_radicals\n");
}
