/*
Magpie - reference librarian for Debian systems
Copyright (C) 2000  Bear Giles <bgiles@coyotesong.com>

This program is free software; you may redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the license, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
 * Revision-control identification string.  "$Id$" is the RCS/CVS keyword
 * marker that such tools expand on checkout; nothing in this file reads
 * the variable.
 */
static const char rcsid[] = "$Id$";

/*****
This module performs a simple search for keywords in the description
field, and if they are present adds the package to an output file.
*****/
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <stdlib.h>
#include <ctype.h>
#include <time.h>
#include <locale.h>
#include <sys/types.h>
#include <dirent.h>
#include <sys/stat.h>
#include "magpie.h"

/* Directory (relative to the current directory) holding one page per keyword. */
#define OUTPUT_DIR	"keywords"
/* Index page listing every keyword together with its package count. */
#define OUTPUT_FILE	"keywords/index.html"

/* Upper bound on the number of keywords read from the keyword file. */
#define MAX_KEYWORDS	200

/*
 * Keyword table, filled in by keywords_init.  Only the first keycount
 * entries of each array are in use.
 */
static char *words[MAX_KEYWORDS];	/* lower-cased keyword (strdup'ed) */
static int wordlen[MAX_KEYWORDS];	/* strlen of the matching words[] entry */
static int cnt[MAX_KEYWORDS];		/* number of packages matched so far */
static int keycount;			/* number of keywords loaded */

/*+
Comparison function for sorting by package name, version (descending),
section, category.  It is used prior to creating the initial set of
HTML files.
+*/
static int cmp_package_name (const void *p, const void *q)
{
	struct package_info *pp = *((struct package_info **) p);
	struct package_info *qq = *((struct package_info **) q);
	int r;
	
	assert (pp->name);
	assert (qq->name);

	r = strcoll (pp->name, qq->name);
	if (r)
		return r;

	r = strcoll (pp->version, qq->version);
	if (r)
		return -r;

	r = pp->section - qq->section;
	if (r)
		return r;

	return pp->category - qq->category;
}


/*+
+*/
static int keywords_cleanup (void)
{
	int i;

	for (i = 0; i < MAX_KEYWORDS; i++) {
		if (words[i]) {
			free (words[i]);
			words[i] = 0;
		}
	}

	return 0;
}


/*+
Emit the keyword section of an annotated index.

For type MAGPIE_ALL_PACKAGES this writes a title and a list of keywords
(with the number of matching packages) to fp0, and writes the same list
to the stand-alone page OUTPUT_FILE, which is then handed to gzip().
For any other type nothing is written.

FILE *fp0	already-open stream of the index being generated
int type	kind of index being generated

Returns 0 on success (or when there is nothing to do), -1 if OUTPUT_FILE
could not be created or written.
+*/
static int keywords_index (FILE *fp0, int type)
{
	FILE *fp;
	int i;

	if (type != MAGPIE_ALL_PACKAGES)
		return 0;

	/*
	 * Create the stand-alone page before touching fp0, so that a
	 * failure does not leave a link to a page that does not exist.
	 */
	fp = fopen (OUTPUT_FILE, "w");
	if (fp == NULL) {
		perror (OUTPUT_FILE);
		return -1;
	}

	mp_title_open (fp0, 3, "");
	mp_url (fp0, "%s.gz", "Grouped by keyword", OUTPUT_FILE);
	mp_title_close (fp0, 3);

	mp_doc_open (fp, "All packages, grouped by keyword");

	mp_list_open (fp0);
	mp_list_open (fp);
	for (i = 0; i < keycount; i++) {
		/* entry in the stand-alone page: link relative to OUTPUT_DIR */
		mp_item_open (fp);
		mp_url (fp, "%1$s.html.gz", "%1$s", words[i]);
		mp_nbsp (fp);
		fprintf (fp, "(%d)", cnt[i]);
		mp_item_close (fp);

		/* same entry in the caller's page: link includes OUTPUT_DIR */
		mp_item_open (fp0);
		mp_url (fp0, "%1$s/%2$s.html.gz", "%2$s", OUTPUT_DIR, words[i]);
		mp_nbsp (fp0);
		fprintf (fp0, "(%d)", cnt[i]);
		mp_item_close (fp0);
	}
	mp_list_close (fp);
	mp_list_close (fp0);

	mp_doc_close (fp);

	/* fclose flushes; don't compress a page that was not fully written */
	if (fclose (fp) != 0) {
		perror (OUTPUT_FILE);
		return -1;
	}
	gzip (OUTPUT_FILE);

	return 0;
}


/*+
Build the path of the HTML page belonging to one keyword.

char *pathname	 buffer receiving the path
size_t size	 size of that buffer
const char *word keyword

Returns 0 on success, -1 if the path does not fit into the buffer.
+*/
static int keywords_pathname (char *pathname, size_t size, const char *word)
{
	int n = snprintf (pathname, size, "%s/%s.html", OUTPUT_DIR, word);

	return (n < 0 || (size_t) n >= size) ? -1 : 0;
}


/*+
Read the keyword list from /etc/magpie/keywords into words[], wordlen[]
and cnt[], setting keycount.  Lines starting with '#' and empty lines are
skipped; keywords are converted to lower case.  At most MAX_KEYWORDS
keywords are read.

Returns 0 on success, -1 if the file cannot be opened or memory runs out.
+*/
static int keywords_load (void)
{
	FILE *fp;
	char line[8192];
	size_t i, n;

	keycount = 0;
	fp = fopen ("/etc/magpie/keywords", "r");
	if (fp == NULL)
		return -1;

	while (keycount < MAX_KEYWORDS &&
		fgets (line, sizeof line, fp) != NULL) {
		if (line[0] == '#')
			continue;

		/*
		 * Cut the line at its terminator, if it has one.  The last
		 * line of a file may lack the newline, so blindly dropping
		 * the final character would damage that keyword.
		 */
		n = strcspn (line, "\r\n");
		line[n] = '\0';
		if (n == 0)
			continue;	/* an empty keyword would match everything */

		/* <ctype.h> functions need an unsigned char value */
		for (i = 0; i < n; i++)
			line[i] = tolower ((unsigned char) line[i]);

		words[keycount] = strdup (line);
		if (words[keycount] == NULL) {
			fclose (fp);
			return -1;
		}
		wordlen[keycount] = (int) n;
		cnt[keycount] = 0;
		keycount++;
	}
	fclose (fp);

	return 0;
}


/*+
Decide whether a keyword occurs in a (lower-cased) text as a word of its
own.

To reduce false hits the character immediately before and after each
matching substring is checked, and the match is rejected if either one
is a letter.  As an exception a following 's' or 'e' is accepted so that
plural forms still count.  The best way to understand these rules is to
search for information on Apple computers (keyword "mac") and consider
how to recognize "MACs" but not "eMACs".

const char *text  lower-cased, NUL-terminated text to search
const char *word  lower-cased keyword
int len		  length of the keyword

Returns 1 if an acceptable match exists, 0 otherwise.
+*/
static int keywords_match (const char *text, const char *word, int len)
{
	const char *s;
	unsigned char after;

	if (len <= 0)
		return 0;

	for (s = strstr (text, word); s != NULL; s = strstr (s + 1, word)) {
		/*
		 * A hit at the very start of the text has no preceding
		 * character, and therefore no preceding letter either.
		 */
		if (s > text && isalpha ((unsigned char) s[-1]))
			continue;

		after = (unsigned char) s[len];
		if (isalpha (after) && after != 's' && after != 'e')
			continue;

		return 1;
	}

	return 0;
}


/*+
Module initialisation: generate the per-keyword HTML pages.

The keyword list is loaded, the package cache is sorted with
cmp_package_name, and OUTPUT_DIR is created.  Then every package whose
description mentions a keyword is appended to that keyword's page (at
most once per keyword) and counted in cnt[].  Finally every page is
closed off and handed to gzip().

Returns 0 on success, -1 if the keyword file cannot be read or one of
the output pages cannot be created or appended to.
+*/
static int keywords_init (void)
{
	FILE *fp;
	int i, j, len;
	size_t k, n, room;
	struct package_info *p;
	char pathname[256];
	char buffer[8192];	/* 3000 max. used in Debian 2.2 */

	if (keywords_load () < 0)
		return -1;

	qsort (cache, cachecnt, sizeof (cache[0]), cmp_package_name);

	/* the directory may already exist; a real problem shows up at fopen */
	mkdir (OUTPUT_DIR, 0755);

	/* start every page with its document header */
	for (j = 0; j < keycount; j++) {
		if (keywords_pathname (pathname, sizeof pathname, words[j]) < 0) {
			fprintf (stderr, "keywords: keyword too long: %s\n",
				words[j]);
			return -1;
		}
		fp = fopen (pathname, "w");
		if (fp == NULL) {
			perror (pathname);
			return -1;
		}
		mp_doc_open (fp, "Packages Containing Keyword '%s'", words[j]);
		fclose (fp);
	}

	for (i = 0; i < cachecnt; i++) {
		p = cache[i];

		/*
		 * Put the description into a single buffer.  Start from an
		 * empty string so that a package without description lines
		 * is not matched against the previous package's text.
		 */
		buffer[0] = '\0';
		n = 0;
		for (j = 0; j < p->desccnt; j++) {
			room = sizeof buffer - n;
			len = snprintf (&buffer[n], room, "%s ",
				p->description[j]);
			if (len < 0) {
				buffer[n] = '\0';
				break;
			}
			/*
			 * snprintf reports the length it wanted to write;
			 * on truncation the buffer is full, so stop here
			 * instead of advancing past its end.
			 */
			if ((size_t) len >= room) {
				n = sizeof buffer - 1;
				break;
			}
			n += (size_t) len;
		}

		/* now we cheat compared to KWIC approach */
		for (k = 0; k < n; k++)
			buffer[k] = tolower ((unsigned char) buffer[k]);

		/* list the package once under every keyword it mentions */
		for (j = 0; j < keycount; j++) {
			if (!keywords_match (buffer, words[j], wordlen[j]))
				continue;

			if (keywords_pathname (pathname, sizeof pathname,
				words[j]) < 0)
				return -1;
			fp = fopen (pathname, "a");
			if (fp == NULL) {
				perror (pathname);
				return -1;
			}
			mp_package (fp, p, 1, 0);
			cnt[j]++;
			fclose (fp);
		}
	}

	/* finish every page and compress it */
	for (j = 0; j < keycount; j++) {
		if (keywords_pathname (pathname, sizeof pathname, words[j]) < 0)
			return -1;
		fp = fopen (pathname, "a");
		if (fp == NULL) {
			perror (pathname);
			return -1;
		}
		mp_doc_close (fp);
		fclose (fp);

		gzip (pathname);
	}

	return 0;
}


/*
 * Module descriptor for the keyword grouping module.  It has external
 * linkage; presumably the magpie core locates the module through it
 * (confirm against magpie.h).
 *
 * Standard C99 designated initializers are used in place of the obsolete
 * GNU "field : value" form, which only GCC-compatible compilers accept.
 */
struct magpie_module mod_keywords = {
	.version         = MAGPIE_VERSION,
	.description     = "group packages by keyword",
	.init            = keywords_init,
	.cleanup         = keywords_cleanup,
	.annotated_index = keywords_index
};
