/*
** 1998-08-02 -	This module deals with file types. It provides services to initialize the
**		default types, add/delete/move previously created types, and even using all
**		this type information to analyze files. This is useful stuff.
** 1998-08-15 -	Mild redesign; all functions now take a "generic" GList rather than a CfgInfo.
**		This allows use of these functions on styles _not_ sitting on the global
**		CfgInfo.style list. Very handy when editing.
** 1998-08-26 -	Massive hacking to implement support for 'file' RE matching. Got a lot nicer
**		than I had first expected, actually. If used, 'file' is always invoked
**		exactly once for each dirpane. This cuts down on the overhead of 'file'
**		reading and parsing its rather hefty (~120 KB on my system) config file.
** 1998-09-15 -	Added support for case-insensitive regular expressions.
** 1998-09-16 -	Added priorities to file types, controlling the order in which they are
**		checked (and listed, of course). Priorities are in 0..254, which I really
**		think should be enough. If I'm wrong, I'll just square that number. :) 0 is
**		the highest priority (which should explain why 255 is reserved for "Unknown").
** 1998-09-18 -	Regular expressions are now handled by the POSIX code in the standard C library.
**		No longer any need for Henry Spencer's code. Feels good.
** 1998-12-13 -	Priorities removed. Types now explicitly ordered by user in config.
*/

#include "gentoo.h"

#include <ctype.h>
#include <fcntl.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/wait.h>
#include <unistd.h>

#include <stddef.h>

#include "errors.h"
#include "strutil.h"
#include "styles.h"
#include "fileutil.h"
#include "types.h"

/* ----------------------------------------------------------------------------------------- */

/* Collected variables that deal with 'file' into a struct, for clarity. This is the
** module-private state shared by typ_identify_begin(), typ_identify() and
** typ_identify_end(), plus the bookkeeping for the SIGPIPE handler.
*/
static struct
{
	gboolean		file_used;		/* Any types using 'file' active? Computed by typ_identify_begin(). */
	GSList			*files;			/* Current list of files to inspect; rows are queued by typ_identify(). */
	gboolean		sigpipe_installed;	/* SIGPIPE handler installed? Set by install_handler(). */
	volatile sig_atomic_t	sigpipe_occured;	/* Did we just catch SIGPIPE? Written by sigpipe_handler(). */
} file_info;

/* ----------------------------------------------------------------------------------------- */

/* 1998-08-02 -	Create a new type, with the given <name> and identification strings. Use
**		NULL for an identifier that should not be used. Returns a pointer to a new
**		FType structure, or NULL on failure.
** 1998-08-11 -	Now takes the name of a <style> to apply to files of this type, too.
** 1998-09-07 -	Added another argument, for the new permissions support. That's eight
**		arguments; pretty close to my personal limit. :)
** 1999-05-29 -	Removed the <style> argument again, since it was too complex. Use typ_type_set_style().
**
**		Summary of the contract: <mode> and <perm> are stored as given; a non-zero
**		<perm> enables the permission test. Each of <suffix>, <name_re> and <file_re>
**		that is non-NULL is copied into the type and enables the corresponding test.
**		No regular expression is compiled here, and the style is left as NULL.
**		The <cfg> argument is not used by this function.
*/
FType * typ_type_new(CfgInfo *cfg, const gchar *name, mode_t mode, gint perm, const gchar *suffix, const gchar *name_re, const gchar *file_re)
{
	FType	*ft = g_malloc(sizeof *ft);

	if(ft == NULL)
		return NULL;

	/* Begin with all optional criteria switched off and nothing compiled. */
	ft->flags = 0UL;
	ft->suffix[0] = '\0';
	ft->name_re_src[0] = '\0';
	ft->file_re_src[0] = '\0';
	ft->name_re = NULL;
	ft->file_re = NULL;
	ft->style = NULL;

	/* The mandatory parts. */
	stu_strncpy(ft->name, name, sizeof ft->name);
	ft->mode = mode;
	ft->perm = perm;
	if(perm != 0)
		ft->flags |= FTFL_REQPERM;

	/* Each optional identifier turns on its own flag when supplied. */
	if(suffix != NULL)
	{
		stu_strncpy(ft->suffix, suffix, sizeof ft->suffix);
		ft->flags |= FTFL_REQSUFFIX;
	}
	if(name_re != NULL)
	{
		stu_strncpy(ft->name_re_src, name_re, sizeof ft->name_re_src);
		ft->flags |= FTFL_NAMEMATCH;
	}
	if(file_re != NULL)
	{
		stu_strncpy(ft->file_re_src, file_re, sizeof ft->file_re_src);
		ft->flags |= FTFL_FILEMATCH;
	}
	return ft;
}

/* 1998-08-14 -	Create a copy of the <old> type. Has the side-effect of clearing all compiled
**		regular expressions in the original (and the copy).
**
**		The compiled expressions are released in <old> before the structure is
**		copied, so the copy never shares a regex_t pointer with the original.
**		Returns the new FType, or NULL if the allocation fails (in which case
**		<old> is left untouched).
*/
FType * typ_type_copy(FType *old)
{
	FType	*dup = g_malloc(sizeof *dup);

	if(dup == NULL)
		return NULL;

	/* Drop the compiled name expression, if there is one. */
	if(old->name_re != NULL)
	{
		regfree(old->name_re);
		g_free(old->name_re);
		old->name_re = NULL;
	}
	/* Same for the compiled 'file' output expression. */
	if(old->file_re != NULL)
	{
		regfree(old->file_re);
		g_free(old->file_re);
		old->file_re = NULL;
	}
	/* Plain structure copy; both RE pointers are NULL at this point. */
	*dup = *old;

	return dup;
}

/* Destroy <type>: release any compiled regular expressions it holds, then free the
** structure itself. The pointer must not be used after this call.
*/
void typ_type_destroy(FType *type)
{
	regex_t	*compiled[2];
	guint	i;

	/* Handle both expressions with the same code; name first, then 'file'. */
	compiled[0] = type->name_re;
	compiled[1] = type->file_re;
	for(i = 0; i < sizeof compiled / sizeof *compiled; i++)
	{
		if(compiled[i] == NULL)
			continue;
		regfree(compiled[i]);
		g_free(compiled[i]);
	}
	g_free(type);
}

/* ----------------------------------------------------------------------------------------- */

/* 2009-03-13 -	Return the Unknown type.
**
**		Walks the type list in <cfg> and returns the first entry for which
**		TYP_IS_UNKNOWN() holds, or NULL if the list has no such entry.
*/
FType * typ_type_get_unknown(const CfgInfo *cfg)
{
	const GList	*node = cfg->type;

	while(node != NULL)
	{
		if(TYP_IS_UNKNOWN((const FType *) node->data))
			return node->data;
		node = g_list_next(node);
	}
	return NULL;
}

/* ----------------------------------------------------------------------------------------- */

/* 1998-09-07 -	Check if the file described by <line> matches the permissions requirements
**		in <type>. Returns 1 if so, otherwise 0.
** 1999-03-14 -	Made the access to the DirRow (previously DirLine) a lot more abstract.
**
**		Two stages: first, every set-uid/set-gid/sticky bit requested by <type>
**		must be present in the row's stat mode. Second, each requested read, write
**		or execute permission is evaluated for the effective user and group of this
**		process; permissions that are not requested count as fulfilled.
*/
static gint check_perm(const FType *type, DirRow *row)
{
	mode_t	required = 0;
	gint	can_read = 1, can_write = 1, can_exec = 1;
	uid_t	euid;
	gid_t	egid;

	/* Translate the special-bit requirements into a stat mode mask. */
	if(type->perm & FTPM_SETUID)
		required |= S_ISUID;
	if(type->perm & FTPM_SETGID)
		required |= S_ISGID;
	if(type->perm & FTPM_STICKY)
		required |= S_ISVTX;

	/* All requested special bits have to be set on the file. */
	if(required != 0)
	{
		if((DP_ROW_STAT(row).st_mode & required) != required)
			return 0;
	}

	euid = geteuid();
	egid = getegid();

	/* User-centric access checks; only the requested ones are evaluated. */
	if(type->perm & FTPM_READ)
		can_read = fut_can_read(&DP_ROW_STAT(row), euid, egid);
	if(type->perm & FTPM_WRITE)
		can_write = fut_can_write(&DP_ROW_STAT(row), euid, egid);
	if(type->perm & FTPM_EXECUTE)
		can_exec = fut_can_execute(&DP_ROW_STAT(row), euid, egid);

	return can_read && can_write && can_exec;
}

/* ----------------------------------------------------------------------------------------- */

/* 1998-09-15 -	Check the RE in <re>, which has source <re_src>, against <string>.
**		Returns 1 on match, 0 on failure.
**
**		If <*re> is NULL the expression is compiled first (as an extended, no-sub
**		POSIX RE; case-insensitive if <nocase>), and the compiled version is stored
**		in <*re> for later calls. If <glob> is set, <re_src> is treated as a glob
**		pattern and translated with stu_glob_to_re() before compilation.
**
**		A pattern that fails to compile is never handed to regexec(): the half-built
**		regex_t is discarded, <*re> stays NULL and 0 ("no match") is returned. This
**		means a broken pattern gets recompiled (and fails again) on every call.
**		Only a regexec() result of exactly 0 counts as a match; execution errors are
**		treated as "no match".
*/
static gint check_re(const gchar *re_src, regex_t **re, gboolean glob, gboolean nocase, const gchar *string)
{
	if(*re == NULL)					/* RE not compiled? */
	{
		gchar	*glob_re = NULL;
		regex_t	*compiled;
		gint	status;

		if(glob)
		{
			/* NOTE(review): assumes stu_glob_to_re() may return NULL on failure - confirm. */
			if((glob_re = stu_glob_to_re(re_src)) == NULL)
				return 0;
			re_src = glob_re;
		}
		compiled = g_malloc(sizeof *compiled);
		status = regcomp(compiled, re_src, REG_EXTENDED | REG_NOSUB | (nocase ? REG_ICASE : 0));
		g_free(glob_re);			/* Free the globbed version, if any. */
		if(status != 0)
		{
			/* Contents of <compiled> are undefined after a failed regcomp(); just drop it. */
			g_free(compiled);
			return 0;
		}
		*re = compiled;
	}
	return regexec(*re, string, 0, NULL, 0) == 0;
}

/* 1998-08-02 -	Attempt to apply the identification rules given in by the type <type> to the
**		dirpane line in <user>. The string <fout> is output from 'file' on this very file,
**		or NULL if 'file' hasn't yet been run. Returns TRUE if there is indeed a match. Note how
**		regular expressions are compiled if they're not already, and that the compiled
**		version is then kept for future use. This places some demands on the memory
**		handling later (config etc) but I'll deal with that then...
** 1998-08-26 -	Extended. Now actually does check against 'file' output.
** 1998-08-30 -	Now supports glob->RE translations for name and file matches. Smooth.
** 1998-09-15 -	Moved the actual RE checking out to a function of its own, since it was getting complex.
** 1999-03-14 -	Replaced DirLine with new, opaque, DirRow.
**
**		How a verdict is reached: the "Unknown" type matches anything. Otherwise the
**		file-type bits of the row's stat mode must equal the type's mode, and then
**		every enabled criterion (permissions, suffix, name RE, 'file' RE) must hit.
**		A type that requires a 'file' match can not match while <fout> is NULL, since
**		the criterion is counted as attempted but can not score a hit.
*/
static gint check_type(FType *type, DirRow *row, const gchar *fout)
{
	guint		wanted = 0, matched = 0;
	const gchar	*name;

	/* Everything matches the unknown type, and that's a fact. */
	if(TYP_IS_UNKNOWN(type))
		return 1;

	/* For links, test against the last path component of the link's target name. */
	if(S_ISLNK(DP_ROW_LSTAT(row).st_mode))
	{
		const gchar	*slash = strrchr(DP_ROW_LINKNAME(row), G_DIR_SEPARATOR);

		name = (slash != NULL) ? slash + 1 : DP_ROW_LINKNAME(row);
	}
	else
		name = DP_ROW_NAME(row);

	/* The mode test is cheap and decisive, so it goes first. */
	if(((DP_ROW_STAT(row).st_mode) & S_IFMT) != type->mode)
		return 0;

	/* Count each enabled criterion, and each one that is fulfilled. */
	if(type->flags & FTFL_REQPERM)
	{
		wanted++;
		matched += check_perm(type, row);
	}
	if(type->flags & FTFL_REQSUFFIX)
	{
		wanted++;
		matched += stu_has_suffix(name, type->suffix);
	}
	if(type->flags & FTFL_NAMEMATCH)
	{
		wanted++;
		matched += check_re(type->name_re_src, &type->name_re, type->flags & FTFL_NAMEGLOB, type->flags & FTFL_NAMENOCASE, name);
	}
	if(type->flags & FTFL_FILEMATCH)
	{
		wanted++;
		if(fout != NULL)	/* Without 'file' output, this criterion can't be met. */
			matched += check_re(type->file_re_src, &type->file_re, type->flags & FTFL_FILEGLOB, type->flags & FTFL_FILENOCASE, fout);
	}
	return wanted == matched;
}

/* 1998-08-02 -	Identify the <line>, i.e. attempt to find an FType that matches it, and assign
**		a pointer to that FType to the line's "type" field. If no user-defined type
**		is found, we assign the root type ("Unknown").
** 1998-08-26 -	Now returns TRUE if a "sophisticated" type was indeed found, FALSE if the "Unknown"
**		type had to do. Also has become 'static', since you can't call this outside of
**		the framework established by typ_identify_begin()/typ_identify_end(). Note use
**		of NULL as third arg to check_type(), since we haven't started running 'file'
**		yet.
** 1999-03-14 -	Replaced DirLine with DirRow.
**
**		If nothing in the list matches at all (the list is empty, or it lacks the
**		"Unknown" entry), the row's type is left untouched and FALSE is returned,
**		rather than dereferencing the NULL end-of-list pointer.
*/
static gint identify(MainInfo *min, DirRow *row)
{
	const GList	*here;

	for(here = min->cfg.type; here != NULL; here = g_list_next(here))
	{
		if(check_type((FType *) here->data, row, NULL))
		{
			DP_ROW_TYPE(row) = here->data;
			return !TYP_IS_UNKNOWN((FType *) here->data);
		}
	}
	return 0;	/* Ran off the end of the list without any match. */
}

/* ----------------------------------------------------------------------------------------- */

/* 1998-08-26 -	A rewrite of the much of the (three weeks old) file typing architecture, in
**		order to accommodate use of 'file' in a way even resembling something efficient.
**		Rather than just calling typ_identify() on a dirline, you call first this
**		function, which initializes some state. You then, repeatedly, call typ_identify()
**		for all lines you wish to type. Then call typ_identify_end(). Done!
**
**		The state initialized here is the "is any type using 'file'?" flag, found by
**		scanning the configured types. A warning is printed on stderr if rows from a
**		previous batch are still queued, but the call proceeds anyway.
*/
void typ_identify_begin(MainInfo *min)
{
	GList		*iter;
	gboolean	uses_file = FALSE;

	if(file_info.files != NULL)
		fprintf(stderr, "**TYPES: Attempted to nest calls to typ_identify_begin()!\n");

	/* Stop scanning as soon as one 'file'-matching type has been seen. */
	for(iter = min->cfg.type; (iter != NULL) && !uses_file; iter = g_list_next(iter))
		uses_file = (((FType *) iter->data)->flags & FTFL_FILEMATCH) != 0;

	file_info.file_used = uses_file;
}

/* ----------------------------------------------------------------------------------------- */

/* 1998-08-26 -	Identify the given line. If identification fails, i.e. if the "Unknown" type is
**		assigned to the line, remember the line for later exposure to the external
**		'file' command.
**
**		Rows are only remembered when typ_identify_begin() found at least one type
**		that uses 'file' matching; they are prepended to the pending list.
*/
void typ_identify(MainInfo *min, DirRow *row)
{
	if(identify(min, row))
		return;				/* Got a real type, nothing more to do. */
	if(!file_info.file_used)
		return;				/* No type uses 'file', so it can't help. */
	file_info.files = g_slist_prepend(file_info.files, row);
}

/* ----------------------------------------------------------------------------------------- */

/* 1998-08-26 -	Attempt to match all types in list against <line>, knowing that 'file' said
**		<fout>. Returns the matching type if any, or NULL if there's no match.
**
**		Every type in <list> is handed to check_type() in order, with <fout> as the
**		'file' output; the first one that matches is returned. The walk also ends
**		at the first list entry whose data pointer is NULL.
*/
static FType * match_file(GList *list, DirRow *row, const gchar *fout)
{
	GList	*iter;

	for(iter = list; iter != NULL; iter = g_list_next(iter))
	{
		FType	*candidate = (FType *) iter->data;

		if(candidate == NULL)
			break;
		if(check_type(candidate, row, fout))
			return candidate;
	}
	return NULL;
}

/* 2009-03-11 -	Trivial SIGPIPE handler, that just sets a global flag.
**		Any signal number other than SIGPIPE is ignored.
*/
static void sigpipe_handler(int sig)
{
	if(sig == SIGPIPE)
		file_info.sigpipe_occured = 1;
}

/* 2009-03-11 -	Installs a signal handler for SIGPIPE, so we can catch broken 'file' runs. */
static gboolean install_handler(void)
{
	struct sigaction	act;

	if(file_info.sigpipe_installed)
		return TRUE;

	act.sa_handler = sigpipe_handler;
	sigemptyset(&act.sa_mask);
	act.sa_flags	= 0;

	return file_info.sigpipe_installed = (sigaction(SIGPIPE, &act, NULL) == 0);
}

/* 1998-08-26 -	End of batched identification. If any unknown's were found *and* we know that
**		there are 1 or more types using the 'file' recognition support, we need to
**		call 'file'. The idea here is that we only want to execute the 'file' command
**		once, no matter how many files need identification. Also, we use redirection
**		of stdin for 'file' to supply it with file names to check (via its "-f -" option),
**		since writing out the names to disk seems inefficient.
** 2009-03-08 -	Totally rewritten, now uses glib's fancy spawning to run the subprocess, and
**		keeps it all going in pipes, without touching disk. This removes the need for
**		a temporary filename, and should improve performance.
**
**		Protocol: for each pending row, one name line is written to the child's
**		stdin and one reply line is read from its stdout. The text after the first
**		':' of the reply (leading whitespace skipped) is matched against the types,
**		and a matching type is assigned to the row. The pending list is always freed
**		and reset before returning. The <path> argument is not used here.
**
**		Error handling: a broken pipe (EPIPE or caught SIGPIPE) stops the batch and
**		is reported through err_printf(). Any other write failure also stops the
**		batch, since waiting for a reply to a name that was never delivered would
**		block. Interrupted writes (EINTR) are simply retried. The read descriptor is
**		owned by the stdio stream once fdopen() succeeds, so it is closed exactly
**		once: by fclose() in that case, by close() otherwise.
*/
void typ_identify_end(MainInfo *min, const gchar *path)
{
	gchar	*argv[] = { "file", "-n", "-f", "-", NULL };
	GPid	file_pid;
	gint	file_stdin, file_stdout;
	GError	*error = NULL;

	if(file_info.files == NULL)
		return;

	install_handler();
	file_info.sigpipe_occured = 0;		/* Never report a SIGPIPE left over from an earlier batch. */

	if(g_spawn_async_with_pipes(NULL, argv, NULL, G_SPAWN_SEARCH_PATH | G_SPAWN_STDERR_TO_DEV_NULL,
				    NULL, NULL, &file_pid, &file_stdin, &file_stdout, NULL, &error))
	{
		FILE	*in;

		if((in = fdopen(file_stdout, "rt")) != NULL)
		{
			GSList	*here;

			for(here = file_info.files; here != NULL; here = g_slist_next(here))
			{
				gchar		buf[FILENAME_MAX + 256];
				size_t		len, to_go;
				gboolean	failed = FALSE;

				len = g_snprintf(buf, sizeof buf, "%s\n", DP_SEL_NAME(here));
				if(len >= sizeof buf)	/* Handle overflow by just skipping that file. */
					continue;

				/* Push the whole line to the child, coping with short writes. */
				to_go = len;
				while(to_go > 0)
				{
					ssize_t	wrote;

					err_clear(min);
					wrote = write(file_stdin, buf + (len - to_go), to_go);
					if(wrote < 0)
					{
						/* errno is only meaningful here, right after a failed write(). */
						if((errno == EINTR) && !file_info.sigpipe_occured)
							continue;
						if(errno == EPIPE)
							file_info.sigpipe_occured = 1;
						failed = TRUE;
						break;
					}
					if((wrote == 0) || file_info.sigpipe_occured)
					{
						failed = TRUE;	/* No progress possible, or the pipe broke. */
						break;
					}
					to_go -= (size_t) wrote;
				}
				if(failed)
					break;		/* Name not delivered; reading a reply could block forever. */

				if(fgets(buf, sizeof buf, in) != NULL)
				{
					const char	*fout;
					FType		*type;

					if((fout = strchr(buf, ':')) == NULL)
						continue;
					for(fout++; isspace((unsigned char) *fout); fout++)
						;
					if((type = match_file(min->cfg.type, DP_SEL_ROW(here), fout)) != NULL)
						DP_SEL_TYPE(here) = type;
				}
			}
			fclose(in);			/* This closes file_stdout, too. */
		}
		else
			close(file_stdout);		/* No stream took ownership, so close the raw descriptor. */
		close(file_stdin);
		g_spawn_close_pid(file_pid);
		err_clear(min);
		if(file_info.sigpipe_occured)
		{
			/* FIXME: This is problematic; the nice printf() text is often lost due to free space printing. */
			err_printf(min, _("Got SIGPIPE when writing to 'file' process (%s), it seems to have terminated prematurely."), argv[0]);
			/* FIXME: As a last resort, this at least "sticks" so an error gets shown when we get out. */
			errno = EPIPE;
		}
	}
	else
	{
		g_prefix_error(&error, _("Unable to run spawn 'file' command: "));
		err_printf(min, "%s", error->message);
		g_error_free(error);
	}
	g_slist_free(file_info.files);
	file_info.files = NULL;
}

/* ----------------------------------------------------------------------------------------- */

/* 1998-08-02 -	Initialize the file typing subsystem with some simple default types.
** 1998-09-02 -	Painlessly diked out all but "Unknown" and "Directory", since I think these
**		two are the only ones that are going to be built-in.
**
**		The type list in <cfg> is reset to empty first. "Unknown" is created with
**		mode 0 and has its style looked up with a NULL name; "Directory" is created
**		with mode S_IFDIR and gets the style named "Directory". A type whose creation
**		fails is silently left out.
*/
void typ_init(CfgInfo *cfg)
{
	FType	*unknown, *directory;

	cfg->type = NULL;

	unknown = typ_type_new(cfg, _("Unknown"), 0, 0, NULL, NULL, NULL);
	if(unknown != NULL)
	{
		cfg->type = typ_type_insert(cfg->type, NULL, unknown);
		cfg->type = typ_type_set_style(cfg->type, unknown, cfg->style, NULL);
	}

	directory = typ_type_new(cfg, _("Directory"), S_IFDIR, 0, NULL, NULL, NULL);
	if(directory != NULL)
	{
		cfg->type = typ_type_insert(cfg->type, NULL, directory);
		cfg->type = typ_type_set_style(cfg->type, directory, cfg->style, _("Directory"));
	}
}

/* ----------------------------------------------------------------------------------------- */

/* 1998-12-14 -	Rewritten another time. Now never inserts anything after the "Unknown" type.
**		Pretty lazily implemented, but so?
** 1999-01-09 -	Rewritten again. Now takes an additional <after> argument, and inserts <typ>
**		right after it. If <after> is the "Unknown", we insert before it. If it's
**		NULL, the same thing happens.
**
**		In the NULL/"Unknown" case the new type goes right before the last element
**		if that element is the "Unknown" type; otherwise it is appended. Returns the
**		(possibly new) head of the list.
*/
GList * typ_type_insert(GList *list, FType *after, FType *type)
{
	GList	*tail;

	/* A proper reference element was given: insert directly after it. */
	if((after != NULL) && !TYP_IS_UNKNOWN(after))
		return g_list_insert(list, type, g_list_index(list, after) + 1);

	/* No usable reference; keep a trailing "Unknown" at the very end. */
	tail = g_list_last(list);
	if((tail != NULL) && TYP_IS_UNKNOWN((FType *) tail->data))
		return g_list_insert(list, type, g_list_index(list, tail->data));

	return g_list_append(list, type);
}

/* 1998-12-14 -	Remove a type from the list, and return the new list. Not strictly
**		necessary (only called at one place), but makes me feel good. :^)
**
**		The removed type is also destroyed, so <type> must not be used afterwards.
**		The "Unknown" type is never removed. The list is returned unchanged if
**		either <list> or <type> is NULL; in particular, a NULL <type> no longer
**		makes the function return NULL and thereby lose a non-empty list.
*/
GList * typ_type_remove(GList *list, FType *type)
{
	if(list == NULL || type == NULL)	/* Nothing to do; hand back the list as it is. */
		return list;

	if(TYP_IS_UNKNOWN(type))		/* Can't remove the "Unknown" type. */
		return list;

	list = g_list_remove(list, type);
	typ_type_destroy(type);

	return list;
}

/* 2002-03-17 -	Do a globbed lookup of a named type. Returns a list of matching FTypes, which
**		will be a sublist of the grand input <list> of all types. The list, of course,
**		will need to be freed when you're done. The data in it, however, will not.
*/
GList * typ_type_lookup_glob(const GList *list, const gchar *glob)
{
	GList		*ret = NULL;
	gchar		*re_src;
	regex_t		re;

	re_src = stu_glob_to_re(glob);
	if(regcomp(&re, re_src, REG_EXTENDED | REG_NOSUB | REG_ICASE) == 0)
	{
		for(; list != NULL; list = g_list_next(list))
		{
			if(regexec(&re, ((FType *) list->data)->name, 0, NULL, 0) == 0)
				ret = g_list_append(ret, list->data);
		}
		regfree(&re);
	}
	g_free(re_src);
	return ret;
}

/* ----------------------------------------------------------------------------------------- */

/* 1998-09-17 -	Change the name of given type. Since the name is used as a fall-back
**		when sorting types of equal priority, this calls for a resort.
** 1998-12-13 -	Removing the priorities also removed the sorting dependency on names,
**		so this became a lot simpler. I could remove the entire function,
**		but I'll keep it. You never know...
**
**		Today this just copies <name> into the type's name buffer (bounded by the
**		buffer's size) and returns <list> exactly as it was passed in.
*/
GList * typ_type_set_name(GList *list, FType *type, const gchar *name)
{
	const gsize	room = sizeof(type->name);

	stu_strncpy(type->name, name, room);

	return list;
}

/* 1999-05-29 -	Set the 'style' field of <type> to the style whose name is <name>.
**
**		The style is looked up in <si> with stl_styleinfo_style_find(), and whatever
**		that returns is stored. Nothing happens if <type> or <si> is NULL. The
**		<list> is returned unchanged in all cases.
*/
GList * typ_type_set_style(GList *list, FType *type, StyleInfo *si, const gchar *name)
{
	if((type == NULL) || (si == NULL))
		return list;

	type->style = stl_styleinfo_style_find(si, name);

	return list;
}

/* 1998-12-13 -	Move given <type> either up (<delta> == -1) or down (1). Returns
**		new version of <list>. Other <delta> values are illegal.
**
**		The list is returned unchanged if <delta> is illegal, if <type> is NULL or
**		not a member of <list> (it used to get inserted in that case), or if <type>
**		is the "Unknown" type, which has to stay last since it matches everything.
**		The new position is clamped so that a moved type never lands in the final
**		slot of the list; this is what keeps a trailing "Unknown" at the end.
*/
GList * typ_type_move(GList *list, FType *type, gint delta)
{
	gint	pos, np, last;

	if(delta != -1 && delta != 1)
		return list;
	if(type == NULL || TYP_IS_UNKNOWN(type))
		return list;
	if((pos = g_list_index(list, type)) < 0)	/* Not in the list, so nothing to move. */
		return list;

	list = g_list_remove(list, type);
	last = (gint) g_list_length(list) - 1;		/* Index of last remaining element, -1 if none. */
	np   = pos + delta;
	if(np > last)
		np = last;
	if(np < 0)
		np = 0;

	return g_list_insert(list, type, np);
}
