#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ots/libots.h>

/*
This program takes N articles and tries to summarize them into a single one.
*/


/* Upper bound on lines per text block; not referenced by the active code visible here. */
#define MAX_LINES_PER_BLOCK 1
/* Number of topics requested from ots_text_stem_list for each text block. */
#define TOPICS_PER_BLOCK 20
/* A pool block replaces a main block only when their topic score exceeds this. */
#define SCORE_THRESHOLD 2

/* Size in bytes of the file read chunk and of the initial text buffer.
   Parenthesized so the macro expands safely inside any expression. */
#define BUFF_SIZE (1024 * 8)

/* A piece of article text together with the topic list extracted from it. */
typedef struct
{
  GList *topics;		/* topic list returned by ots_text_stem_list for `text` */
  char *text;			/* the text of this block */
  int swapable;			/* set to 1 on creation; main() clears it once the block's text has been replaced */
  int article_id;		/* argv index of the source article; main() sets it only for pool blocks */
} TextBlock;


TextBlock *
new_TextBlock (void)
{
  TextBlock *aBlock = g_new0 (TextBlock, 1);
  (GList *) aBlock->topics = NULL;
  (char *) aBlock->text = NULL;
  aBlock->swapable=1;
  return aBlock;
}


/*
 * Open `filename` for reading.
 *
 * Returns the opened stream, or NULL when `filename` is NULL or the file
 * cannot be opened; an open failure is reported through perror.
 */
FILE *
stream_from_file (char *filename)
{
  FILE *fp;

  if (filename == NULL)
    return NULL;

  fp = fopen (filename, "r");
  if (fp == NULL)
    perror ("Couldn't load input file");

  return fp;
}

/*
 * Remove from MainList every node whose string also appears in SubList.
 *
 * Strings are compared with strncmp over at most 255 characters. Nodes with
 * NULL data (in either list) never match. SubList is not modified.
 *
 * Returns the new head of MainList: NULL when MainList is empty or every
 * node was removed, and MainList's head unchanged when SubList is empty.
 *
 * Only the list nodes are released; the strings they pointed to are left
 * alone (NOTE(review): ownership of the strings is not visible here).
 */
GList *subtract_list_from_list(GList *MainList, GList *SubList)
{
	GList *newList;
	GList *node;
	GList *next;
	GList *sub;

	if (!MainList) return NULL;

	newList = g_list_first(MainList);
	if (!SubList) return newList;	/* nothing to subtract */

	for (node = newList; node; node = next)
	{
		/* Remember the successor first: node may be freed below. */
		next = g_list_next(node);
		if (!node->data) continue;

		for (sub = g_list_first(SubList); sub; sub = g_list_next(sub))
		{
			if (sub->data && 0 == strncmp(node->data, sub->data, 255))
			{
				newList = g_list_delete_link(newList, node);
				break;	/* node is gone; move on to the next one */
			}
		}
	}

	return newList;
}

char *
buffer_from_file (FILE * stream)
{
  unsigned char fread_buffer[BUFF_SIZE];
  unsigned char *buffer;
  size_t nread, total_read, avail_size;

  if (stream == NULL)
    return NULL;

  buffer = g_new0 (unsigned char, BUFF_SIZE);
  avail_size = BUFF_SIZE;
  total_read = nread = 0;
  while ((nread =
	  fread (fread_buffer, sizeof (unsigned char), sizeof (fread_buffer),
		 stream)) > 0)
    {
      if (nread + total_read > avail_size)
	{
	  avail_size *= 2;
	  buffer = g_renew (unsigned char, buffer, avail_size);
	}

      strncpy (buffer + total_read, fread_buffer, nread);
      total_read += nread;
    }
  fclose (stream);
  return buffer;
}

/*
 * Exchange the two pointers stored at `a` and `b`.
 *
 * Does nothing when either argument is NULL.
 */
void
swap_pointer (gpointer * a, gpointer * b)
{
  gpointer tmp;

  if (a == NULL || b == NULL)
    return;

  /* Swap the pointed-to values; swapping the parameters has no effect
     outside this function. */
  tmp = *a;
  *a = *b;
  *b = tmp;
}

/*
 * Print every word of `aList`, starting from the first node of the list,
 * as -[word]- on one line; words equal to "\n" are skipped. The line is
 * closed with "|" and a newline.
 */
void
print_glist_of_words(GList *aList)
{
	GList *node;

	for (node = g_list_first(aList); node != NULL; node = g_list_next(node))
	{
		char *word = (char *)node->data;

		if (strcmp("\n", word) != 0)
			printf("-[%s]-", word);
	}
	printf("|\n");
}

/*
 * Decide whether the sentence at list node `ln` starts a new paragraph.
 *
 * Returns 1 when `ln` or its data is NULL, when the sentence has fewer than
 * two words, when its first two words are both "\n", or when its first word
 * is "\n\n". Returns 0 otherwise.
 */
int 
is_new_paragraph(GList *ln) //a glist of sentences
{
	GList *head;
	GList *after;
	char *w1;
	char *w2;

	if (ln == NULL || ln->data == NULL)
		return 1;

	head = ((OtsSentence *)ln->data)->words;
	if (head == NULL)
		return 1;

	after = head->next;
	if (after == NULL)
		return 1;

	w1 = head->data;
	w2 = after->data;

	/* Two consecutive newline words. */
	if (w1 != NULL && w2 != NULL
	    && strcmp(w1, "\n") == 0 && strcmp(w2, "\n") == 0)
		return 1;

	/* A single word holding a double newline. */
	if (w1 != NULL && strcmp(w1, "\n\n") == 0)
		return 1;

	return 0;
}

/*
 * Build a TextBlock from one node of an article's line list: the block's
 * text is the text of that line and its topics are extracted from that text.
 *
 * NOTE(review): the string returned by ots_get_line_text is not released
 * here, and `len` is passed as int * - confirm both against libots.h.
 */
static TextBlock *
text_block_from_line (GList *line_node)
{
  int len;
  TextBlock *aBlock = new_TextBlock ();
  GString *block_text = g_string_new (NULL);

  g_string_append (block_text, ots_get_line_text (line_node->data, 0, &len));

  /* Keep the character data; only the GString wrapper is released. */
  aBlock->text = g_string_free (block_text, FALSE);
  aBlock->topics = ots_text_stem_list (aBlock->text, "en", TOPICS_PER_BLOCK);
  return aBlock;
}

/*
 * usage: join-art BaseArticle.txt essay2.txt [essay3.txt ...]
 *
 * 1. Extracts the common topics of the base article (argv[1]).
 * 2. Splits every other article into one-line text blocks, removes the base
 *    article's common topics from each block's topic list, and collects the
 *    blocks in a pool.
 * 3. Splits the base article into one-line text blocks.
 * 4. Repeatedly finds the best scoring (base block, pool block) pair and,
 *    while that score exceeds SCORE_THRESHOLD, replaces the base block's
 *    text with the pool block's text. Each base block is replaced at most
 *    once and each pool block is used at most once.
 * 5. Prints the resulting article.
 *
 * Secondary articles that cannot be read are skipped. Always returns 0.
 */
int
main (int argc, char *argv[])
{
  int j;
  OtsArticle *Art;
  OtsArticle *OriginalArt;
  GList *tmplist;
  GList *main_index;
  GList *pool_index;

  if (argc < 3)
    {
      printf ("usage: join-art [BaseArticle.txt] [essay2.txt essay3.txt essay4.txt ...]\n");
      return 0;
    }

  char *main_text = buffer_from_file (stream_from_file (argv[1]));
  if (main_text == NULL)
    return 0;

  GList *line_pool = NULL;	/* TextBlocks taken from the secondary articles */
  GList *main_common_topics = ots_text_stem_list (main_text, "en", 3);

  printf ("List of topics:\n");
  print_glist_of_words (main_common_topics);

  for (j = 2; j < argc; j++)
    {
      char *article_text = buffer_from_file (stream_from_file (argv[j]));

      /* stream_from_file has already reported the failure; skip the file
         instead of handing NULL to strlen. */
      if (article_text == NULL)
	continue;

      Art = ots_new_article ();
      ots_load_xml_dictionary (Art, "en");
      ots_parse_stream (article_text, strlen (article_text), Art);

      for (tmplist = g_list_first (Art->lines); tmplist;
	   tmplist = g_list_next (tmplist))
	{
	  TextBlock *aBlock = text_block_from_line (tmplist);

	  aBlock->article_id = j;
	  /* Remove the main article's topics from the block's subjects. */
	  aBlock->topics =
	    subtract_list_from_list (aBlock->topics, main_common_topics);
	  line_pool = g_list_prepend (line_pool, aBlock);
	}

      ots_free_article (Art);
    }				/* line_pool is now a list of TextBlocks */

  OriginalArt = ots_new_article ();
  ots_load_xml_dictionary (OriginalArt, "en");
  ots_parse_stream (main_text, strlen (main_text), OriginalArt);
  ots_grade_doc (OriginalArt);

  GList *main_article = NULL;	/* TextBlocks of the base article, in order */

  for (tmplist = g_list_first (OriginalArt->lines); tmplist;
       tmplist = g_list_next (tmplist))
    main_article = g_list_append (main_article, text_block_from_line (tmplist));

  ots_free_article (OriginalArt);

  int substitute_flag = 1;
  int replaced = 0;		/* number of blocks actually replaced */

  while (substitute_flag)
    {
      int max = 0;
      GList *pos1 = NULL;	/* best candidate in main_article */
      GList *pos2 = NULL;	/* best candidate in line_pool */

      for (main_index = g_list_first (main_article); main_index;
	   main_index = g_list_next (main_index))
	{
	  for (pool_index = g_list_first (line_pool); pool_index;
	       pool_index = g_list_next (pool_index))
	    {
	      int score = 0;

	      /* Blocks that were already replaced keep a score of 0. */
	      if (main_index->data && pool_index->data
		  && ((TextBlock *) main_index->data)->swapable)
		score =
		  ots_topic_list_score (((TextBlock *) main_index->data)->topics,
					((TextBlock *) pool_index->data)->topics);

	      /* On equal scores the later pair wins. */
	      if (score >= max)
		{
		  max = score;
		  pos2 = pool_index;
		  pos1 = main_index;
		}
	    }
	}

      if (pos1 && pos2 && max > SCORE_THRESHOLD)
	{
	  TextBlock *target = pos1->data;
	  TextBlock *source = pos2->data;

	  printf ("replacing\n[%d][%s]\nwith (%d)\n[%d][%s]\n----\n",
		  target->article_id, target->text,
		  max,
		  source->article_id, source->text);

	  target->text = source->text;
	  target->swapable = 0;	/* do not swap it again */

	  print_glist_of_words (target->topics);
	  print_glist_of_words (source->topics);
	  printf ("--------------------------------------------\n\n");

	  /* Remove the used line from the pool itself so the pool head stays
	     valid and the line cannot be chosen again. */
	  line_pool = g_list_delete_link (line_pool, pos2);
	  replaced++;
	}
      else
	{
	  printf ("Replaced (%d) blocks. No more text blocks to replace score[%d]\n----\n",
		  replaced, max);
	  substitute_flag = 0;
	}
    }				/* end of replace loop */

  /* Print the resulting article. */
  for (main_index = g_list_first (main_article); main_index;
       main_index = g_list_next (main_index))
    printf ("%s\n", ((TextBlock *) main_index->data)->text);

  return 0;
}
