/* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 8; tab-width: 8 -*- */

/*
 *  Medusa
 *
 *  Copyright (C) 2000 Eazel, Inc.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public
 *  License along with this library; if not, write to the Free
 *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *  Author: Rebecca Schulman <rebecka@eazel.com>
 */

/* medusa-master-db.c:  The top level database 
   that manages all of the indexes (the file index and the text index) */

#define FREED_URI_MARKER -1
#define END_OF_URI_LIST_MARKER 0

#include <dirent.h>
#include <glib.h>
#include <libgnomevfs/gnome-vfs-types.h>
#include <libgnomevfs/gnome-vfs-file-info.h>
#include <libgnomevfs/gnome-vfs-ops.h>	
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

#include "medusa-conf.h"
#include "medusa-file-index.h"
#include "medusa-master-db.h"
#include "medusa-master-db-private.h"
#include "medusa-rdb-file.h"
#include "medusa-rdb-table.h"
#include "medusa-search-uri.h"
#include "medusa-text-index.h"
#include "medusa-text-index-queries.h"
#include "medusa-uri-list.h"
#include "medusa-uri-list-private.h"
#include "medusa-utils.h"

#define TEXT_INDEX_ON

/* Accumulated outcome of the content (text index) clauses of a query. */
typedef struct MedusaContentResult {
	/* Array of uri numbers produced by the text index; the merge code in
	   this file relies on it being in descending order */
	gint32 *uris;
	/* TRUE if uris lists the files to consider (inclusion query),
	   FALSE if it lists the files to leave out (exclusion query) */
	gboolean uris_contain_content;
	/* Number of entries in uris */
	int number_of_uris;
} MedusaContentResult;

/* Forward declarations of the helpers private to this file */
static void        index_directory                    (MedusaMasterDB *master_db,
						       char *directory_name);
static void        index_file                         (MedusaMasterDB *master_db,
						       char *directory_name,
						       char *file_name,
						       GnomeVFSFileInfo *file_info);
static MedusaContentResult *    
                   content_requests_to_uri_numbers    (MedusaMasterDB *master_db,
						       GList *clause_closures);
/* Run a set of queries on a uri, and if the uri fits, add it to the current results */
static GList *     append_uri_to_results_if_matches   (GList *result_list,
						       GList *clause_closures,
						       MedusaMasterDB *master_db,
						       int uri_number);
static void        merge_new_uris_with_current_results_and_free (MedusaContentResult *old_results,
								 gint32 *new_uris,
								 int number_of_new_uris,
								 gboolean new_uris_are_inclusion_uris);
static gboolean    query_execute_from_clause_closure  (MedusaClauseClosure *clause_closure,
						       int uri_number);
static void        medusa_master_db_destroy           (MedusaMasterDB *db);


/* Create a master database made of a uri list, a text index and a
   file system (attribute) database.  The file name and directory name
   hashes are created once here and handed to both the uri list and the
   file system database.  The returned object starts with a reference
   count of one and keeps its own copy of root_uri.

   NOTE(review): an extra reference is taken on each hash after they are
   handed out; nothing in this file drops those references again --
   confirm who is expected to own them.
   NOTE(review): the file system database is created with ROOT_DIRECTORY,
   not with root_uri -- confirm that this is intended. */
MedusaMasterDB *
medusa_master_db_new (char *root_uri,
		      MedusaLogLevel log_level,
		      char *uri_list,
		      char *file_attribute_index,
		      char *file_name_hash,
		      char *directory_name_hash,
		      char *mime_type_hash,
		      char *text_index_start_file,
		      char *text_index_location_file,
		      char *text_index_word_file,
		      char *text_index_temp_file)
{
	MedusaMasterDB *db;
	MedusaHash *shared_file_names;
	MedusaHash *shared_directory_names;

	db = g_new0 (MedusaMasterDB, 1);

	/* Name hashes used by both the uri list and the file system db */
	shared_file_names = medusa_hash_new (file_name_hash, FILE_HASH_BITS);
	shared_directory_names = medusa_hash_new (directory_name_hash, DIRECTORY_HASH_BITS);

	db->uri_list = medusa_uri_list_new (root_uri,
					    log_level,
					    uri_list,
					    shared_file_names,
					    shared_directory_names);
	db->text_index = medusa_text_index_new (text_index_start_file,
						log_level,
						text_index_location_file,
						text_index_word_file,
						text_index_temp_file);
	db->file_system_db = medusa_file_system_db_new (ROOT_DIRECTORY,
							file_attribute_index,
							shared_file_names,
							shared_directory_names,
							mime_type_hash);

	medusa_hash_ref (shared_file_names);
	medusa_hash_ref (shared_directory_names);

	db->root_uri = g_strdup (root_uri);
	db->log_level = log_level;
	db->ref_count = 1;

	return db;
}

/* Build all indexes from scratch by walking the tree that starts at the
   database's root uri.  Both the uri list and the file system database
   must be empty when this is called; the assertions below abort otherwise.
   When TEXT_INDEX_ON is defined the text index is finalized after the
   walk completes. */
void
medusa_master_db_index (MedusaMasterDB *master_db)
{
	/* Make sure that the databases are empty before we start */
	g_assert (medusa_uri_list_number_of_uris (master_db->uri_list) == 0);
	g_assert (medusa_file_system_db_get_number_of_records (master_db->file_system_db) == 0);
	index_directory (master_db, master_db->root_uri);
#ifdef TEXT_INDEX_ON
	medusa_text_index_finish_indexing (master_db->text_index);
#endif
}


/* Placeholder for updating an existing index.  Not implemented yet:
   calling it currently does nothing. */
void
medusa_master_db_update (MedusaMasterDB *master_db)
{
	/* FIXME: not implemented */
}

/* Run the search described by search_uri against the indexes.

   Returns a GList whose elements are the uri strings produced by
   medusa_uri_number_to_uri () for every matching file, or NULL if the
   search uri yields no clauses or nothing matches.  Matches are
   prepended, so the list is in the reverse of the order in which uris
   were examined.

   Content (text index) clauses are resolved first into a list of uri
   numbers.  For an inclusion result only those uris are tested against
   the remaining clauses; for an exclusion result every uri except the
   listed ones is tested.  Without content clauses every uri is tested
   against all clauses.

   NOTE(review): the full scans stop before uri number 0; confirm that
   record 0 is reserved and never a real file.
   NOTE(review): the clause lists built here are not released; their
   ownership is not visible from this file. */
GList *
medusa_master_db_query (MedusaMasterDB *master_db,
			char *search_uri)
{
	int i, number_of_uris, content_uri_position;
	GList *clause_closures;
	GList *result_list;
	MedusaContentResult *content_results;
	GList *content_clauses, *non_content_clauses;

	result_list = NULL;
	/* Turn the search URI into a set of functions and environments
	   to be run on each file, to find out if that file matches */
	clause_closures = medusa_search_uri_to_clause_closures (search_uri, master_db);
	/* Return nothing for uris that are invalid */
	if (clause_closures == NULL) {
		return NULL;
	}
	number_of_uris = medusa_uri_list_number_of_uris (master_db->uri_list);
	content_clauses = medusa_g_list_partition (clause_closures,
						   medusa_clause_closure_is_content_search,
						   NULL,
						   &non_content_clauses);
	if (content_clauses != NULL) {
		content_results = content_requests_to_uri_numbers (master_db,
								   content_clauses);
		g_assert (content_results != NULL);
		if (content_results->uris_contain_content == TRUE) {
			/* Only the uris named by the text index can match */
			for (i = 0; i < content_results->number_of_uris; i++) {
				if (master_db->log_level == MEDUSA_DB_LOG_EVERYTHING) {
					printf ("Trying result %d\n", content_results->uris[i]);
				}
				result_list = append_uri_to_results_if_matches (result_list,
										non_content_clauses,
										master_db,
										content_results->uris[i]);
			}
		}
		else {
			content_uri_position = 0;
			/* Search everything but the content results.  Both the
			   scan and the exclusion list run in descending order,
			   so one cursor into the list is enough. */
			for (i = number_of_uris - 1; i > 0; i--) {
				/* Only look at the list while entries remain;
				   it may be empty or already used up */
				if (content_uri_position < content_results->number_of_uris &&
				    content_results->uris[content_uri_position] == i) {
					content_uri_position++;
					continue;
				}
				result_list = append_uri_to_results_if_matches (result_list,
										non_content_clauses,
										master_db,
										i);
			}
		}
		/* The content result and its uri array were allocated for
		   this query only; release them now that we are done */
		g_free (content_results->uris);
		g_free (content_results);
	}
	else {
		for (i = number_of_uris - 1; i > 0 ; i--) {
			result_list = append_uri_to_results_if_matches (result_list,
									clause_closures,
									master_db,
									i);
		}
	}
	return result_list;

}
			

/* Evaluate one clause closure against the record with the given uri
   number and return the clause's verdict.  A closure that carries a
   file system database is evaluated on that database's record;
   otherwise it is evaluated on the uri list's record.  Returns FALSE
   (after a warning) if the closure has no query function. */
static gboolean
query_execute_from_clause_closure (MedusaClauseClosure *clause_closure,
				   int uri_number)
{
	g_return_val_if_fail (clause_closure->query_func != NULL, FALSE);

	/* No file system database attached: query the uri list instead */
	if (clause_closure->file_system_db == NULL) {
		return clause_closure->query_func (clause_closure->uri_list,
						   medusa_rdb_record_number_to_record (clause_closure->uri_list->uri_names,
										       uri_number),
						   clause_closure->argument);
	}

	return clause_closure->query_func (clause_closure->file_system_db,
					   medusa_rdb_record_number_to_record (clause_closure->file_system_db->file_database,
									       uri_number),
					   clause_closure->argument);
}
				
				       

/* Index every entry of the directory named by the uri directory_name,
   descending into subdirectories.

   The "." and ".." entries, stoplist files, NFS mount points and entries
   whose file info cannot be fetched are skipped.  A directory that cannot
   be opened is silently ignored: that is an expected condition while
   crawling a file system, not a programming error. */
static void
index_directory (MedusaMasterDB *master_db,
		 char *directory_name)
{
	/* Number of leading characters of the uri that are dropped to get
	   the local path; presumably the "file://" scheme -- TODO confirm */
	const size_t uri_prefix_length = 7;
	DIR *directory_table;
	char *plain_directory_name;
	struct dirent *directory_entry;
	char *full_file_name;
	GnomeVFSFileInfo *file_info;
	GnomeVFSResult result;
	gboolean is_directory;

	/* A uri shorter than the prefix has no path part to open */
	if (strlen (directory_name) < uri_prefix_length) {
		return;
	}
	plain_directory_name = directory_name + uri_prefix_length;

	directory_table = opendir (plain_directory_name);
	/* If the directory is unreadable, don't bother */
	if (directory_table == NULL) {
		return;
	}

	while ((directory_entry = readdir (directory_table)) != NULL) {
		if (strcmp (directory_entry->d_name, ".") == 0 ||
		    strcmp (directory_entry->d_name, "..") == 0) {
			continue;
		}

		full_file_name = medusa_full_uri_from_directory_and_file (directory_name,
									  directory_entry->d_name);

		if (medusa_uri_list_is_stoplist_file (full_file_name)) {
			if (master_db->log_level == MEDUSA_DB_LOG_ABBREVIATED ||
			    master_db->log_level == MEDUSA_DB_LOG_EVERYTHING) {
				printf ("Skipping file %s\n", full_file_name);
			}
			g_free (full_file_name);
			continue;
		}

		if (medusa_uri_list_is_nfs_mount_point (full_file_name)) {
			if (master_db->log_level == MEDUSA_DB_LOG_ABBREVIATED ||
			    master_db->log_level == MEDUSA_DB_LOG_EVERYTHING) {
				printf ("skipping nfs mount point %s for now\n", full_file_name);
			}
			g_free (full_file_name);
			continue;
		}

		file_info = gnome_vfs_file_info_new ();
		result = gnome_vfs_get_file_info (full_file_name,
						  file_info,
						  GNOME_VFS_FILE_INFO_GET_MIME_TYPE);
		if (result != GNOME_VFS_OK) {
			gnome_vfs_file_info_unref (file_info);
			g_free (full_file_name);
			continue;
		}

		index_file (master_db, directory_name, directory_entry->d_name, file_info);

		/* Remember the type before dropping our reference, so the
		   info is released exactly once and before we recurse */
		is_directory = (file_info->type == GNOME_VFS_FILE_TYPE_DIRECTORY);
		gnome_vfs_file_info_unref (file_info);

		if (is_directory) {
			index_directory (master_db, full_file_name);
		}

		g_free (full_file_name);
	}
	closedir (directory_table);
}


/* Add a single file to every index: the uri list, the file system
   (attribute) database and, when TEXT_INDEX_ON is defined, the text
   index.  Depending on the log level the uri is also printed. */
static void
index_file (MedusaMasterDB *master_db,
	    char *directory_name,
	    char *file_name,
	    GnomeVFSFileInfo *file_info)
{
	int new_uri_number;
	char *indexed_uri;

	/* The count taken before the insert is used as the number of the
	   entry that is about to be added */
	new_uri_number = medusa_uri_list_number_of_uris (master_db->uri_list);
	medusa_uri_list_index_file (master_db->uri_list, directory_name, file_name);
	indexed_uri = medusa_uri_number_to_uri (master_db->uri_list, new_uri_number);

	if (master_db->log_level == MEDUSA_DB_LOG_EVERYTHING) {
		printf ("Indexing name and attributes of %s\n", indexed_uri);
	}
	else if (master_db->log_level == MEDUSA_DB_LOG_ABBREVIATED) {
		printf ("%s\n", indexed_uri);
	}

	medusa_file_system_db_index_file (master_db->file_system_db,
					  new_uri_number,
					  file_info);
#ifdef TEXT_INDEX_ON
	medusa_text_index_read_file (master_db->text_index,
				     indexed_uri,
				     new_uri_number,
				     file_info);
#endif
	/* The uri string was handed to us; release it once indexed */
	g_free (indexed_uri);
}

/* Test the file with the given uri number against every clause in
   clause_closures.  If all of them accept it, its uri string is
   prepended to result_list.  The (possibly new) head of the result
   list is returned in either case.

   The clause list ends either at its last node or at the first node
   that carries no closure, whichever comes first. */
static GList *
append_uri_to_results_if_matches (GList *result_list,
				  GList *clause_closures,
				  MedusaMasterDB *master_db,
				  int uri_number)
{
	GList *node;

	for (node = clause_closures;
	     node != NULL && node->data != NULL;
	     node = node->next) {
		/* One rejecting clause is enough to drop this uri */
		if (query_execute_from_clause_closure (node->data, uri_number) == FALSE) {
			return result_list;
		}
	}

	return g_list_prepend (result_list,
			       medusa_uri_number_to_uri (master_db->uri_list,
							 uri_number));
}

/* Resolve a non-empty list of content clauses into one combined list of
   uri numbers.

   The first clause seeds the result through a text index lookup; each
   further clause is looked up in turn and folded into the result by
   merge_new_uris_with_current_results_and_free ().  The returned
   structure and its uris array are newly allocated and belong to the
   caller.  Returns NULL (after a warning) if clause_closures is empty. */
static MedusaContentResult *
content_requests_to_uri_numbers (MedusaMasterDB *master_db,
				 GList *clause_closures)
{
	MedusaClauseClosure *closure, *new_closure_data;
	GList *new_closure;
	gint32 *new_uris;
	int number_of_new_uris;
	MedusaContentResult *results;

	/* An empty GList is the NULL pointer; no need to walk it */
	g_return_val_if_fail (clause_closures != NULL, NULL);
	closure = clause_closures->data;
	g_assert (closure->is_content_request == TRUE &&
		  (closure->inclusion_query == TRUE ||
		   closure->inclusion_query == FALSE));
	results = g_new0 (MedusaContentResult, 1);
	results->uris_contain_content = closure->inclusion_query;
	results->uris = medusa_text_index_word_to_uri_numbers (master_db->text_index,
							       closure->content_request,
							       &results->number_of_uris);
	/* Keep combining the results with those of each further clause.
	   With a single clause the loop body never runs. */
	for (new_closure = clause_closures->next;
	     new_closure != NULL;
	     new_closure = new_closure->next) {
		new_closure_data = (MedusaClauseClosure *) new_closure->data;
		/* Stop at a node that carries no closure, the same way
		   append_uri_to_results_if_matches () ends its walk */
		if (new_closure_data == NULL) {
			break;
		}
		new_uris = medusa_text_index_word_to_uri_numbers (master_db->text_index,
								  new_closure_data->content_request,
								  &number_of_new_uris);
		merge_new_uris_with_current_results_and_free (results,
							      new_uris,
							      number_of_new_uris,
							      new_closure_data->inclusion_query);
	}
	return results;
}

/* Fold the uri numbers of one more content clause into old_results.

   old_results                  accumulated result; updated in place
   new_uris                     uri numbers for the new clause
   number_of_new_uris           number of entries in new_uris
   new_uris_are_inclusion_uris  TRUE if new_uris are files to include,
                                FALSE if they are files to leave out

   Both the previous old_results->uris array and new_uris are freed
   here; old_results ends up owning the freshly merged array together
   with its length.  The clauses are combined as an intersection of
   criteria, and the merges are linear because the text index results
   are in descending order. */
static void
merge_new_uris_with_current_results_and_free (MedusaContentResult *old_results,
					      gint32 *new_uris,
					      int number_of_new_uris,
					      gboolean new_uris_are_inclusion_uris)
{
	gint32 *merged_results;
	int number_of_merged_results;

	/* This function will break if these aren't the case,
	   so check now */
	g_assert (old_results->uris_contain_content == TRUE ||
		  old_results->uris_contain_content == FALSE);
	g_assert (new_uris_are_inclusion_uris == TRUE ||
		  new_uris_are_inclusion_uris == FALSE);

	/* Defined fallback in case none of the cases below applies
	   (only possible when the assertions are compiled out) */
	merged_results = NULL;
	number_of_merged_results = 0;

	if (old_results->uris_contain_content == new_uris_are_inclusion_uris) {
		/* Case one: both lists are inclusion lists, or both are
		   exclusion lists.
		   NOTE(review): for two exclusion lists an intersection
		   only leaves out uris present in both; a union looks
		   like what "not A and not B" needs -- confirm. */
		merged_results = medusa_intersect_two_descending_integer_lists (old_results->uris,
										old_results->number_of_uris,
										new_uris,
										number_of_new_uris,
										&number_of_merged_results);
	}
	else if (old_results->uris_contain_content == TRUE && new_uris_are_inclusion_uris == FALSE) {
		/* Case two: remove the new uris, which are _not_ to be
		   included, from the old inclusion list */
		merged_results = medusa_difference_of_two_descending_integer_lists (old_results->uris,
										    old_results->number_of_uris,
										    new_uris,
										    number_of_new_uris,
										    &number_of_merged_results);
	}
	else if (old_results->uris_contain_content == FALSE && new_uris_are_inclusion_uris == TRUE) {
		/* Case three, the reflection of two: remove the old
		   exclusion uris from the new inclusion list */
		merged_results = medusa_difference_of_two_descending_integer_lists (new_uris,
										    number_of_new_uris,
										    old_results->uris,
										    old_results->number_of_uris,
										    &number_of_merged_results);
		/* We need to change the negative inclusion to a positive one now */
		old_results->uris_contain_content = TRUE;
	}

	g_free (old_results->uris);
	g_free (new_uris);

	old_results->uris = merged_results;
	/* The length must describe the merged array, not the new input */
	old_results->number_of_uris = number_of_merged_results;
}

/* Take one more reference on a master database.  The database must
   still be alive, i.e. hold at least one reference already. */
void
medusa_master_db_ref (MedusaMasterDB *master_db)
{
	g_assert (master_db->ref_count > 0);
	master_db->ref_count += 1;
}
 
/* Give up one reference on a master database.  Dropping the last
   reference destroys the database; the pointer must not be used
   afterwards. */
void
medusa_master_db_unref (MedusaMasterDB *master_db)
{
	g_assert (master_db->ref_count > 0);

	/* Other holders remain: just count down */
	if (master_db->ref_count != 1) {
		master_db->ref_count--;
		return;
	}

	/* This was the last reference; the count is left at one, which
	   is what the destroy routine asserts */
	medusa_master_db_destroy (master_db);
}

/* Release a master database whose last reference is being dropped:
   the uri list is unreferenced, the file system database is freed, and
   the copied root uri and the structure itself are released.

   NOTE(review): the text index is not released here (see the FIXME
   below), and the extra hash references taken in medusa_master_db_new ()
   are not dropped either -- confirm whether these are leaks. */
static void
medusa_master_db_destroy (MedusaMasterDB *master_db)
{
	/* Only ever called for the final reference */
	g_assert (master_db->ref_count == 1);

	medusa_uri_list_unref (master_db->uri_list);
	medusa_file_system_db_free (master_db->file_system_db);
	/* FIXME: Put text index in here when applicable */

	g_free (master_db->root_uri);
	g_free (master_db);
}


