#!/usr/bin/env python
#
# remove-extraneous-previews: A stripped-down version of purge-previews
# meant for use as part of an update-to-update.1 workflow.
#
# Released under the MIT License; Copyright (c) 2008 Phil Bordelon
#
#####

import secore
import os
import os.path
import sys

# DATASTORE_PATH: Root directory of the Sugar datastore; the index and the
# preview files are looked up beneath it.  This script depends entirely on
# the underlying 'secore' Xapian implementation.  Since the problem it works
# around has been fixed in newer Sugar builds, it will no longer be needed
# once the datastore format changes.
DATASTORE_PATH = "/home/olpc/.sugar/default/datastore"

# DATASTORE_CHARACTERS: The lowercase hex digits that may appear in a
# datastore filename (everything except the dashes).
DATASTORE_CHARACTERS = tuple("0123456789abcdef")

def is_datastore_filename(filename):
   """Return True if filename has the shape of a datastore filename.

   A datastore filename looks like this:

      5e12f499-0fc2-44a3-9c16-69e0fe60234e

   That is 36 characters, with dashes at zero-based offsets 8, 13, 18 and
   23, and a character from DATASTORE_CHARACTERS (lowercase hex) in every
   other position.  Anything else yields False.
   """

   dash_offsets = (8, 13, 18, 23)

   # Check one: the overall length must be exactly 36.
   if len(filename) != 36:
      return False

   # Check two: each separator slot must hold a dash.
   for offset in dash_offsets:
      if filename[offset] != "-":
         return False

   # Check three: outside the separator slots only hex characters are
   # allowed, so the four dashes are the only non-hex characters present.
   for offset, character in enumerate(filename):
      if offset in dash_offsets:
         continue
      if character not in DATASTORE_CHARACTERS:
         return False

   # Every check passed.
   return True

def purge_previews(datastore_path):
   """Delete preview files that have no matching document in the datastore.

   datastore_path is the root directory of the datastore.  The index is
   opened from <datastore_path>/store/index and the preview files are read
   from <datastore_path>/store/preview.

   Returns None.  If the index cannot be opened
   (secore.xapian.DatabaseOpeningError) the function returns without
   touching anything.  Errors raised by os.listdir() or os.unlink()
   propagate to the caller, but the search connection is closed first.
   """

   # Build our SearchConnection with which we'll probe the index of the
   # datastore.
   datastore_index_path = os.path.join(datastore_path, "store", "index")

   try:
      search_conn = secore.SearchConnection(datastore_index_path)
   except secore.xapian.DatabaseOpeningError:

      # The datastore does not even exist: we're done.
      return

   try:

      # Get the directory with previews and walk the files stored there.
      preview_dir = os.path.join(datastore_path, "store", "preview")
      for preview_file in os.listdir(preview_dir):

         # People may have files that aren't "preview" files in here; we
         # don't want to handle those.
         if not is_datastore_filename(preview_file):
            sys.stderr.write("WARNING: Detected non-preview file %s; skipping.\n" % preview_file)
            continue

         # We assume that this is a real preview file.  Preview images and
         # datastore objects share the same UID, and the underlying Xapian
         # index uses that UID as the key, so we look the document up by
         # the preview's filename.  get_document() throws an exception if
         # it cannot find a matching document, which means the preview is
         # an orphan and should be purged.
         try:
            search_conn.get_document(preview_file)
         except KeyError:

            # No matching datastore file.  Purge.  The trailing newline
            # keeps successive messages on separate lines.
            sys.stderr.write("Deleting orphan preview file: %s\n" % preview_file)
            os.unlink(os.path.join(preview_dir, preview_file))

   finally:

      # Close the SearchConnection even if listing or deleting failed.  We
      # made no modifications to the database, so this is mostly politeness,
      # but it guarantees the connection is not left open on an error path.
      search_conn.close()

# Only purge when run as a script, not when imported as a module.
if __name__ == "__main__":
   purge_previews(DATASTORE_PATH)
