#!/usr/bin/python
import pkg_resources
pkg_resources.require("TurboGears")

from sqlobject import *
import sys, os, string
import turbogears
from mirrormanager.model import *
from mirrormanager.repomap import *
from mirrormanager.lib import manage_pidfile, remove_pidfile
import re
import glob
import sha as shamod
import md5 as md5mod
import yum.repoMDObject
import datetime
from turbogears import config

# Defaults; rootdir may be overridden by the optional second CLI argument.
rootdir='/'
# Lock file used to ensure only one umdl instance runs at a time.
pidfile='/var/run/mirrormanager/umdl.pid'

# look on the command line for a desired config file
if len(sys.argv) < 2:
    print "usage: update-master-directory-list dev.cfg [rootdir]"
    sys.exit(1)
if len(sys.argv) >= 2:
    turbogears.update_config(configfile=sys.argv[1], modulename="mirrormanager.config")
if len(sys.argv) >= 3:
    rootdir=sys.argv[2]

# Single-instance guard: bail out if another umdl is already running.
if manage_pidfile(pidfile):
    print "another instance is running, try again later."
    sys.exit(1)

# Must be imported after update_config() so the database URI is known.
from turbogears.database import PackageHub
hub = PackageHub("mirrormanager")
__connection__ = hub

def trim_os_from_dirname(dirname):
    """Strip a trailing '/os' component (and everything after it) from dirname.

    The last occurrence of '/os' is used; if it is absent or at position 0,
    dirname is returned unchanged.
    """
    cut = dirname.rfind('/os')
    if cut <= 0:
        return dirname
    return dirname[:cut]

def rename_SRPMS_source(l):
    """Normalize path components: drop 'source' entries, rename 'SRPMS' to 'source'.

    Returns a new list; the input list is not modified.
    """
    out = []
    for part in l:
        if part == 'SRPMS':
            out.append('source')
        elif part != 'source':
            out.append(part)
    return out


def _get_version_from_path(path):
    s = r'/(([\.\d]+)(\-\w+)?)/'
    m = re.search(re.compile(s), path)
    return m.group(1)

def create_version_from_path(category, path):
    """Create and return a Version record for the version string found in *path*.

    Returns None when no version string can be extracted.  Paths containing
    '/test/' mark the version as a test (pre-)release.
    """
    vname = _get_version_from_path(path)
    if vname is None or vname == '':
        return None
    return Version(product=category.product, name=vname, isTest=('/test/' in path))

def guess_ver_arch_from_path(category, path):
    """Guess the (Version, Arch) pair that *path* belongs to within *category*.

    Arch is matched by finding an Arch name as a whole path component
    ('SRPMS' anywhere in the path short-circuits to the 'source' arch).
    Version is matched the same way against known Versions of the category's
    product; if none matches, a new Version may be created from the path.
    Either element of the returned tuple may be None.
    """
    if 'SRPMS' in path:
        arch = Arch.byName('source')
    else:
        arch = None
        for candidate in Arch.select():
            # arch name must appear as its own path component
            if re.compile('.*(^|/)%s(/|$).*' % (candidate.name)).match(path):
                arch = candidate
                break

    ver = None
    # newest versions/IDs first, also handles stupid Fedora 9.newkey hack.
    for candidate in Version.select(orderBy='-id'):
        if candidate.product != category.product:
            continue
        if re.compile('.*(^|/)%s(/|$).*' % (candidate.name)).match(path):
            ver = candidate
            break

    # create Versions if we can figure it out...
    if ver is None:
        ver = create_version_from_path(category, path)

    return (ver, arch)


# Something like this is committed to yum upstream, but may not be in the copy we are using.
def set_repomd_timestamp(yumrepo):
    timestamp = 0
    for ft in yumrepo.fileTypes():
        thisdata = yumrepo.repoData[ft]
        timestamp = max(int(thisdata.timestamp), timestamp)
    yumrepo.timestamp = timestamp
    return timestamp

def make_file_details_from_checksums(dir):
    """Create FileDetail records for files listed in checksum files in *dir*.

    Looks for md5sum- and sha1sum-style files (MD5SUM, SHA1SUM, *.md5sum,
    *.sha1sum) directly inside the directory, parses them best-effort, and
    creates a FileDetail row for each referenced file unless an identical
    row already exists.
    """

    def _parse_checksum_file(path):
        """Return {filename: hexdigest} parsed from an md5sum/sha1sum-style file."""
        r = {}
        try:
            f = open(path, 'r')
        except (IOError, OSError):
            # was a bare 'except: pass'; a missing/unreadable file is expected
            return r
        try:
            try:
                for line in f:
                    s = line.strip().split()
                    if len(s) < 2:
                        continue
                    # parse for only md5 (32 hex chars) and sha1 (40) values
                    s0len = len(s[0])
                    if s0len != 32 and s0len != 40:
                        continue
                    # strip off extraneous starting '*' char from name
                    r[s[1].strip('*')] = s[0]
            except (IOError, OSError):
                # best-effort: keep whatever was parsed before the error
                pass
        finally:
            # always release the handle (the original leaked it on error)
            f.close()
        return r

    def _checksums_from_globs(dirname, globs):
        """Merge parsed contents of every checksum file matching *globs*."""
        d = {}
        checksum_files = []
        for g in globs:
            checksum_files.extend(glob.glob(os.path.join(rootdir, dirname, g)))
        for f in checksum_files:
            d.update(_parse_checksum_file(f))
        return d

    md5dict = _checksums_from_globs(dir.name, ['*.md5sum', 'MD5SUM'])
    sha1dict = _checksums_from_globs(dir.name, ['*.sha1sum', 'SHA1SUM'])

    # every filename that appears in either checksum set
    files = set(md5dict.keys()) | set(sha1dict.keys())

    for f in files:
        sha1 = sha1dict.get(f)
        md5  = md5dict.get(f)
        try:
            s = os.stat(os.path.join(rootdir, dir.name, f))
        except OSError:
            # listed in a checksum file but absent on disk; skip it
            # (previously this crashed the whole run)
            continue
        size = s.st_size
        mtime = s.st_mtime
        # only create a FileDetail if an identical row doesn't already exist
        try:
            FileDetail.selectBy(directory=dir, filename=f, sha1=sha1, md5=md5, size=size, timestamp=mtime)[0]
        except IndexError:
            FileDetail(directory=dir, filename=f, sha1=sha1, md5=md5, timestamp=mtime, size=size)
    

def make_repomd_file_details(dir):
    """Create a FileDetail record for repodata/repomd.xml beneath *dir*.

    Reads the on-disk repomd.xml, computes its size/sha1/md5 and the newest
    repodata timestamp, and attaches a FileDetail to the repodata/ Directory
    row unless an identical one already exists.  Best-effort: returns
    silently if the file is missing or unreadable.
    """
    repodataDir = dir.name + '/repodata'
    repomd_fname = os.path.join(rootdir, dir.name, 'repodata', 'repomd.xml')
    if not os.path.exists(repomd_fname):
        return
    try:
        f = open(repomd_fname, 'r')
    except (IOError, OSError):
        # was a bare 'except:'; unreadable repomd.xml is a best-effort skip
        return
    try:
        try:
            repomd = f.read()
        except (IOError, OSError):
            return
    finally:
        # always release the handle (the original leaked it on a read error)
        f.close()
    size = len(repomd)
    sha1 = shamod.new(repomd).hexdigest()
    md5 = md5mod.new(repomd).hexdigest()

    # older yum versions don't set a top-level timestamp on the repo object
    yumrepo = yum.repoMDObject.RepoMD('repoid', repomd_fname)
    if 'timestamp' not in yumrepo.__dict__:
        set_repomd_timestamp(yumrepo)
    timestamp = yumrepo.timestamp
    # the FileDetail hangs off the repodata/ Directory row, not its parent
    dir = Directory.byName(repodataDir)
    # only create a FileDetail if an identical row doesn't already exist
    try:
        FileDetail.selectBy(directory=dir, filename='repomd.xml', sha1=sha1, md5=md5, timestamp=timestamp, size=size)[0]
    except IndexError:
        FileDetail(directory=dir, filename='repomd.xml', sha1=sha1, md5=md5, timestamp=timestamp, size=size)

def make_repository(dir, category):
    """Create a Repository record for directory *dir* under *category*.

    Returns the new Repository, or None when the version/arch cannot be
    guessed from the path or when creation fails.
    """
    repo = None
    # path of dir relative to the category's top-level directory
    path = dir.name[len(category.topdir.name)+1:]
    (ver, arch) = guess_ver_arch_from_path(category, path)
    if ver is None or arch is None:
        return None
    # repository name: <product>-<category>-<path components joined by '-'>,
    # with '/os' trimmed and 'SRPMS' rewritten to 'source'
    path = trim_os_from_dirname(path)
    name=path.split('/')
    name = rename_SRPMS_source(name)
    name='-'.join(name)
    name='%s-%s-%s' % (category.product.name, category.name, name)

    prefix = repo_prefix(path, category, ver)
    try:
        repo = Repository(name=name, category=category, version=ver, arch=arch, directory=dir, prefix=prefix)
    except:
        # NOTE(review): bare except — presumably swallows DB errors (e.g. a
        # duplicate repository name) so re-runs stay idempotent; confirm what
        # Repository() actually raises and narrow this.
        pass

    return repo

    


def nuke_gone_directories(category, category_directories):
    """ deleting a Directory has a ripple effect through the whole
        database.  Be really sure you're ready do to this.  It comes
        in handy when say a Test release is dropped."""
    # Delete Directory rows that belong only to *category* and no longer
    # appear in the freshly scanned category_directories map.
    for d in Directory.select():
        # fixed: was 'len(d.categories == 1)', which applied len() to the
        # result of a comparison instead of testing the list's length
        if len(d.categories) == 1 and category in d.categories:
            if d.name not in category_directories:
                d.destroySelf()

# Directory names whose permissions make them unreadable (used as a set:
# name -> True); populated by make_one_directory(), consulted so that
# children of an unreadable parent are also marked unreadable.
unreadable_dirs = {}

def parent_dir(path):
    """Return everything before the final '/' in *path* ('' when no slash)."""
    if '/' not in path:
        return ''
    return path.rsplit('/', 1)[0]

def make_one_directory(line, category, path, category_directories):
    """Record one directory entry from an rsync listing line.

    *line* is a full rsync -r listing line; field 0 is the permission string
    and field 4 the directory name ('.' means the category top level, mapped
    to *path* itself).  Marks the directory unreadable when its permissions
    are not world-readable or any parent is already unreadable, and flags the
    parent as a repository when this entry is a repodata/ directory.
    Returns (dname, category_directories).
    """
    # fixed typo: was 'global unreadble_dirs', which declared a name that is
    # never used; the real module-level dict is 'unreadable_dirs'
    global unreadable_dirs
    fields = line.split()
    perms = fields[0]
    d = fields[4]
    if d == '.':
        dname = path
    else:
        dname = "%s/%s" % (path, d)
    readable = True
    # world-readable directories look like 'd......r.x'; an unreadable
    # parent makes every child unreadable too
    if not re.compile('^d......r.x').match(perms) or parent_dir(dname) in unreadable_dirs:
        readable = False
        unreadable_dirs[dname] = True

    category_directories[dname] = {'files':{}, 'isRepository':False, 'readable':readable}
    if d.endswith('repodata'):
        # the parent of a repodata/ dir is a yum repository
        parent_dname = dname[:-len('/repodata')]
        try:
            category_directories[parent_dname]['isRepository'] = True
        except KeyError:
            category_directories[parent_dname] = {'files':{}, 'isRepository':True, 'readable':readable}

    return dname, category_directories

def add_file_to_directory(line, dname, path, category_directories):
    """Record one file entry from an rsync listing line.

    *line* holds five fields: permissions, size, date (YYYY/MM/DD), time
    (HH:MM:SS) and the file path relative to *path*.  The file's size and
    mtime are stored under its containing directory in category_directories.
    (*dname* is accepted but unused.)
    """
    perm, size, date, timestr, filepath = line.split()
    y, mo, dy = [int(x) for x in date.split('/')]
    h, mi, s = [int(x) for x in timestr.split(':')]
    dt = datetime.datetime(y, mo, dy, h, mi, s)
    parts = filepath.split('/')
    filename = parts[-1]
    if len(parts) > 1:
        dirpath = '%s/%s' % (path, '/'.join(parts[:-1]))
    else:
        dirpath = path
    category_directories[dirpath]['files'][filename] = {'size': size,
                                                        'stat': dt}

def short_filelist(files):
    """Return *files*, truncated to the 10 newest entries when it is large.

    *files* maps filename -> {'size': ..., 'stat': datetime}.  If it holds
    more than 10 .html, .rpm or .hdr files, only the 10 newest entries (by
    mtime, then name) are kept; otherwise the dict is returned unchanged.
    """
    html = 0
    rpms = 0
    hdrs = 0
    for f in files.keys():
        if f.endswith('.html'): html = html + 1
        if f.endswith('.rpm'):  rpms = rpms + 1
        if f.endswith('.hdr'):  hdrs = hdrs + 1
    if html > 10 or rpms > 10 or hdrs > 10:
        # fixed: the sort/truncate used to run inside the loop (quadratic);
        # sorting once after building the list yields the same top 10
        date_file_list = []
        for k in files.keys():
            date_file_list.append((files[k]['stat'], k, files[k]['size']))
        date_file_list.sort()
        rc = {}
        for stat, k, size in date_file_list[-10:]:
            rc[k] = files[k]
        return rc
    else:
        return files

def sync_category_directories(category, category_directories):
    """Sync the scanned category_directories map into the database.

    First pass: create/update a Directory row per scanned path (skipping
    snapshot/tmp paths), refresh its readability and short file list, and
    create FileDetail rows from any checksum files.  Second pass: for each
    directory flagged as a repository, create its Repository and repomd.xml
    FileDetail records.
    """
    excludes=['.snapshot', '.~tmp~']
    for dirpath, value in category_directories.iteritems():
        # never track rsync/snapshot scratch paths
        if excludes[0] in dirpath or excludes[1] in dirpath:
            continue
        try:
            dir = Directory.byName(dirpath)
            # only touch the row when readability actually changed
            if dir.readable != value['readable']:
                dir.readable = value['readable']
        except SQLObjectNotFound:
            dir = Directory(name=dirpath,readable=value['readable'])
            dir.addCategory(category)
        if dir.files != short_filelist(value['files']):
            dir.files = short_filelist(value['files'])
        make_file_details_from_checksums(dir)

    # this has to be a second pass to be sure the child repodata/ dir is created in the db first
    for dirpath, value in category_directories.iteritems():
        if value['isRepository']:
            dir = Directory.byName(dirpath)
            make_repository(dir, category)
            make_repomd_file_details(dir)
    # NOTE(review): ageFileDetails presumably comes from mirrormanager.model's
    # star import and expires old FileDetail rows — confirm.
    ageFileDetails()

def parse_rsync_listing(cname, f):
    """Parse an 'rsync -r' listing from file object *f* for category *cname*
    and sync the resulting directory/file map into the database.

    NOTE(review): 'dname' is only bound when a directory line has been seen;
    a file line arriving first would raise NameError — presumably rsync
    always lists a directory before its contents, but confirm.
    """
    category = Category.byName(cname)
    category_directories = {}
    for line in f.readlines():
        line.strip()  # NOTE(review): no-op — the stripped result is discarded
        if line.startswith('d'):
            if re.compile('^\.$').match(line):
                # we know the top-level category directory already exists, don't try to re-make it
                # NOTE(review): this branch can never match — 'line' is the whole
                # rsync line (it starts with 'd'), not the bare directory name;
                # the '.' entry is actually handled inside make_one_directory().
                pass
            else:
                dname, category_directories = make_one_directory(line, category, category.topdir.name, category_directories)
        else:
            add_file_to_directory(line, dname, category.topdir.name, category_directories)
    sync_category_directories(category, category_directories)



def sync_directories_using_rsync(rsyncpath, cname, extra_rsync_args=None):
    cmd = 'rsync -r --exclude=.snapshot --exclude=\*.~tmp~'
    if extra_rsync_args is not None:
        cmd += ' ' + extra_rsync_args
    cmd += ' ' + rsyncpath
    try:
        f = os.popen(cmd)
    except:
        print "Unable to parse category %s rsyncpath %s" % (cname, rsyncpath)
        return
    parse_rsync_listing(cname, f)
    f.close()

def sync_directories_from_file(filename, cname, extra_args=None):
    """Feed a saved rsync listing from *filename* into category *cname*.

    extra_args is accepted (and ignored) for signature parity with
    sync_directories_using_rsync(): the config-driven loop passes an options
    value for both 'rsync' and 'file' entries, which previously raised
    TypeError here.
    """
    f = open(filename, 'r')
    try:
        parse_rsync_listing(cname, f)
    finally:
        # close the listing even if parsing fails
        f.close()


# Process each configured master directory: scan it (via rsync or a saved
# listing file) and sync the results into the database.
for i in config.get('umdl.master_directories'):
    try:
        options = i['options']
    except KeyError:
        options = None

    if i['type'] == 'rsync':
        sync_directories_using_rsync(i['url'], i['category'], options)
    elif i['type'] == 'file':
        # fixed: sync_directories_from_file() takes (filename, cname) only;
        # passing 'options' as a third positional argument raised TypeError
        sync_directories_from_file(i['url'], i['category'])

remove_pidfile(pidfile)
