#!/usr/bin/python2.7
'''
Created on Jan 11, 2012

@author: flid
'''

import rpm
import argparse
import sys
import subprocess
import re
import os
from urllib2 import urlopen, HTTPError, URLError
import zlib
import glob
import shutil
import platform


# Machine type of the running host as reported by platform.machine()
# (compared against "x86_64" when matching lib/lib64 package names).
ARCH = platform.machine()

def vprint(text):
    '''Write text to stdout, but stay silent unless --verbose was requested.'''
    if not command_line_arguments.verbose:
        return
    print(text)

def qprint(text):
    '''Write text to stdout, but stay silent when --quiet was requested.'''
    if command_line_arguments.quiet:
        return
    print(text)
        
        
def eprint(text, fatal=False, code=1):
    '''Write text, followed by a newline, to stderr.

    text  -- the message to print
    fatal -- when true, terminate the program after printing
    code  -- process exit status used when fatal is true

    Raises SystemExit when fatal is true.
    '''
    sys.stderr.write("%s\n" % (text,))
    if fatal:
        # sys.exit rather than the bare exit(): the latter is only injected
        # by the site module and is meant for interactive sessions.
        sys.exit(code)
        
        
def get_command_output(command, fatal_fails=True):
    '''Run a command and capture what it prints.

    command     -- argument list passed to subprocess.Popen (no shell involved)
    fatal_fails -- when true, a non-zero return code (or a command that can
                   not be started at all) prints an error message to stderr
                   and terminates the program with status 1

    Returns the list [stdout_text, stderr_text, return_code].
    When fatal_fails is false, an OSError from starting the command is
    propagated to the caller unchanged.
    '''
    vprint("Executing command: " + str(command))
    try:
        res = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as error:
        # Typically "No such file or directory": the executable is missing.
        if not fatal_fails:
            raise
        eprint("Error calling command '" + " ".join(command) + "'")
        eprint("Error message: \n" + str(error))
        sys.exit(1)

    out, err = res.communicate()
    if res.returncode != 0 and fatal_fails:
        eprint("Error calling command '" + " ".join(command) + "'")
        if out is not None or err is not None:
            eprint("Error message: \n" + ((out.strip() + "\n") if out is not None else "") +
                   (err.strip() if err is not None else ""))
        sys.exit(1)
    return [out, err, res.returncode]


# RPM header tags copied verbatim into every installed-package record.
tags = ['epoch', 'version', 'release', 'distepoch', 'disttag',
        'provides', 'requires', 'obsoletes', 'suggests', 'conflicts']


def load_installed():
    '''Read the local RPM database into a dictionary.

    Returns {package_name: record}; each record holds the header values
    for every entry of the module-level tags list (a missing epoch is
    stored as 0) plus "text_version", the string "epoch:version-release".
    The gpg-pubkey pseudo packages are skipped. When a name occurs more
    than once a notice is printed and the later header overwrites the
    earlier record.
    '''
    qprint("Loading the list of installed packages...")
    packages = {}
    ts = rpm.TransactionSet()
    for header in ts.dbMatch():
        pkg_name = header['name']
        if pkg_name == 'gpg-pubkey':
            continue

        if pkg_name in packages:
            qprint("Duplicating " + pkg_name + '-' + header['version'] + '-' + header['release'])
            qprint("Already found: " + pkg_name + '-' + packages[pkg_name]["text_version"])
        else:
            packages[pkg_name] = {}

        record = packages[pkg_name]
        for tag in tags:
            record[tag] = header[tag]
        if record['epoch'] is None:
            record['epoch'] = 0
        record["text_version"] = (str(record['epoch']) + ":" +
                                  record['version'] + "-" + record['release'])
    return packages

media_urls = {}      # medium name -> url (without trailing slash)
medium_by_url = {}   # url -> medium name


def load_media_urls():
    '''Fill media_urls and medium_by_url from the output of "<cmd> --list-url".

    Does nothing when media_urls is already populated. Every output line
    is split on spaces: the last word is taken as the url (one trailing
    slash is dropped) and everything before it as the medium name. Lines
    whose last word contains no slash are ignored.
    '''
    global media_urls, cmd, medium_by_url
    if media_urls:
        return
    vprint("Loading media urls...")
    listing = get_command_output(cmd + ["--list-url"])[0]
    for entry in listing.strip().split("\n"):
        words = entry.split(" ")
        url = words[-1]
        if url.endswith("/"):
            url = url[:-1]
        if "/" not in url:
            continue
        medium = ' '.join(words[:-1])
        media_urls[medium] = url
        medium_by_url[url] = medium

def list_media_urls():
    '''Return the urls of all media known to media_urls, loading them first if needed.'''
    global media_urls
    load_media_urls()
    return [media_urls[medium] for medium in media_urls]

# name-version-release.arch; the name may itself contain dashes.
_NVRA_RE = re.compile(r'^(?P<name>.+)-'
                      r'(?P<version>[^-]+)-'
                      r'(?P<release>[^-].*)\.'
                      r'(?P<arch>.+)$')


def get_package_fields2(name, disttag=None, distepoch=None):
    """Split a full rpm package name into its name and version-release.

    Handles names both with and without the "-{disttag}{distepoch}" part:
    when disttag is given, the first occurrence of "-" + disttag
    (+ distepoch, if given) is removed before the name is parsed.

    name      -- full package name, e.g. "foo-1.0-1-mdv2011.0.i586"
    disttag   -- distribution tag to strip, e.g. "mdv" (optional)
    distepoch -- distribution epoch to strip, e.g. "2011.0" (optional,
                 only used together with disttag)

    Returns the list [package_name, "version-release"]; the architecture
    is matched but not returned.
    Raises ValueError when the name does not look like
    name-version-release.arch.
    """
    if disttag is not None:
        tagepoch = '-%s' % disttag
        if distepoch is not None:
            tagepoch += distepoch
        name = name.replace(tagepoch, '', 1)

    match = _NVRA_RE.match(name)
    if not match:
        raise ValueError('Malformed RPM name: %s' % name)
    return [match.group('name'), match.group('version') + '-' + match.group('release')]

def get_package_fields1(rpmname, disttagepoch):
    """Split an rpm name into [name, "version-release"].

    The name is cut at dashes; the last dash-separated piece, minus its
    final dot-suffix (the architecture), is either the release itself or
    a separate disttag/distepoch piece. It counts as the release when it
    starts with a digit or does not start with disttagepoch, and is not
    made of digits only; otherwise the two pieces before it are taken as
    version and release.
    """
    chunks = rpmname.split('-')
    tail = '.'.join(chunks.pop().split('.')[:-1])

    looks_like_release = tail[0].isdigit() or not tail.startswith(disttagepoch)
    if looks_like_release and not tail.isdigit():
        name_parts, ver, rel = chunks[:-1], chunks[-1], tail
    else:
        name_parts, ver, rel = chunks[:-2], chunks[-2], chunks[-1]
    return ['-'.join(name_parts), ver + '-' + rel]
 
#print RPMNameFilter('glean-1.1-2.cvs20100209mdv2011.0.x86_64')
def get_package_fields(rpmname):
    '''Return [name, "version-release"] for an rpm file or package name.

    A trailing .x86_64, .i586 or .noarch is removed first. If the last
    dash-separated section carries a "<digits>mdv", "<digits>mdk" or
    "<digits>mnb" marker it is the release; otherwise it is treated as a
    separate disttag section and left out of the result.
    '''
    for arch_suffix in ('.x86_64', '.i586', '.noarch'):
        if rpmname.endswith(arch_suffix):
            rpmname = rpmname[:-len(arch_suffix)]

    release_marker = re.compile(r"(\.)?((alpha)|(cvs)|(svn)|(r))?\d+((mdv)|(mdk)|(mnb))")
    pieces = rpmname.split("-")
    if release_marker.search(pieces[-1]) is None:
        # name-version-release-disttag: drop the last section
        name_part, version_part = pieces[:-3], pieces[-3:-1]
    else:
        # name-version-release (release carries the distribution marker)
        name_part, version_part = pieces[:-2], pieces[-2:]
    return ["-".join(name_part), "-".join(version_part)]


#s = 'python-curl-7.19.0-7-mdv2011.0.i586'
#r2 = get_package_fields2(s, 'mdv', '2011.0')
#r1 = get_package_fields1(s, 'mdv2011.0')
#r = get_package_fields(s)
#print r
#print r1
#print r2
#exit()

#mingw32-dlfcn-0-r11.3mdv2011.0.noarch
#ircd-2.11.1-p1.2mdv2011.0.x86_64
url_by_synthesis_url = {}   # synthesis file url -> medium url


def _fetch_synthesis(synthesis_list):
    '''Return the raw (still compressed) contents of one synthesis file.

    http, https and ftp urls are read with urlopen; rsync urls are copied
    into a temporary directory with the rsync program and read from there.
    Returns None for any other kind of url.
    '''
    if synthesis_list.startswith(("http://", "https://", "ftp://")):
        r = urlopen(synthesis_list)
        try:
            return r.read()
        finally:
            r.close()

    if synthesis_list.startswith("rsync://"):
        tmppath = '/tmp/urpm-reposync.synthesis_lists'
        if not os.path.exists(tmppath):
            os.mkdir(tmppath)
        try:
            filename = tmppath + '/' + os.path.basename(synthesis_list)
            devnull = open(os.devnull, 'w')
            try:
                # Argument list, not a shell string: the url is never
                # interpreted by a shell.
                subprocess.call(['rsync', '--copy-links', synthesis_list, filename],
                                stdout=devnull, stderr=devnull)
            finally:
                devnull.close()
            # If rsync failed the file is missing and open() raises IOError,
            # which the caller reports.
            r = open(filename, 'rb')
            try:
                return r.read()
            finally:
                r.close()
        finally:
            # Remove the temporary directory even when the copy failed.
            shutil.rmtree(tmppath)

    return None


def parse_synthesis():
    '''Download and parse the synthesis file of every configured medium.

    Returns {package_name: record}. A record contains "epoch", "version"
    ("version-release"), "text_version" ("epoch:version-release"),
    "provides", "requires", "obsoletes", "suggests", "conflicts",
    "synthesis_list" (url of the synthesis file) and "medium" (medium
    name). When a package occurs several times, the entry that
    rpm.evrCompare ranks newest is kept.

    Side effects: fills url_by_synthesis_url and rebinds the module-level
    fields dictionary used as scratch space while parsing. A medium whose
    synthesis file can not be obtained or unpacked is reported on stderr
    and skipped.
    '''
    global fields, url_by_synthesis_url

    def get_synthesis_by_url(url):
        '''Build the synthesis url for a medium url.

        The last two path components name the medium (a trailing
        "release" component is dropped); the file lives in the
        media_info directory next to them.
        '''
        parts = url.split('/')
        url_base = '/'.join(parts[:-2])
        parts = parts[-2:]
        if parts[-1] == 'release':
            parts = parts[:-1]
        synth_name = 'synthesis.hdlist_' + '_'.join(parts) + '.cz'
        return url_base + "/media_info/" + synth_name

    def clear_data():
        '''Clears the data of the current package from 'fields' dictionary'''
        global fields
        fields = {"provides": [], "requires": [], "obsoletes": [], "suggests": [],
                  "conflicts": [], "info": [], "summary": []}

    medium_by_synth = {}
    synthesis_lists = []
    for url in list_media_urls():
        synth = get_synthesis_by_url(url)
        synthesis_lists.append(synth)
        url_by_synthesis_url[synth] = url
        medium_by_synth[synth] = medium_by_url[url]

    repository = {}
    for synthesis_list in synthesis_lists:
        qprint("Processing medium " + medium_by_synth[synthesis_list] + "...")
        try:
            s = _fetch_synthesis(synthesis_list)
        except (HTTPError, URLError, IOError, OSError):
            eprint("File can not be processed! Url: " + synthesis_list)
            continue
        if s is None:
            # Unsupported url scheme: never reuse data of the previous medium.
            eprint("File can not be processed! Url: " + synthesis_list)
            continue

        res = subprocess.Popen(['gzip', '-d'], stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output = res.communicate(s)
        if res.returncode != 0 and not output[0]:
            # gzip could not unpack anything, so there is nothing to parse.
            eprint("File can not be processed! Url: " + synthesis_list)
            continue

        clear_data()
        # Each line is "@tag@value@value..."; the tags of one package come
        # first and its "@info@" line closes the package.
        for line in output[0].split('\n'):
            if line.strip() == '':  # there can be empty lines
                continue
            items = line.split("@")
            fields[items[1]] = [x.strip() for x in items[2:]]
            if items[1] != "info":
                continue

            # info line: items[2] is the full package name, items[3] the epoch
            d = get_package_fields(items[2])
            name = d[0]
            epoch = items[3]
            version = '-'.join(d[1].split('-')[:2])
            text_version = epoch + ":" + version
            if name not in repository:
                repository[name] = {}

            # store the package if it is new or newer than the one already stored
            if ('text_version' not in repository[name] or
                    rpm.evrCompare(text_version, repository[name]['text_version']) == 1):
                repository[name]["epoch"] = epoch
                repository[name]["synthesis_list"] = synthesis_list
                repository[name]["version"] = version
                repository[name]["text_version"] = text_version
                for tag in ['provides', 'requires', 'obsoletes', 'suggests', 'conflicts']:
                    repository[name][tag] = fields[tag][:]
                repository[name]['medium'] = medium_by_synth[synthesis_list]

            clear_data()
    return repository
    
def parse_command_line():
    '''Parse sys.argv and publish the result as the global command_line_arguments.

    Exits with status 2 when --quiet is given without --auto.
    '''
    global command_line_arguments
    parser = argparse.ArgumentParser(description='reposync is used to synchronize a set of packages on the local computer with the remote repository.')

    # media selection: each occurrence collects one or more medium names
    parser.add_argument('--include-media', '--media', action='append', nargs='+',
                        help="Use only selected URPM media")
    parser.add_argument('--exclude-media', action='append', nargs='+',
                        help="Do not use selected URPM media")
    #arg_parser.add_argument('-x', '--exclude-packages', action='store',nargs = '+', help="Exclude package(s) by regex")

    # plain on/off switches
    switches = (
        ('-v', '--verbose', "Verbose (print additional info)"),
        ('-q', '--quiet', "Quiet operation. Senseless without --auto."),
        ('-a', '--auto', "Do not ask questions, just do it!"),
    )
    for short_flag, long_flag, help_text in switches:
        parser.add_argument(short_flag, long_flag, action='store_true', help=help_text)

    options = parser.parse_args(sys.argv[1:])
    command_line_arguments = options
    if options.quiet and not options.auto:
        eprint("It's senseless to use --quiet without --auto!", fatal=True, code=2)
    
to_update = []      # names of packages to upgrade
to_downgrade = []   # names of packages to downgrade
to_remove = []      # names of packages to erase
rpm_files = []      # paths of the downloaded rpm files scheduled for installing
downloaded_rpms_dir = '/tmp/urpm-reposync.rpms'


def download_packages():
    '''Download every package listed in to_update and to_downgrade.

    Runs urpm-downloader (restricted by include_media / exclude_media when
    set) with downloaded_rpms_dir as destination. For every file the tool
    reports as downloaded or already present, the path is appended to
    rpm_files and recorded in installed[name] under "downloaded_file" and
    "downloaded_version". Returns None; does nothing when there is
    nothing to download.
    '''
    global rpm_files, to_update, to_downgrade, to_remove
    packages = to_update + to_downgrade
    if not packages:
        return
    qprint('Downloading packages...')

    if not os.path.exists(downloaded_rpms_dir):
        os.mkdir(downloaded_rpms_dir)

    cmd = ["urpm-downloader"]
    if include_media:
        # include_media / exclude_media are lists, so convert before concatenating
        vprint("Include_media: " + str(include_media))
        cmd += ['--include-media'] + include_media
    if exclude_media:
        vprint("Exclude_media: " + str(exclude_media))
        cmd += ['--exclude-media'] + exclude_media

    cmd += ['--dest-dir', downloaded_rpms_dir, '-r']
    load_media_urls()
    res = get_command_output(cmd + packages)
    for line in res[0].strip().split("\n"):
        # only these two kinds of lines name an rpm file
        if not line.startswith(('File exists, skipping: ', 'Downloaded: ')):
            continue
        filename = line.split('/')[-1].strip()
        new_file = downloaded_rpms_dir + '/' + filename

        if new_file in rpm_files:  # file already scheduled for installing
            continue
        rpm_files.append(new_file)
        vprint("Downloaded file: " + ': '.join(line.split(': ')[1:]).strip())

        name, version = get_package_fields(filename)
        if name not in installed:
            installed[name] = {}
        installed[name]['downloaded_file'] = new_file
        installed[name]['downloaded_version'] = version
    
# Printable names of the callback reason codes passed to runCallback.
flags = {0:'RPMCALLBACK_UNKNOWN',
    1:'RPMCALLBACK_INST_PROGRESS',
    2:'RPMCALLBACK_INST_START',
    4:'RPMCALLBACK_INST_OPEN_FILE',
    8:'RPMCALLBACK_INST_CLOSE_FILE',
    16:'RPMCALLBACK_TRANS_PROGRESS',
    32:'RPMCALLBACK_TRANS_START',
    64:'RPMCALLBACK_TRANS_STOP',
    128:'RPMCALLBACK_UNINST_PROGRESS',
    256:'RPMCALLBACK_UNINST_START',
    512:'RPMCALLBACK_UNINST_STOP',
    1024:'RPMCALLBACK_REPACKAGE_PROGRESS',
    2048:'RPMCALLBACK_REPACKAGE_START',
    4096:'RPMCALLBACK_REPACKAGE_STOP',
    8192:'RPMCALLBACK_UNPACK_ERROR',
    16384:'RPMCALLBACK_CPIO_ERROR'}

# File descriptor of the package file currently opened by runCallback.
rpmtsCallback_fd = None


def runCallback(reason, amount, total, key, client_data):
    '''Transaction callback in the shape expected by rpm's ts.run().

    reason -- callback reason code (see the flags table)
    key    -- for RPMCALLBACK_INST_OPEN_FILE, the path of the rpm to open
    amount, total and client_data are accepted but not used.

    On RPMCALLBACK_INST_OPEN_FILE the file named by key is opened and its
    descriptor returned; on RPMCALLBACK_INST_START that descriptor is
    closed again. Any other reason is only logged in verbose mode.
    '''
    global rpmtsCallback_fd
    # .get(): a reason code missing from the table must not abort the
    # transaction with a KeyError.
    vprint("rpm_callback called with flag " + flags.get(reason, str(reason)))

    if reason == rpm.RPMCALLBACK_INST_OPEN_FILE:
        vprint("Opening file: " + key)
        rpmtsCallback_fd = os.open(key, os.O_RDONLY)
        return rpmtsCallback_fd

    elif reason == rpm.RPMCALLBACK_INST_START:
        os.close(rpmtsCallback_fd)
         
def filter_toremove():
    '''Drop from to_remove every package that something staying installed still needs.

    Works on the module-level to_remove list (modified in place) and the
    installed dictionary. For every candidate the key 'requires_removing'
    is stored in its installed record: the list of other removal
    candidates that require one of its provides.
    '''
    vprint("Filtering to_remove list... Inital list (" + str(len(to_remove)) +" items): " + str(to_remove))
    
    def dep_needed(dep):
        '''Tell who requires the capability dep.

        Returns [0] when no installed package lists dep in its requires,
        [1] as soon as a package that is not in to_remove requires it,
        and [2, names] when it is required only by packages that are in
        to_remove (names is the list of those packages).
        '''
        req_removing = []
        for p in installed:
            if dep in installed[p]['requires']:
                if(p not in to_remove):
                    return [1]
                req_removing.append(p)
        if(req_removing):
            return [2, req_removing]
        else:
            return [0]
            
    # First pass: iterate over a copy because to_remove shrinks in the loop.
    for pkg in to_remove[:]:
        provides = installed[pkg]['provides']
        installed[pkg]['requires_removing'] = []
        for p in provides:
            res = dep_needed(p) 
            if res[0] == 0:
                pass
                #print "not needed"
            elif res[0] == 1:
                #print "needed"
                # a package that stays installed needs this one: keep it
                to_remove.remove(pkg)
                break
            else:
                installed[pkg]['requires_removing'] = list(set(installed[pkg]['requires_removing'] + res[1])) # remove duplicates
                #print "needed only by removings " + str(res[1])  
    
    # Second pass: repeat until nothing changes, because keeping one
    # package can force the packages it requires to be kept as well.
    changed = True
    while changed:
        changed = False
        
        for pkg in to_remove[:]:
            if(not installed[pkg]['requires_removing']):
                continue
            for p in installed[pkg]['requires_removing'][:]:
                if p not in to_remove:
                    # a dependent package is no longer being removed: keep pkg too
                    to_remove.remove(pkg)
                    changed = True
                    break
                if not installed[p]['requires_removing']:
                    # p has no pending dependents left, so it no longer blocks pkg
                    installed[pkg]['requires_removing'].remove(p)
                    changed = True
                    
    vprint("Resulting to_remove list (" + str(len(to_remove)) +" items):" + str(to_remove))

def process_packages():
    '''Download the packages and build and check the rpm transaction.

    Every package in to_update and to_downgrade is added to the
    transaction as an upgrade from its downloaded file, every package in
    to_remove as an erase. Unresolved dependencies are listed on stderr
    and terminate the program with status 3.

    NOTE: the call that would actually run the transaction is commented
    out below, so nothing is installed or removed here.
    '''
    download_packages()

    def readRpmHeader(ts, filename):
        ''' Read an rpm header. '''
        fd = os.open(filename, os.O_RDONLY)
        try:
            return ts.hdrFromFdno(fd)
        finally:
            # close the descriptor even when the header can not be read
            os.close(fd)

    qprint("Generating transaction...")
    ts = rpm.TransactionSet()

    # turn all the checks off. They can cause segfault in RPM for now.
    ts.setVSFlags(rpm.RPMVSF_NOHDRCHK|rpm.RPMVSF_NOSHA1HEADER|rpm.RPMVSF_NODSAHEADER|rpm.RPMVSF_NORSAHEADER|rpm.RPMVSF_NOMD5|rpm.RPMVSF_NODSA|rpm.RPMVSF_NORSA|rpm._RPMVSF_NODIGESTS|rpm._RPMVSF_NOSIGNATURES)
    ts.setProbFilter(rpm.RPMPROB_FILTER_OLDPACKAGE)

    for pkg in to_update + to_downgrade:
        rpm_path = installed[pkg]['downloaded_file']
        h = readRpmHeader(ts, rpm_path)
        ts.addInstall(h, rpm_path, 'u')

    for pkg in to_remove:
        ts.addErase(pkg)

    qprint("Checking dependencies...")

    def format_dep(dep):
        '''Render one entry of ts.check() as a readable sentence.'''
        ((name, ver, rel), (namereq, verreq), x, y, z) = dep
        if verreq:
            verreq = '-' + verreq
        else:
            verreq = ''
        return "Package %s-%s-%s requires %s%s" % (name, ver, rel, namereq, verreq)

    unresolved_dependencies = ts.check()
    if unresolved_dependencies:
        eprint("There are some unresolved dependencies: ")
        for dep in unresolved_dependencies:
            eprint("\t" + format_dep(dep))
        eprint("Contact repository maintaiers and send them this information, please.", fatal=True, code=3)
    else:
        qprint("No errors found in transaction")
    ts.order()

    qprint("Running trunsaction...")
    #ts.run(runCallback, 1)

def try_solve_lib_arch(pkgname):
    '''Record the other-bitness twin of an installed library as its alias.

    Applies only to names starting with "lib" that are missing from the
    repository. If libX is installed and only lib64X is in the repository
    (or the other way round), installed[pkgname]['alias'] is set to the
    repository name, so the library does not have to be removed. A 64 bit
    twin is accepted only when ARCH is "x86_64".
    '''
    if not pkgname.startswith('lib') or pkgname in repository:
        return

    if pkgname[3:5] == '64':
        # 64 bit library installed, look for the 32 bit name
        twin = 'lib' + pkgname[5:]
        if twin in repository:
            installed[pkgname]['alias'] = twin
    else:
        # 32 bit library installed, look for the 64 bit name;
        # a 64bit library can not work in a 32bit system
        twin = 'lib64' + pkgname[3:]
        if twin in repository and ARCH == "x86_64":
            installed[pkgname]['alias'] = twin

def print_actions():
    '''Print the planned upgrades, downgrades and removals (nothing in quiet mode).'''
    if command_line_arguments.quiet:
        return

    media = media_urls.keys()

    def print_pkg_list(pkglist):
        '''Print a table of pkglist grouped by the medium each package comes from.'''
        media_contents = {}
        for medium in media:
            members = [pkg for pkg in pkglist if repository[pkg]['medium'] == medium]
            if members:
                media_contents[medium] = members

        qprint(" %-30s  %-20s  %-20s" % ('Package Name', 'Current Version', 'New Version'))
        for medium in media_contents:
            qprint("(medium " + medium + ")")
            for pkg in sorted(media_contents[medium]):
                local = installed[pkg]
                remote = repository[pkg]
                current = local['version'] + '-' + local['release']
                new = remote['version']
                # mention the epochs only when they differ
                if local['epoch'] != int(remote['epoch']):
                    current += ' (epoch ' + str(local['epoch']) + ')'
                    new += ' (epoch ' + remote['epoch'] + ')'
                qprint(" %-30s  %-20s  %-20s" % (pkg, current, new))
        qprint('')

    sections = (
        (to_update, "The following packages are going to be upgraded:"),
        (to_downgrade, "The following packages are going to be downgraded:"),
    )
    for pkglist, heading in sections:
        if pkglist:
            qprint(heading)
            print_pkg_list(pkglist)

    if not to_remove:
        return
    qprint("The following packages are going to be removed:")
    qprint(" %-30s  %-20s" % ('Package Name', 'Current Version'))
    for pkg in sorted(to_remove):
        qprint(" %-30s  %-20s" % (pkg, installed[pkg]['version'] + '-' + installed[pkg]['release']))
    qprint('')

def Main():
    '''Compare installed packages with the repository and act on the differences.

    Builds the urpmq command line from the media options, loads the
    installed packages and the repository contents, sorts every installed
    package into to_update, to_downgrade or to_remove, shows the plan and,
    unless --auto was given, asks for confirmation before calling
    process_packages(). Exits with status 0 when the user declines or
    stdin is closed.
    '''
    global cmd, resolve_source, installed_packages, installed, repository, include_media, exclude_media
    resolve_source = False  # variable that makes download_rpm to download resolved build-deps
    cmd = ['urpmq']

    # Each option occurrence yields a list of names: flatten them and hand
    # them over as one comma separated value.
    include_media = []
    if command_line_arguments.include_media is not None:
        for group in command_line_arguments.include_media:
            include_media.extend(group)
        cmd = cmd + ['--media', ",".join(include_media)]

    exclude_media = []
    if command_line_arguments.exclude_media is not None:
        for group in command_line_arguments.exclude_media:
            exclude_media.extend(group)
        cmd = cmd + ['--excludemedia', ",".join(exclude_media)]

    installed = load_installed()
    repository = parse_synthesis()

    for inst in installed:
        try_solve_lib_arch(inst)
        if inst not in repository:
            if 'alias' not in installed[inst]:
                to_remove.append(inst)
            # aliased libraries are left alone: they can't be dealt with yet
            continue

        if inst.startswith('kernel'):
            continue

        res = rpm.evrCompare(installed[inst]["text_version"], repository[inst]["text_version"])
        if res == -1:
            to_update.append(inst)
        elif res == 1:
            to_downgrade.append(inst)
        # res == 0: same version, nothing to do

    filter_toremove()

    if len(to_update + to_downgrade + to_remove) == 0:
        qprint("Nothing to do")
        return

    print_actions()

    vprint("Installed packages: " + str(len(installed)))
    vprint("Packages that need some actions: " + str(len(to_update) + len(to_downgrade) + len(to_remove)))

    if not command_line_arguments.auto:
        sys.stdout.write("Do you want to proceed? (y/n) ")
        sys.stdout.flush()  # no newline was written, so flush to show the prompt

        while True:
            res = sys.stdin.readline()
            if not res:
                # EOF: stdin is closed, nobody can answer - treat it as "no"
                sys.exit(0)
            res = res.strip()
            if res in ('y', 'yes'):
                break
            if res in ('n', 'no'):
                sys.exit(0)

    process_packages()
    
    
# Program name and version. NOTE(review): not referenced anywhere in the
# visible code - confirm whether a --version option was intended.
VERSION = "urpm-reposync 1.0.2"
if __name__ == '__main__':
    # Options must be parsed first: vprint/qprint read the global they set.
    parse_command_line()
    Main()
