# -*- coding: utf-8 -*-
# Moovida - Home multimedia server
# Copyright (C) 2006-2009 Fluendo Embedded S.L. (www.fluendo.com).
# All rights reserved.
#
# This file is available under one of two license agreements.
#
# This file is licensed under the GPL version 3.
# See "LICENSE.GPL" in the root of this distribution including a special
# exception to use Moovida with Fluendo's plugins.
#
# The GPL part of Moovida is also available under a commercial licensing
# agreement from Fluendo.
# See "LICENSE.Moovida" in the root directory of this distribution package
# for details on that license.

"""
URI parsing support
"""

__maintainer__ = 'Philippe Normand <philippe@fluendo.com>'


import os, re, urllib, platform, copy
from elisa.core.utils import locale_helper

# for i18n support
from elisa.extern.translation import Translatable

# Compiled regular expression used to parse URIs. It starts out as None and
# is compiled on first use by MediaUri._uri_re(), then reused afterwards.
URI_RE = None

class ParseException(Exception):
    """
    Raised when a URI (or its parameters) cannot be parsed.

    @ivar msg: human readable description of the parsing problem
    @type msg: string
    """

    def __init__(self, msg):
        # Forward the message to the base class so that 'args' is filled in;
        # repr(), copying and pickling of the exception rely on it.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg

def quote(data, not_quote=''):
    """
    Percent-quote the string(s) held by C{data} using urllib.quote.

    Strings are quoted directly; for a list every entry is quoted and for a
    dictionary every value is quoted (keys are left alone). The result has
    the same container type as the input. Integers are turned into their
    string representation and any other kind of object is returned
    unchanged.

    Warning: '/' is quoted like any other unsafe character. When quoting a
    path that uses '/' as separator, pass the slash in C{not_quote}.

    @param data:      unquoted data which can contain unsafe characters
                      like &=
    @type data:       string, list or dict
    @param not_quote: characters which must not be quoted
    @type not_quote:  string
    @rtype:           string, list or dict
    """
    encoding = locale_helper.system_encoding()

    def _quote_one(text):
        # urllib.quote doesn't support unicode objects very well, so unicode
        # is turned into a byte string in the system encoding first
        if isinstance(text, unicode):
            text = text.encode(encoding)
        return urllib.quote(text, not_quote)

    if isinstance(data, basestring):
        return _quote_one(data)

    if isinstance(data, dict):
        return dict((key, _quote_one(value))
                    for key, value in data.iteritems())

    if isinstance(data, list):
        return [_quote_one(item) for item in data]

    if isinstance(data, int):
        ## FIXME: do we need this/should we do this ?
        return str(data)

    return data

def unquote(data):
    """
    Unquote strings, lists and dictionaries containing strings quoted values
    using urllib.unquote. Return value has the same type as the parameter given
    to this function. If the data is not a string, list or a dictionary, it is
    simply returned.

    @param data: quoted data
    @type data:  string, list or dict
    @rtype:      string, list or dict of unicodes
    """

    if isinstance(data, basestring):
        if isinstance(data, unicode):
            # urllib.quote doesn't support unicode objects very well
            data = data.encode('utf-8')
        unquoted = urllib.unquote(data).decode('utf-8')
    elif isinstance(data, dict):
        unquoted = {}
        for k, v in data.iteritems():
            if isinstance(v, unicode):
                # urllib.quote doesn't support unicode objects very well
                v = v.encode('utf-8')
            unquoted[k] = urllib.unquote(v).decode('utf-8')
    elif isinstance(data, list):
        unquoted = []
        for entry in data:
            if isinstance(entry, unicode):
                entry = entry.encode('utf-8')
            unquoted.append(urllib.unquote(entry).decode('utf-8'))
    else:
        unquoted = data
    return unquoted

def _unicode(data):
    """
    Return C{data} as a unicode object, decoding byte strings with the
    system encoding rather than with ascii.

    Unicode objects are returned untouched. Objects that are neither str nor
    unicode are first converted with str() and then decoded.

    @param data: data to convert
    @type data:  string, unicode or other object
    @rtype:      unicode
    """
    encoding = locale_helper.system_encoding()

    if isinstance(data, unicode):
        return data

    if not isinstance(data, str):
        data = str(data)
    return unicode(data, encoding)



class MediaUri(object):
    """ Media URI management

    An URI is structured like this::

       scheme://[user:password@]host[:port]/path[/][?params][#fragment]

    This class is able to parse that and wrap access to all found
    attributes. When parsing file:// URIs, paths like::

       file://./path/to/foo

    are resolved against the current working directory, i.e. replaced
    with::

       os.path.join(os.getcwd(), "path/to/foo")

    @ivar scheme:   the URI scheme
    @type scheme:   string
    @ivar user:     the optional username
    @type user:     string
    @ivar password: the optional password
    @type password: string
    @ivar host:     the URI hostname
    @type host:     string
    @ivar port:     URI optional port. Set to 0 if not found and to -1 if
                    the port found in the URI is not a number
    @type port:     int
    @ivar path:     the URI path, delimited by slashes (/)
    @type path:     string
    @ivar fragment: optional URI fragment
    @type fragment: string

    @ivar extension: the extension of the uri or empty
    @type extension: unicode

    @ivar filename: the filename of the uri, i.e. the part behind the last
                    slash. Could be empty!
    @type filename: unicode

    @ivar label: the label for this uri (per default the same as the filename)
    @type label: unicode or L{elisa.extern.translation.Translatable}

    @ivar parent: the parent uri (the last part of the path is removed)
    @type parent: L{MediaUri}
    """

    def __init__(self, data):
        """
        Create an Uri from various representations.

        Representation can be either a string containing the uri, the
        components of the uri stored in a dictionary, or another L{MediaUri}
        (which is then copied without being parsed again). Backslashes found
        in the path are always converted to slashes.

        @param data:        An uri stored in a unicode string or described by
                            its components (scheme, user, password, host, port,
                            path, params, fragment), each of them being a string
        @type data:         string, dict or L{MediaUri}
        @raises TypeError:  If none of location or parts has been provided
        @raises KeyError:   If a dictionary without 'scheme' or 'path' is given
        @raises ParseException: If the location wasn't correctly parsed
        """
        location = None
        uri_parts = None

        self._label = u''
        self._params = {}

        sys_encoding = locale_helper.system_encoding()

        if isinstance(data, MediaUri):
            # copycat, avoid another parse
            uri_parts = {'scheme': data.scheme, 'user': data.user,
                         'password': data.password, 'host': data.host,
                         'port': data.port,
                         'path': data.path.replace('\\', '/'),
                         'fragment': data.fragment,
                         'params': copy.copy(data._params)}
            self._label = data._label
        elif isinstance(data, dict):
            # FIXME: scheme, host and path cannot be empty according to the
            #        docstring
            # Work on a shallow copy: normalizing the path must not modify
            # the dictionary owned by the caller.
            uri_parts = dict(data)
            if 'path' in uri_parts:
                uri_parts['path'] = uri_parts['path'].replace('\\', '/')
        elif isinstance(data, basestring):
            data = data.replace('\\', '/')
            # "file://foo" is turned into "file:///foo" so that "foo" ends up
            # in the path; "file://./..." is handled later by _parse()
            if data.startswith('file://') \
                    and not data.startswith('file:///') \
                    and not data.startswith('file://./'):
                data = "%s/%s" % (data[:7], data[7:])

            if isinstance(data, str):
                location = unicode(data, sys_encoding)
            else:
                location = data

        if uri_parts:
            # FIXME: scheme cannot be empty according to the docstring
            self.scheme = _unicode(uri_parts['scheme'])
            self.user = _unicode(uri_parts.get('user', ''))
            self.password = _unicode(uri_parts.get('password', ''))
            # FIXME: host cannot be empty according to the docstring
            self.host = _unicode(uri_parts.get('host', ''))
            # port is an integer, according to the docstring
            self.port = uri_parts.get('port', 0)

            self.fragment = _unicode(uri_parts.get('fragment', ''))
            # FIXME: convert params to unicode
            self._params = uri_parts.get('params', {})

            path = uri_parts['path']
            # path should always start with / if it's not empty
            # FIXME: forcing a leading slash is questionable, it does not fit
            #        every scheme. E.g. the cdda URI of gstreamer is
            #        cdda://track-num, which is not handled correctly here.
            if path and not path.startswith('/'):
                path = "/%s" % path

            self._path = _unicode(path)
        elif location:
            self._parse(location)
        else:
            raise TypeError("No location nor uri parts provided")

    def _uri_re(self):
        """
        Return the compiled regular expression used to split an URI into its
        components. It is compiled only once and cached in the module-level
        URI_RE variable.
        """
        global URI_RE
        if not URI_RE:
            # NOTE(review): the user and password groups may span '/', so a
            # location such as "file:///C:/music/me@home.mp3" looks like it
            # would be split at the '@' - to be confirmed before tightening
            # the pattern (callers may pass "DOMAIN/user:pass@host").
            URI_RE = re.compile(r"^(([^:/?#]+):)?(//(([^:]+)\:([^@]*)@)?)?"
                                r"([^?#/]*)([^?#]*)"
                                r"(\?([^#]*))?(#(.*))?", re.UNICODE)
        return URI_RE

    def path__get(self):
        """
        Return the path of the URI.

        On Windows, for the 'file' and 'ipod' schemes, the leading slash is
        stripped when the path is a single character or starts with exactly
        one slash (e.g. '/C:/foo' becomes 'C:/foo').
        """
        if platform.system() == 'Windows':
            path = self._path
            single_leading_slash = (len(path) > 1 and path[0] == '/'
                                    and path[1] != '/')
            if (single_leading_slash or len(path) == 1) \
                    and self.scheme in ('file', 'ipod'):
                return path[1:]
        return self._path

    def path__set(self, value):
        self._path = value

    path = property(fset=path__set, fget=path__get)

    def _parse(self, location):
        """
        Split C{location} into its components and store them on the instance
        (scheme, user, password, host, port, fragment, path and params).

        @param location: the URI to parse
        @type location:  unicode
        @raises ParseException: if the location does not match the URI
                                regular expression or if its parameters
                                cannot be split into key=value pairs
        """
        match = self._uri_re().search(location)
        if not match:
            raise ParseException("URI not parseable: %s" % location)

        # Only the inner groups are of interest: 2=scheme, 5=user,
        # 6=password, 7=host[:port], 8=path, 10=params, 12=fragment
        scheme, user, passwd, host, path, params, fragment = \
            match.group(2, 5, 6, 7, 8, 10, 12)

        self.scheme = scheme or u''
        self.user = user or u''
        self.password = passwd or u''

        port = 0
        host_port = host.split(':')
        if len(host_port) == 2:
            host, port = host_port
            try:
                port = int(port)
            except ValueError:
                # not a number: flag the port as invalid
                port = -1

        self.host = host or u''
        self.port = port
        self.fragment = fragment or u''

        if scheme == 'file':
            # file scheme case: should disallow user/passwd/host
            if self.host == '.':
                # file://./ special case: path relative to the current
                # working directory
                self.host = u''
                if path.startswith('/'):
                    path = path[1:]
                # os.getcwd() returns a byte string; decode it explicitly so
                # that joining it with the unicode path does not fall back to
                # an implicit ascii decoding.
                path = os.path.join(_unicode(os.getcwd()), path)
                if platform.system() == 'Windows':
                    path = path.replace('\\', '/')

            elif self.host:
                # transfer host to beginning of path
                path = "%s%s" % (self.host, path)
                self.host = u''

            if not path.startswith('/'):
                # path should always start with /
                path = "/%s" % path

        self._path = _unicode(path)

        self._params = {}
        if params:
            key_values = params.split('&')
            first = key_values[0]

            if '=' not in first:
                # ?foo special case: a single key without value
                key_values = ["%s=" % first]
            elif first.rfind('=') != first.find('='):
                # special case: params comma separated instead of
                # &-separated
                key_values = params.split(',')

            try:
                self._params = dict([[_unicode(part)
                                      for part in pair.split('=', 1)]
                                     for pair in key_values])
            except ValueError:
                # at least one entry has no '=' and thus no value
                msg = "URI parameters were not quoted in %s" % location
                raise ParseException(msg)

    def set_params(self, values):
        """
        Set a lot of parameters at one time. Attention: it simply
        overrides already existing values!

        @param values:  a dictionary, where the parameter names are pointing to
                        the unquoted values.
        @type values: dict
        """
        for key, value in values.iteritems():
            self.set_param(key, value)

    def set_param(self, name, value):
        """
        Set the parameter 'name' to 'value'. If this parameter is already
        existing it is overwritten. The value must not be quoted yet, because
        it is quoted here: passing an already quoted value would quote it
        twice and lead to very bad parameter values!

        @param name:    the name of the parameter
        @type name:     Unicode
        @param value:   unquoted value for the parameter
        @type value:    Unicode
        """
        self._params[name] = quote(value)

    def get_param(self, name, default=u''):
        """
        Get the value for the parameter 'name'. If there is none found, return
        the value of 'default'. If 'default' is not set it is an
        empty unicode.

        @param name:    the name of the parameter
        @type name:     Unicode
        @param default: value that should be returned, if the parameter is not
                        found (per default that is an empty Unicode)
        @type default:  Unicode

        @rtype:         unquoted Unicode
        @return:        parameter or the value of default, if the parameter
                        was not found
        """
        return unquote(self._params.get(name, default))

    def get_params(self):
        """
        Get all params as a dict.

        @rtype:         dict
        @return:        of quoted key value pairs
        """
        return self._params

    def get_params_string(self):
        """
        Get the params as a one-line string (excluding a leading '?').

        @rtype:         unicode
        @return:        key1=value1&key2=value2
        """
        return u'&'.join('='.join((k, v))
                         for k, v in self._params.iteritems())

    def del_param(self, name):
        """
        Delete the parameter with the name 'name' (and its value), if it is
        found in the list of parameters.

        @param name:    the name of the parameter to delete
        @type name:     Unicode
        """
        if name in self._params:
            del self._params[name]

    def extension__get(self):
        """
        Return the lower-cased extension of the path, without the leading
        dot, or an empty unicode if there is none.
        """
        # FIXME: is os.path.splitext doing any encoding conversion ?
        dummy, ext = os.path.splitext(self._path)
        if ext:
            # strip '.' prefix
            ext = ext[1:].lower()
        return _unicode(ext)

    extension = property(fget=extension__get)

    def filename__get(self):
        """ Return the filename of the Uri.

        The filename is the last component of the path: for
        I{uri://path/to/foo}, 'foo' is returned.

        If there is no path, like in I{uri://host:port/} or in I{uri://} an
        empty unicode is returned
        """
        filename = ''

        if self._path and self._path != '/':
            idx = self._path.rfind('/')
            if idx != -1:
                filename = self._path[idx+1:]

        return _unicode(filename)

    filename = property(fget=filename__get)

    def label__set(self, value):
        """
        Set the label. Byte strings are converted to unicode; unicode and
        Translatable objects are stored as they are. Values of any other
        type are silently ignored.
        """
        if isinstance(value, str):
            self._label = _unicode(value)
        elif isinstance(value, (unicode, Translatable)):
            self._label = value

    def label__get(self):
        """ Return a displayable string designating the Uri. Return the last
        path component (unquoted) if the URI has no predetermined label
        instance attribute set.
        """
        label = self._label
        if not label:
            path = self._path

            if path == '/':
                label = _unicode(path)
            elif path == '':
                label = self.host
            else:
                if path.endswith('/'):
                    path = path[:-1]

                idx = path.rfind('/')
                if idx != -1:
                    label = _unicode(unquote(path[idx+1:]))

        return label

    label = property(fset=label__set, fget=label__get)

    def parent__get(self):
        """ Return the parent URI.

        If the URI is like I{uri://path/to/foo} return I{uri://path/to/}.
        Fragment and parameters are not carried over to the parent.
        """
        p = self._path
        if p.endswith('/'):
            p = p[:-1]
        # drop the last path component and keep a trailing slash
        p = '/'.join(p.split('/')[:-1]) + '/'

        uri_parts = {'scheme': self.scheme,
                     'user': self.user,
                     'password': self.password, 'host': self.host,
                     'port': self.port, 'path': _unicode(p)}
        return MediaUri(uri_parts)

    parent = property(fget=parent__get)

    def join(self, path):
        """ Return a new URI whose path is my path with the given path
        appended (a '/' separator is inserted if needed). I am left
        unchanged.

        @param path: the path to append at the end of my path
        @type path:  string
        @rtype:      L{MediaUri}
        """
        new_path = self._path
        if not new_path.endswith('/'):
            new_path += '/'
        new_path += path

        # The parameters are copied so that changing them on the new URI
        # does not silently change mine as well.
        uri_parts = {'scheme': self.scheme,
                     'user': self.user,
                     'password': self.password, 'host': self.host,
                     'port': self.port, 'path': _unicode(new_path),
                     'fragment': self.fragment,
                     'params': copy.copy(self._params)}

        return MediaUri(uri_parts)

    def _to_unicode(self):
        """ Textual representation of the URI

        @rtype: unicode
        """
        ret = u'%s://' % self.scheme
        if self.user:
            ret += u'%s:%s@' % (self.user, self.password)
        if self.host:
            ret += self.host
            if self.port:
                ret += u':%s' % self.port

        ret += self._path
        if len(self._params):
            ret += u'?'
            if isinstance(self._params, dict):
                d = self._params
            else:
                # params given as another kind of mapping object
                d = self._params.asDict()
            ret += u'&'.join('='.join((k, v)) for k, v in d.iteritems())

        if self.fragment:
            ret += u'#%s' % self.fragment

        assert(isinstance(ret, unicode))
        return ret

    __unicode__ = _to_unicode

    def __str__(self):
        """ Byte string representation of the URI, encoded with the system
        encoding (characters that cannot be encoded are replaced).

        @rtype: string
        """
        sys_encoding = locale_helper.system_encoding()
        return self._to_unicode().encode(sys_encoding, 'replace')

    def __repr__(self):
        return "MediaUri(%s)" % repr(unicode(self))

    def __cmp__(self, other_uri):
        """ Compare myself with another uri.

        @param other_uri: The URI I'm comparing myself with
        @type other_uri:  L{MediaUri}
        @rtype:           C{int}
        @raise TypeError: When trying to compare with non-MediaUri object
        """
        if not isinstance(other_uri, MediaUri):
            raise TypeError("You can't compare MediaURI with other kind of objects")
        return cmp(self._to_unicode(), other_uri._to_unicode())

    def __eq__(self, other_uri):
        """
        Two URIs are equal when their textual representations are equal.
        Comparing with None returns False.

        @raise TypeError: When trying to compare with an object that is
                          neither None nor a MediaUri
        """
        if other_uri is None:
            return False
        if not isinstance(other_uri, MediaUri):
            raise TypeError("You can't compare MediaURI with other kind of objects")
        return self._to_unicode() == other_uri._to_unicode()

    def __ne__(self, other_uri):
        """
        Two URIs differ when their textual representations differ. Unlike
        __eq__, comparing with a non-MediaUri object does not raise: such an
        object is always considered different.
        """
        if not isinstance(other_uri, MediaUri):
            return True
        return self._to_unicode() != other_uri._to_unicode()

    def __nonzero__(self):
        # an URI without scheme is considered empty
        return len(self.scheme) != 0

    def __contains__(self, component):
        return component in self._to_unicode()

    def __add__(self, path):
        return self.join(path)

    def endswith(self, character):
        return self._to_unicode().endswith(character)

    def __getslice__(self, i, j):
        return self._to_unicode()[i:j]

#if __name__ == '__main__':
#    import sys
#    for uri in sys.argv[1:]:
#        u = MediaUri(uri)
#        print u, u.label
#        assert str(u) == uri, "%r != %r" % (u, uri)
