Module pymake.core.types

Source code
from copy import copy, deepcopy
import traceback,  importlib
import numpy as np
from collections import OrderedDict, defaultdict
from itertools import product

from pymake import get_pymake_settings
from pymake.util.utils import get_dest_opt_filled, hash_objects, ask_sure_exit, basestring
from pymake.index.indexmanager import IndexManager as IX
from pymake.exceptions import *

import logging
lgg = logging.getLogger('root')


''' Structure of Pymake Objects.
'''


from tabulate import tabulate

# Ugly, integrate.
def _table_(tables, headers=[], max_line=10, max_row=30, name=''):
    ''' Render `tables` as text using tabulate.

        Two modes are selected by the type of `headers`.

        * `headers` is a str (dict mode): `tables` is a dict. Its keys are
          sorted, split into groups, and each group is tabulated with
          `headers` forwarded as-is to tabulate. A title built from `name`
          is printed to stdout and the tabulated groups are returned, joined
          by blank lines.
        * otherwise (list mode): `tables` is a sequence of sections and
          `headers` holds one header per section. Each non-empty section is
          sorted, cut in chunks of `max_line` entries and tabulated with one
          chunk per column.

        Parameters
        ----------
        tables : dict or list
        headers : str or list
        max_line : int
            (list mode) number of entries per chunk.
        max_row : int
            (list mode) width used for the '|S<max_row>' cast of the cells.
            NOTE(review): in dict mode the value received is overwritten
            with 10 before being used as the group size.
        name : str
            (dict mode) title printed before the tables.

        Returns
        -------
        str
    '''

    if isinstance(headers, str):
        # tables is dict
        # Sort the dict
        ordered_keys = sorted(tables.keys())
        tables = OrderedDict([(k,tables[k]) for k in ordered_keys ])

        _tables = []
        cpt = 0
        # The caller-supplied max_row is ignored here: groups hold 10 keys.
        max_row = 10
        for k, v in tables.items():
            # Open a new group every `max_row` keys.
            if cpt % max_row == 0:
                t = OrderedDict()
                _tables.append(t)
            t[k] = v
            cpt += 1

        sep = '# %s'%name +  '\n'+'='*20
        # Side effect: the title goes to stdout, it is not part of the return value.
        print(sep)
        tables = '\n\n'.join([str(tabulate(t, headers=headers)) for t in _tables])
        return tables
    else:
        # tables is list
        raw = []
        for sec, table in enumerate(tables):
            # Entries are ordered on their first element (first character for strings).
            table = sorted(table, key=lambda x:x[0])
            size = len(table)
            if size == 0:
                # Empty sections produce no output at all.
                continue
            # Number of extra (header-less) columns needed after the first one.
            col = int((size-0.1) // max_line)
            # Pad with '-' so the slices taken below have equal lengths.
            # NOTE(review): when size < max_line this pads with max_line % size
            # entries, which may or may not be the intended amount -- confirm.
            junk = max_line % size
            table += ['-']*junk
            # Cut the padded list in consecutive chunks of `max_line` entries.
            table = [table[j:max_line*(i+1)] for i,j in enumerate(range(0, size, max_line))]
            table = np.char.array(table).astype('|S'+str(max_row))
            fmt = 'simple'
            # Transpose so that each chunk becomes one column of the table.
            raw.append(tabulate(table.T,
                                headers=[headers[sec]]+['']*(col),
                                tablefmt=fmt))
        sep = '\n'+'='*20+'\n'
        # sep[1:] drops the leading newline for the very first separator.
        return sep[1:] + sep.join(raw)




# Not sure this one is necessary, or not here
class BaseObject(object):
    ''' Mixin providing an enumerated `items` view and a tabulated `table`.

        Notes : to avoid method conflicts, ALWAYS put this class last
                in the bases of a class definition.
    '''

    def __init__(self, *args, **kwargs):
        # Nothing to initialise: any argument is accepted and ignored.
        pass

    def items(self):
        ''' Return the list of (str(position), element) obtained by iterating self. '''
        pairs = []
        for position, element in enumerate(self):
            pairs.append((str(position), element))
        return pairs

    def table(self):
        ''' Return `self.items()` formatted by tabulate. '''
        rows = self.items()
        return tabulate(rows)

class ExpSpace(dict):
    """ A dictionary with dot notation access.
        Used for the **expe** settings stream.

        Attribute assignment and deletion are mapped to item assignment and
        deletion; attribute reads fall back to item lookup when the regular
        attribute lookup fails.
    """

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __init__(self, *args, **kwargs):
        # dict.__init__ already stores the positional mapping/iterable and
        # the keyword arguments; no extra copy loop is needed.
        super(ExpSpace, self).__init__(*args, **kwargs)

    def copy(self):
        ''' Return a shallow copy that keeps the type of self. '''
        return type(self)(self)

    def __copy__(self):
        # Build from the mapping itself rather than from **self, so that
        # non-string keys are supported too.
        return type(self)(self)

    def __deepcopy__(self, memo):
        # NOTE(review): this returns a *shallow* copy (values are shared with
        # the original). Kept as-is because callers may rely on it -- confirm.
        return self.copy()

    def __getattr__(self, key):
        ''' Item lookup for attributes that are not found the regular way.

            Raises
            ------
            AttributeError
                if `key` is not stored, except for '_write' which defaults to False.
        '''
        try:
            return self[key]
        except KeyError:
            # Default pmk settings
            if key == '_write':
                return False

            lgg.debug('an ExpSpace request exceptions occured for key: %s ' % (key))
            raise AttributeError(key)

    # The builtin hasattr() goes through __getattr__ (and its debug log);
    # this method is a direct membership test instead.
    def hasattr(self, key):
        return key in self

    # For pickling
    def __getstate__(self):
        # Hand back a plain dict snapshot instead of the object itself.
        return dict(self)

    def __setstate__(self, state):
        # Only restore the items. Assigning `self.__dict__ = self` here would
        # go through __setattr__ (i.e. dict.__setitem__) and store a
        # self-referential '__dict__' key in the mapping.
        self.update(state)

class ExpVector(list, BaseObject):
    ''' A List of elements of an ExpTensor. '''

    def __add__(self, other):
        ''' Concatenate with `other`; the result has the type of self. '''
        joined = list.__add__(self, other)
        return type(self)(joined)

    def __sub__(self, other):
        ''' Keep the items of self that are not found in `other`; the result has the type of self. '''
        kept = []
        for item in self:
            if item in other:
                continue
            kept.append(item)
        return type(self)(kept)

class ExpGroup(list, BaseObject):
    ''' A list grouping experiment specifications (dicts and/or nested lists).

        Keyword arguments given at construction are written into every dict
        found in the group, recursing through nested lists.
    '''

    def __init__(self, args, **kwargs):
        ''' Parameters
            ----------
            args : dict or iterable
                a single dict is wrapped in a one-element list.
            **kwargs
                key/value pairs set on every dict of the group.
        '''
        if kwargs:
            # The group is going to be modified: work on a copy of the input.
            args = deepcopy(args)

        if isinstance(args, dict):
            args = [args]

        list.__init__(self, args)
        # Initialise the mixin on the instance itself (not on `args`).
        BaseObject.__init__(self, **kwargs)

        # Recursively update value if kwargs found.
        if kwargs:
            self.update_all(self, kwargs)

    def update_all(self, l, d):
        ''' Write the items of `d` into every dict of `l`, recursing into nested lists.
            Elements that are neither list nor dict are left untouched.
        '''
        for o in l:
            if isinstance(o, list):
                self.update_all(o, d)
            elif isinstance(o, dict):
                o.update(d)

    def __add__(self, other):
        ''' Concatenate with `other`; the result has the type of self. '''
        return self.__class__(list.__add__(self, other))

    def __sub__(self, other):
        ''' Keep the items of self that are not found in `other`. '''
        return self.__class__([item for item in self if item not in other])

class Spec(BaseObject):
    ''' Helpers to look up and load experiment specifications through the
        'spec' index (IX is the project's IndexManager).
    '''

    @staticmethod
    def find(spec, field='expe_name'):
        ''' Return what `ix.getfirst` yields for `spec` searched in `field`. '''
        ix = IX(default_index='spec')
        spec = ix.getfirst(spec, field=field)
        return spec

    @staticmethod
    def get(scriptname, *expe):
        ''' Not implemented. '''
        ix = IX(default_index='spec')
        raise NotImplementedError

    @staticmethod
    def get_all():
        ''' Return the result of the index query on field 'expe_name' with terms 'module_name'. '''
        ix = IX(default_index='spec')
        _res = ix.query(field='expe_name', terms='module_name')
        return _res

    @staticmethod
    def load(expe_name, expe_module):
        ''' Import a design and fetch one of its specifications.

            Parameters
            ----------
            expe_name : str
                attribute to read on the design.
            expe_module : str
                dotted path whose last component is the design name and whose
                prefix is the module to import.

            Returns
            -------
            (exp, expdesign) : the attribute and the object holding it.

            Raises
            ------
            IndexChangedError
                if the module cannot be found or an attribute is missing;
                the original exception is chained as its cause.
        '''
        # debug to load from module or expe_name !

        p = expe_module.split('.')
        modula, modulb = '.'.join(p[:-1]), p[-1]
        try:
            expdesign = getattr(importlib.import_module(modula), modulb)
            exp = getattr(expdesign, expe_name)
        except (AttributeError, ModuleNotFoundError) as e:
            raise IndexChangedError("Fatal Error: unable to load spec (%s:%s):  try `pmk update' or try again."% (expe_name, e)) from e

        return exp, expdesign

    @classmethod
    def table(cls):
        ''' Return a text table of the indexed specifications, one column per design name.
            Only entries that load as ExpSpace, ExpTensor or ExpGroup are listed.
        '''
        ix = IX(default_index='spec')
        t = OrderedDict()
        for elt in ix.query(index='spec', terms=True):
            name = elt['module_name'].split('.')[-1]
            obj, _ = cls.load(elt['expe_name'], elt['module_name'])
            if isinstance(obj, (ExpSpace, ExpTensor, ExpGroup)):
                expes = t.get(name, []) + [elt['expe_name']]
                t[name] = sorted(expes)
        return _table_(t, headers='keys', name=cls.__name__)

    # no more complex.
    # @sortbytype
    @classmethod
    def table_topos(cls, _spec):
        ''' Return a text table of the specifications of `_spec` grouped by type.

            Parameters
            ----------
            _spec : dict
                maps an expe name to the dotted path given to `load`.
        '''
        Headers = OrderedDict((('Corpuses', Corpus),
                               ('Models', Model),
                               ('Vector', ExpVector),
                               ('Exp', (ExpSpace, ExpTensor, ExpGroup)),
                               ('Unknown', str)))

        tables = [[] for _ in range(len(Headers))]
        kinds = list(Headers.values())

        for expe_name, expe_module in _spec.items():
            expe, _ = cls.load(expe_name, expe_module)
            # First header whose type matches; the last one ('Unknown')
            # also collects whatever matches nothing.
            pos = len(Headers) - 1
            for i, T in enumerate(kinds):
                if isinstance(expe, T):
                    pos = i
                    break
            tables[pos].append(expe_name)

        return _table_(tables, headers=list(Headers.keys()))



class Script(BaseObject):
    ''' Helpers to look up and load scripts through the 'script' index
        (IX is the project's IndexManager).
    '''

    @staticmethod
    def find(script, field='method'):
        ''' Return what `ix.getfirst` yields for `script` searched in `field`. '''
        ix = IX(default_index='script')
        script = ix.getfirst(script, field=field)
        return script

    @staticmethod
    def get_all(_type='flat'):
        ''' Query the script index.

            Parameters
            ----------
            _type : str
                'flat' queries the field 'method'; 'hierarchical' queries the
                field 'scriptsurname' with terms=True.

            Raises
            ------
            ValueError
                if `_type` is not one of the two values above.
        '''
        if _type not in ('flat', 'hierarchical'):
            # Used to surface as an UnboundLocalError on the return statement.
            raise ValueError('Unknown _type for Script.get_all: %s' % (_type,))

        ix = IX(default_index='script')

        if _type == 'flat':
            _res = ix.query(field='method')
        else:
            _res = ix.query(field='scriptsurname', terms=True)
        return _res

    @staticmethod
    def get(scriptname, arguments, field='scriptsurname'):
        ''' Resolve a script object from its name.

            The name is first searched in `field`. If nothing is found it is
            searched as a 'method'; in that case `scriptname` is prepended to
            `arguments`.

            Returns
            -------
            (script, arguments), or None when nothing matches.
        '''
        ix = IX(default_index='script')
        topmethod = ix.getfirst(scriptname, field=field)
        if not topmethod:
            # get the first method that have this name
            topmethod = ix.getfirst(scriptname, field='method')
            if not topmethod:
                return None

            arguments = [scriptname] + arguments

        module = importlib.import_module(topmethod['module'])
        script = getattr(module, topmethod['scriptname'])
        return script, arguments

    @classmethod
    def table(cls):
        ''' Return a text table of the indexed methods, one column per script name. '''
        ix = IX(default_index='script')
        t = OrderedDict()
        for elt in ix.query(index='script', terms=True):
            name = elt['scriptname']
            methods = t.get(name, []) + [elt['method']]
            t[name] = sorted(methods)
        return _table_(t, headers='keys', name=cls.__name__)


class Corpus(ExpVector):

    # Meta-grammar / Ontology :
    #   Corpus := {
    #               data_type : text | network | image | ...,
    #               dtype : "specific to a data_type"
    #               name : "object identifier"
    #               path : "disk repo"
    #               source : url | random
    #             }

    # IX integration needed..

    _corpus_data = [
        dict(name='clique'        , data_type='network', data_source='random', directed=False),
        dict(name='generator'     , data_type='network', data_source='random', directed=False, nodes=1000),
        dict(name='graph'         , data_type='network', data_source='random'),
        dict(name='alternate'     , data_type='network', data_source='random', directed=False),
        dict(name='BA'            , data_type='network', data_source='random'),
        dict(name='manufacturing' , data_type='network', data_source='web', directed=True, nodes=167, edges=5784, density=0.209, weighted=True),
        dict(name='fb_uc'         , data_type='network', data_source='web', directed=True, nodes=1899, edges=22195, density=0.006, weighted=True),
        dict(name='blogs'         , data_type='network', data_source='web', directed=True, nodes=1490, edges=19025, density=0.009, weighted=False),
        dict(name='emaileu'       , data_type='network', data_source='web', directed=True, nodes=1005, edges=25571, density=0.025, weighted=False),
        dict(name='propro'        , data_type='network', data_source='web', directed=False, nodes=2113, edges=1432, density=0.001, weighted=False),
        dict(name='euroroad'      , data_type='network', data_source='web', directed=True, nodes=1177, edges=1432, density=0.001, weighted=False),

        # gt
        dict(name='astro-ph',    data_type='network', data_source='gt', directed=False, nodes=16706, edges=121251, weighted=True),
        dict(name='cond-mat',    data_type='network', data_source='gt', directed=False, nodes=16726, edges=47594 , weighted=True),
        dict(name='hep-th',      data_type='network', data_source='gt', directed=False, nodes=8361,  edges=15751 , weighted=True),
        dict(name='netscience',  data_type='network', data_source='gt', directed=False, nodes=1589,  edges=2742  , weighted=True),
        dict(name='email-Enron', data_type='network', data_source='gt', directed=False, nodes=36692, edges=367662, weighted=False), # time weighted

        #dict(name='facebook'     ,  data_type='network', data_source='web', directed=True, nodes=None, edges=None, density=None, wheigted=None),

        #dict(name='reuter50'  , data_type='text', data_source='web'),
        #dict(name='nips12'    , data_type='text', data_source='web'),
        #dict(name='nips'      , data_type='text', data_source='web'),
        #dict(name='enron'     , data_type='text', data_source='web'),
        #dict(name='kos'       , data_type='text', data_source='web'),
        #dict(name='nytimes'   , data_type='text', data_source='web'),
        #dict(name='pubmed'    , data_type='text', data_source='web'),
        #dict(name='20ngroups' , data_type='text', data_source='web'),
        #dict(name='odp'       , data_type='text', data_source='web'),
        #dict(name='wikipedia' , data_type='text', data_source='web'),
        #dict(name='lucene', data_type='text', data_source='lucene'), # needs field spec
        #dict(name='mongo', data_type='text', data_source='mongo'), # needs field spec
    ]

    @classmethod
    def get(cls, corpus_name):
        ''' Return a copy of the first entry of `_corpus_data` whose name is a
            prefix of `corpus_name`.

            Returns None for an empty `corpus_name` and False when no entry matches.
        '''
        if not corpus_name:
            return None

        # index/mongo...
        candidates = (data for data in cls._corpus_data
                      if corpus_name.startswith(data['name']))
        found = next(candidates, None)
        if found is None:
            return False
        return found.copy()

    @classmethod
    def get_all(cls):
        ''' Return the class-level `_corpus_data` list itself (not a copy). '''
        return cls._corpus_data

class Model(ExpVector):
    ''' Helpers to look up and load models through the 'model' index
        (IX is the project's IndexManager).
    '''

    @staticmethod
    def get(model_name):
        ''' Return the object registered under the surname `model_name`.

            The index entry gives the module to import and the attribute name
            to read on it. Returns None when there is no entry or when the
            module lacks that attribute.
        '''
        ix = IX(default_index='model')

        _model = None
        docir = ix.getfirst(model_name, field='surname')
        if docir:
            mn = importlib.import_module(docir['module'])
            _model = getattr(mn, docir['name'], None)
        return _model

    @staticmethod
    def get_all(_type='short'):
        ''' List the indexed models.

            Parameters
            ----------
            _type : str
                'short' returns the index query on the field 'surname';
                'topos' returns surnames with the entry's category inserted
                between the first two dot-separated components.

            Raises
            ------
            ValueError
                if `_type` is not one of the two values above.
        '''
        if _type not in ('short', 'topos'):
            # Used to surface as an UnboundLocalError on the return statement.
            raise ValueError('Unknown _type for Model.get_all: %s' % (_type,))

        ix = IX(default_index='model')
        if _type == 'short':
            return ix.query(field='surname')

        res = []
        for elt in ix.query(field='surname', terms=True):
            if len(elt['category']) > 0:
                # means that len(surname.split('.')) > 1
                names = elt['surname'].split('.')
                topos = '.'.join(elt['category'].split())
                surname = '.'.join((names[0], topos, names[1]))
            else:
                surname = elt['surname']
            res.append(surname)
        return res

    @classmethod
    def table(cls, _type='short'):
        ''' Return a single-section text table of `get_all(_type)` headed 'Models'. '''
        # _table_ expects a sequence of sections: wrap the listing in a 1-tuple.
        tables = (cls.get_all(_type),)
        return _table_(tables, headers=['Models'])


class ExpTensor(OrderedDict, BaseObject):
    ''' Represent a set of Experiences (**expe**). '''
    def __init__(self,  *args, **kwargs):
        ''' Build the ordered mapping from the arguments (forwarded to OrderedDict). '''
        OrderedDict.__init__(self, *args, **kwargs)
        BaseObject.__init__(self)

        # Size reported by get_size(); other code in this module assigns it.
        self._size = 0

    @classmethod
    def from_expe(cls, conf=None, expe=None, parser=None):
        ''' Return the tensor who is an OrderedDict of iterable.
            Non list/set/tuple values are wrapped in a one-element list.

            Parameters
            ----------
            conf : dict or None
                settings applied on top of the tensor built from `expe`
                (see `update_from_dict`). Also used as `expe` when `expe`
                is None.
            expe : (ExpTensor, ExpSpace, dict or ExpVector)
                A design of experiment.
            parser : object or None
                forwarded to `update_from_dict` when applying `conf`.

            Raises
            ------
            ValueError
                if `expe` (or `conf` standing for it) has an unsupported type,
                which includes the case where both are None.
        '''
        # `conf` defaults to None: don't call .copy() on it in that case.
        _conf = conf.copy() if conf is not None else {}
        if expe is None:
            expe = conf

        if not isinstance(expe, (cls, ExpSpace, dict, ExpVector)):
            raise ValueError('Expe not understood: %s' % type(expe))

        # Corpus and Model are ExpVector subclasses: test them first.
        if isinstance(expe, Corpus):
            tensor = cls(corpus=expe)
        elif isinstance(expe, Model):
            tensor = cls(model=expe)
        elif isinstance(expe, ExpVector):
            tensor = cls((str(i), j) for i, j in enumerate(expe))
        elif isinstance(expe, ExpTensor):
            tensor = expe.copy()
        elif isinstance(expe, (dict, ExpSpace)):
            tensor = cls()
            tensor.update_from_dict(expe)
        else:
            raise NotImplementedError('input type of ExpVector unknow %s' % (expe))

        # Listify scalar values (keys are unchanged, so this is safe while iterating).
        for k, v in tensor.items():
            if not isinstance(v, (list, set, tuple)):
                tensor[k] = [v]

        if _conf:
            tensor.update_from_dict(_conf, parser=parser)

        return tensor

    def update_from_dict(self, d, parser=None):
        ''' Update a tensor from a dict.

            Each value is stored as-is when it is an ExpVector, and wrapped in
            a one-element list otherwise. The key '_id_expe' is never copied.

            Parameters
            ----------
            d : dict
                the dict that updates the tensor.
            parser : object or None
                when given, `get_dest_opt_filled(parser)` lists the settings
                considered filled; a key already present in the tensor is
                then only overwritten if it belongs to that list.
        '''
        filter_on_parser = parser is not None
        if filter_on_parser:
            dests_filled = get_dest_opt_filled(parser)

        for k, v in d.items():
            if k in ['_id_expe']:
                continue

            if filter_on_parser and k not in dests_filled and k in self:
                # Keep the value the tensor already holds.
                continue

            self[k] = v if issubclass(type(v), ExpVector) else [v]

    def get_size(self, virtual=False):
        ''' Return the stored `_size`, or with `virtual` the product of the
            lengths of all the values.
        '''
        if not virtual:
            return self._size
        lengths = [len(x) for x in self.values()]
        return np.prod(lengths)


    def push_dict(self, d):
        ''' Push one dict inside the tensor.

            An empty tensor takes the dict as-is (see `update_from_dict`).
            Otherwise every value of `d` (key '_id_expe' excepted) is appended
            to the vector of its key unless already there.

            Returns
            -------
            bool
                False, leaving the tensor untouched, if `d` holds a key for
                which the tensor has no value yet; True otherwise.
        '''
        if len(self) == 0:
            self.update_from_dict(d)
            return True

        pending = {}
        for k, v in d.items():
            if k in ['_id_expe']:
                continue

            vector = self.get(k, []).copy()
            if v not in vector:
                if len(vector) == 0:
                    # Setting unknown to this tensor: it cannot be merged here.
                    return False
                vector.append(v)
            pending[k] = vector

        # Nothing was rejected: apply all the extended vectors at once.
        self.update(pending)
        return True


    def table(self, extra=[]):
        ''' Return a tabulate rendering of the rows in `extra` followed by
            the tensor's items sorted by key, under the headers
            'Params' / 'Values'. `extra` is not modified.
        '''
        return tabulate(extra+sorted(self.items(), key=lambda x:x[0]),
                               headers=['Params','Values'])

# @debug : Rename this class to ?
class ExpTensorV2(BaseObject):
    ''' Represent a set of Experiences (**expe**) of type ExpTensor... '''
    def __init__(self, private_keywords=[]):
        ''' Parameters
            ----------
            private_keywords : iterable
                keys removed from an expe before hashing it (see `_make_hash`).
        '''
        BaseObject.__init__(self)
        # Store a private copy: the default list is shared by every call and
        # must not end up referenced (and possibly mutated) by instances.
        self._private_keywords = list(private_keywords or [])

        # --- Those are aligned ---
        self._tensors = [] # list of ExpTensor
        self._bind = []
        self._null = defaultdict(list)
        self._hash = []
        self._ds_ = [] # ExpDesign class per tensor
        #
        self._lod = [] # list of dict
        self._ds = [] # ExpDesign class per expe
        # --- meta ---
        self._conf = {}
        self._size = None

    @classmethod
    def from_conf(cls, conf, _max_expe=2e6, private_keywords=[], expdesign=None):
        ''' Build the set of tensors described by `conf`.

            Parameters
            ----------
            conf : dict
                settings. Its '_spec' entry, if any, is popped (conf is
                modified in place) and lists the specifications to expand.
            _max_expe : number
                threshold on the number of specifications above which a
                warning is logged.
            private_keywords : list
                keys excluded from the '_expe_hash' computation.
            expdesign : class or None
                design recorded when there is no '_spec' (ExpDesign if None).

            Returns
            -------
            an instance of cls
        '''
        gt = cls(private_keywords=private_keywords)
        _spec = conf.pop('_spec', None)
        if not _spec:
            # No specification: the conf itself is the single experiment.
            if not expdesign:
                expdesign = ExpDesign
            conf['_name_expe'] = '_default_expe'
            conf['_expe_hash'] = hash_objects(dict((k,v) for k,v in conf.items() if k not in private_keywords))
            gt._tensors.append(ExpTensor.from_expe(conf))
            gt._ds_.append(expdesign)
            return gt

        exp = []
        size_expe = len(_spec)
        consume_expe = 0
        while consume_expe < size_expe:
            o = _spec[consume_expe]
            if isinstance(o, tuple):
                #_type => expdesign
                name, o, _type = o
            # NOTE(review): when `o` is not a tuple, `name` and `_type` keep
            # the values of the last tuple seen; this is what lets the
            # members of an expanded ExpGroup inherit them. If the very first
            # entry is not a tuple these names are undefined.

            if isinstance(o, ExpGroup):
                # Replace the group by its members in place; the index is not
                # advanced so the first member is processed next.
                size_expe += len(o) -1
                _spec = _spec[:consume_expe] + o + _spec[consume_expe+1:]
            elif isinstance(o, list): # ExpVector
                exp.append(o)
                gt._ds_.append(_type)
                consume_expe += 1
            else:
                # Dict-like specification: tag it (in place) with its name and hash.
                o['_name_expe'] = name
                o['_expe_hash'] = hash_objects(dict((k,v) for k,v in o.items() if k not in private_keywords))
                if hasattr(_type, '_alias'):
                    o['_alias'] = getattr(_type, '_alias')

                exp.append(o)
                gt._ds_.append(_type)
                consume_expe += 1

            # Checked on every iteration, so the warning can be logged repeatedly.
            if size_expe > _max_expe:
                lgg.warning('Number of experiences exceeds the hard limit of %d (please review ExpTensor).' % _max_expe)

        gt._tensors.extend([ExpTensor.from_expe(conf, spec) for spec in exp])
        return gt

    def __iter__(self):
        ''' Iterate over the tensors. '''
        yield from self._tensors

    def __len__(self):
        ''' Return `self.get_size()`. '''
        return self.get_size()

    def remove_all(self, key):
        ''' Drop `key` from the conf, from every tensor and from every expe of the lod. '''
        self._conf.pop(key, None)

        for tensor in self._tensors:
            tensor.pop(key, None)

        for expe in self._lod:
            expe.pop(key, None)

    def update_all(self, **kwargs):
        ''' Set the given key/value pairs in the conf, in every tensor
            (through `update_from_dict`) and in every expe of the lod.
        '''
        self._conf.update(kwargs)

        for tensor in self._tensors:
            tensor.update_from_dict(kwargs)

        for d in self._lod:
            d.update(kwargs)

    def set_default_all(self, defconf):
        ''' Set default values in the tensors and expes that lack them.

            For each item of `defconf`: tensors without the key get `[value]`,
            expes of the lod without the key get `value`.
        '''
        for key, value in defconf.items():

            for tensor in self._tensors:
                tensor.setdefault(key, [value])

            for expe in self._lod:
                expe.setdefault(key, value)

            # NOTE(review): a key already present in the conf is overwritten
            # by the default, without testing if all the groups have this
            # unique value -- confirm this is intended.
            if key in self._conf:
                self._conf[key] = value

    def get_all(self, key, default=[]):
        ''' Get all values associated to a given key.

            Values are read from the expes of the lod when the instance has a
            `_lod` attribute, from the tensors otherwise.

            Returns
            -------
            the list of values found, or `default` when there is none
            (a list `default` is returned as a copy).
        '''
        vec = []

        if hasattr(self, '_lod'):
            vec = [d[key] for d in self._lod if key in d]
        else:
            for tensor in self._tensors:
                vec.extend(tensor.get(key, []))

        if vec:
            return vec

        # Never hand out the shared default list itself: a caller mutating the
        # result would change what every later call returns.
        return list(default) if type(default) is list else default

    def get_nounique_keys(self):
        ''' Return the keys that are gridded, i.e. that take more than one
            distinct value over the set of tensors. Only str values not
            starting with '_' are taken into account.
        '''
        seen = defaultdict(set)
        for tensor in self._tensors:
            for k, values in tensor.items():
                for v in values:
                    if not isinstance(v, str) or v.startswith('_'):
                        continue
                    seen[k].add(v)

        return [k for k, found in seen.items() if len(found) > 1]


    def get_conf(self):
        ''' Compute, store in `self._conf` and return the settings that have a
            single value, identical in every tensor that defines them.
        '''
        _conf = {}
        # Keys already found with several values. Once rejected a key must
        # stay out, otherwise a later tensor would silently re-add it and the
        # result would depend on the order of the tensors.
        rejected = set()
        for tensor in self._tensors:
            for k, v in tensor.items():
                if k in rejected:
                    continue

                if len(v) != 1 or (k in _conf and v[0] != _conf[k]):
                    rejected.add(k)
                    _conf.pop(k, None)
                else:
                    _conf[k] = v[0]

        self._conf = _conf
        return self._conf

    def get_size(self):
        ''' Sum the sizes of the tensors, store the total in `self._size` and return it. '''
        self._size = sum(tensor.get_size() for tensor in self._tensors)
        return self._size

    def check_bind(self):
        ''' Rules Filter: move the '_bind' entry of each tensor to `self._bind`.

            One list of rules is appended per tensor (empty when the tensor
            has no '_bind'). A non-list entry is wrapped in a list and a list
            holding a single list is unwrapped.
        '''
        for tensor in self._tensors:
            rules = []
            if '_bind' in tensor:
                rules = tensor.pop('_bind')
                if not isinstance(rules, list):
                    rules = [rules]
                elif len(rules) == 1 and isinstance(rules[0], list):
                    rules = rules[0]

            self._bind.append(rules)

    def check_model_typo(self):
        ''' Prefix, in place, the model names that contain no dot.

            The prefix is derived from the 'default_model' pymake setting
            (assume default module is pymake).
        '''
        for tensor in self._tensors:
            models = tensor.get('model', [])
            for i, m in enumerate(models):

                # NOTE(review): assumes model entries are strings -- confirm.
                if not '.' in m:
                    # Set the model ref name
                    pkg = get_pymake_settings('default_model')
                    if len(pkg) > 8:
                        # Long setting: first 3 characters, followed by the
                        # initial of every dotted component after the first.
                        prefix = pkg[:3]
                        if '.' in pkg:
                            prefix  += ''.join(map(lambda x:x[0], pkg.split('.')[1:]))
                    else:
                        # Short setting: its first dotted component.
                        prefix = pkg.split('.')[0]

                    models[i] = '%s.%s'%(prefix, m)

    def check_null(self):
        ''' Filter _null: move every tensor entry whose values contain
            '_null' to `self._null` (values are appended under their key).
        '''
        for tensor in self._tensors:
            null_keys = [k for k, values in tensor.items() if '_null' in values]
            for k in null_keys:
                self._null[k].append(tensor.pop(k))

    def make_lod(self, skip_check=False):
        ''' Make a list of Expe from tensor, with filtering.

            Rebuilds `self._lod` and `self._ds` (the design of each expe),
            then computes the hashes.

            Parameters
            ----------
            skip_check : bool
                forwarded to `_make_hash`.

            Returns
            -------
            list of dict
        '''
        self._lod = []
        # Rebuilt together with _lod so both stay aligned when this method
        # is called more than once.
        self._ds = []
        for _id, tensor in enumerate(self._tensors):
            lods = self._make_lod(tensor, _id)
            tensor._size = len(lods)
            self._lod.extend(lods)
            self._ds.extend([self._ds_[_id]] * len(lods))

        self._make_hash(skip_check)
        return self._lod

    def _make_lod(self, tensor, _id):
        ''' 1. make dol to lod (cartesian product of the tensor values,
               keys taken in sorted order)
            2. filter _bind rule
            3. add special parameter (expe_id)

            Bind rules are dot separated strings read from `self._bind[_id]`:

            * 'a.b'   : remove the expes where value `a` occurs without value `b`.
            * 'a.!b'  : remove the expes where values `a` and `b` both occur.
            * 'a.b.c' : remove the expes where value `a` occurs and the
                        setting `b` differs from `c`.
            * 'a.b.!c': remove the expes where value `a` occurs and the
                        setting `b` equals `c`.

            For str values only the part after the last dot is compared.
            In 3-part rules `c` is converted to the type of the expe's
            value for `b` before comparing.

            Parameters
            ----------
            tensor : ExpTensor
            _id : int
                position of the tensor in `self._tensors`.

            Returns
            -------
            list of dict, each one holding its '_id_expe'.
        '''
        if len(tensor) == 0:
            lod = []
        else:
            keys = sorted(tensor)
            lod = [dict(zip(keys, prod)) for prod in product(*(tensor[key] for key in keys))]

        # POSTFILTERING
        # Bind Rules
        idtoremove = set()
        for expe_id, d in enumerate(lod):
            rules = self._bind[_id]
            if not rules:
                continue

            # Values of the expe, reduced to what follows the last dot for
            # strings. They don't depend on the rule: computed once per expe.
            values = [e.split('.')[-1] if type(e) is str else e for e in d.values()]

            for rule in rules:
                _bind = rule.split('.')

                if len(_bind) == 2:
                    # remove all occurence if this bind don't occur
                    # simultaneous in each expe.
                    a, b = _bind
                    if b.startswith('!'):
                        # Exclusive Rule
                        b = b[1:]
                        if a in values and b in values:
                            idtoremove.add(expe_id)
                    else:
                        # Inclusive Rule
                        if a in values and b not in values:
                            idtoremove.add(expe_id)

                elif len(_bind) == 3:
                    # remove occurence of this specific key:value if
                    # it does not comply with this bind.
                    a, b, c = _bind
                    # Get the type of this key:value.
                    _type = type(d[b])
                    if _type is bool:
                        # bool('False') is True: parse the literal instead.
                        _type = lambda x: True if x in ['True', 'true', '1'] else False

                    if c.startswith('!'):
                        # Exclusive Rule
                        c = c[1:]
                        if a in values and _type(c) == d[b]:
                            idtoremove.add(expe_id)
                    else:
                        # Inclusive Rule
                        if a in values and _type(c) != d[b]:
                            idtoremove.add(expe_id)

        lod = [d for i, d in enumerate(lod) if i not in idtoremove]
        # Save true size of tensor (_bind remove)
        self._tensors[_id]._size = len(lod)

        # Add extra information in lod expes: ids continue after the expes
        # of the preceding tensors.
        n_last_expe = sum(t._size for t in self._tensors[:_id])
        for offset, expe in enumerate(lod):
            expe['_id_expe'] = offset + n_last_expe

        return lod

    # @todo; lhs for clustering expe applications.
    def _make_hash(self, skip_check=False):
        ''' Compute the hash of every expe of the lod and store them in `self._hash`.

            The private keywords ('_repeat' excepted) are left out of the
            hashed content. When duplicates are found and `skip_check` is
            false, a warning is logged and a confirmation is asked.
        '''
        hashes = []
        duplicates = 0
        for expe in self._lod:
            public = expe.copy()
            for k in self._private_keywords:
                if k != '_repeat':
                    public.pop(k, None)

            digest = hash_objects(public)
            if digest in hashes:
                duplicates += 1
            hashes.append(digest)

        if not skip_check and duplicates > 0:
            lgg.warning('Duplicate experience: %d' % (duplicates))
            ask_sure_exit('Continue [y/n]?')
        self._hash = hashes

    def remake(self, indexs):
        ''' Update the curent tensors by selecting the ${indexs}.

            `self._lod` is reduced to the expes at the positions `indexs`
            and `self._tensors` is rebuilt from them.
            NOTE(review): `_bind`, `_hash`, `_ds_` and `_ds` are not touched
            here -- confirm whether they should be realigned.
        '''

        self._lod = [self._lod[i] for i in indexs]
        self._tensors = []

        new_tensor = ExpTensor()
        consume_expe = 0
        self._tensors.append(new_tensor)
        while consume_expe < len(self._lod):
            d = self._lod[consume_expe]
            res = new_tensor.push_dict(d)
            if res is False:
                # The current tensor refused this expe: open a new tensor and
                # retry the same expe (the index is not advanced).
                new_tensor = ExpTensor()
                self._tensors.append(new_tensor)
            else:
                consume_expe += 1
                new_tensor._size += 1

    def get_gt(self):
        ''' Get the Global Tensor: a dict mapping each key to the
            concatenation of its values over all the tensors
            (duplicates are kept).
            No _binding here...
        '''
        gt = {}
        for tensor in self._tensors:
            for k, v in tensor.items():
                _v = gt.get(k,[])
                # Concatenation builds a new list: the tensors' own lists are not modified.
                gt[k] = _v + v
        return gt

    def get_keys(self):
       ''' Return the list of the keys of the global tensor (see `get_gt`). '''
       return list(self.get_gt())

    def table(self):
        ''' Return a text description of the tensors.

            For each tensor: a title line (design name > spec name > size),
            followed by a table of its bind rules (if any were collected)
            and of its settings sorted by key.
        '''
        tables = []
        for pos, group in enumerate(self._tensors):
            # _ds_ holds one design per tensor (_ds holds one per expe and
            # cannot be indexed by a tensor position).
            design = self._ds_[pos] if pos < len(self._ds_) else None
            src = getattr(design, '__name__', 'void')
            spec = group.get('_name_expe', ['void'])[0]
            h = '=== %s > %s > %s expe ===' % (src, spec, group.get_size())
            tables.append(h)

            # No extra row when the bind rules have not been collected.
            extra = []
            if pos < len(self._bind):
                extra = [('_bind', self._bind[pos])]

            # Column headers are only shown for the first table.
            if pos == 0:
                headers = ['Params','Values']
            else:
                headers = ''

            tables.append(tabulate(extra + sorted(group.items(), key=lambda x: x[0]), headers=headers))

        return '\n'.join(tables)



class ExpDesign(dict, BaseObject):
    ''' An Ensemble composed of ExpTensors and ExpVectors.
        It is the base class to write group of specification.

        NOTES
        -----
        Special attribute meaning:
            _alias : dict
                used by `_name` to translate keywords
    '''

    def __init__(self,  *args, **kwargs):
        dict.__init__(self, *args, **kwargs)

        # Not a Ultimate solution to keep a flexibility when defining Exp Design:
        # every public, non-callable attribute is exposed as a dict item.
        for attr in dir(self):
            if attr.startswith('_'):
                continue
            value = getattr(self, attr)
            if callable(value):
                continue
            self[attr] = value

        # @debug: add callable in reserved keyword
        private = [w for w in dir(self) if w.startswith('_')]
        reserved = private + ['_reserved_keywords'] + dir(dict) + dir(BaseObject)
        self._reserved_keywords = list(set(reserved))

        BaseObject.__init__(self)

    def _specs(self):
        ''' Return the keys that are not reserved keywords. '''
        return [k for k in self if k not in self._reserved_keywords]

    @classmethod
    def _name(cls, l):
        ''' Translate `l` with the class `_alias` mapping.

            Without a (non-empty) `_alias`, `l` is returned unchanged.
            A set/list/tuple gives the list of its translated elements, a dict
            gives a new dict with its string values translated, anything else
            is translated directly. Entries without alias are kept as they are.
        '''
        _alias = getattr(cls, '_alias', None)
        if not _alias:
            return l

        if isinstance(l, (set, list, tuple)):
            return [_alias.get(i, i) for i in l]

        if isinstance(l, (dict, ExpSpace)):
            translated = dict(l)
            for k, v in translated.items():
                if isinstance(v, basestring) and v in _alias:
                    translated[k] = _alias[v]
            return translated

        return _alias.get(l, l)

Global variables

var lgg

Structure of Pymake Objects.

Classes

class BaseObject (*args, **kwargs)

Notes: to avoid method conflicts, ALWAYS list this class last among the base classes of a class definition.

Source code
class BaseObject(object):
    ''' Mixin shared by the Pymake objects.

        Notes : to avoid method conflicts, ALWAYS put this class last
                in the list of bases of a class definition.
    '''

    def __init__(self, *args, **kwargs):
        # Nothing to initialise: any argument is accepted and ignored.
        # "The tape is infinite..."
        # (An earlier draft guessed a `name` from the caller's source line
        #  through traceback.extract_stack(); it is disabled.)
        pass

    def items(self):
        ''' Return the elements of self as a list of (str(index), element). '''
        pairs = []
        for position, element in enumerate(self):
            pairs.append((str(position), element))
        return pairs

    def table(self):
        ''' Return the result of `tabulate` applied to `self.items()`. '''
        rows = self.items()
        return tabulate(rows)

Subclasses

Methods

def items(self)
Source code
def items(self):
    ''' Return the elements of self as a list of (str(index), element). '''
    pairs = []
    for position, element in enumerate(self):
        pairs.append((str(position), element))
    return pairs
def table(self)
Source code
def table(self):
    ''' Return the result of `tabulate` applied to `self.items()`. '''
    rows = self.items()
    return tabulate(rows)
class Corpus (*args, **kwargs)

A List of elements of an ExpTensor.

Source code
class Corpus(ExpVector):
    ''' Registry of the known corpora, described as plain dicts.

        Meta-grammar / Ontology :
          Corpus := {
                      data_type : text | network | image | ...,
                      dtype : "specific to a data_type"
                      name : "object identifier"
                      path : "disk repo"
                      source : url | random
                    }
    '''

    # IX integration needed..

    _corpus_data = [
        dict(name='clique'        , data_type='network', data_source='random', directed=False),
        dict(name='generator'     , data_type='network', data_source='random', directed=False, nodes=1000),
        dict(name='graph'         , data_type='network', data_source='random'),
        dict(name='alternate'     , data_type='network', data_source='random', directed=False),
        dict(name='BA'            , data_type='network', data_source='random'),
        dict(name='manufacturing' , data_type='network', data_source='web', directed=True, nodes=167, edges=5784, density=0.209, weighted=True),
        dict(name='fb_uc'         , data_type='network', data_source='web', directed=True, nodes=1899, edges=22195, density=0.006, weighted=True),
        dict(name='blogs'         , data_type='network', data_source='web', directed=True, nodes=1490, edges=19025, density=0.009, weighted=False),
        dict(name='emaileu'       , data_type='network', data_source='web', directed=True, nodes=1005, edges=25571, density=0.025, weighted=False),
        dict(name='propro'        , data_type='network', data_source='web', directed=False, nodes=2113, edges=1432, density=0.001, weighted=False),
        dict(name='euroroad'      , data_type='network', data_source='web', directed=True, nodes=1177, edges=1432, density=0.001, weighted=False),

        # gt
        dict(name='astro-ph',    data_type='network', data_source='gt', directed=False, nodes=16706, edges=121251, weighted=True),
        dict(name='cond-mat',    data_type='network', data_source='gt', directed=False, nodes=16726, edges=47594 , weighted=True),
        dict(name='hep-th',      data_type='network', data_source='gt', directed=False, nodes=8361,  edges=15751 , weighted=True),
        dict(name='netscience',  data_type='network', data_source='gt', directed=False, nodes=1589,  edges=2742  , weighted=True),
        dict(name='email-Enron', data_type='network', data_source='gt', directed=False, nodes=36692, edges=367662, weighted=False), # time weighted

        # Disabled entries, kept for reference:
        #dict(name='facebook'     ,  data_type='network', data_source='web', directed=True, nodes=None, edges=None, density=None, wheigted=None),

        #dict(name='reuter50'  , data_type='text', data_source='web'),
        #dict(name='nips12'    , data_type='text', data_source='web'),
        #dict(name='nips'      , data_type='text', data_source='web'),
        #dict(name='enron'     , data_type='text', data_source='web'),
        #dict(name='kos'       , data_type='text', data_source='web'),
        #dict(name='nytimes'   , data_type='text', data_source='web'),
        #dict(name='pubmed'    , data_type='text', data_source='web'),
        #dict(name='20ngroups' , data_type='text', data_source='web'),
        #dict(name='odp'       , data_type='text', data_source='web'),
        #dict(name='wikipedia' , data_type='text', data_source='web'),
        #dict(name='lucene', data_type='text', data_source='lucene'), # needs field spec
        #dict(name='mongo', data_type='text', data_source='mongo'), # needs field spec
    ]

    @classmethod
    def get(cls, corpus_name):
        ''' Look a corpus description up by name.

            The first entry of `_corpus_data` whose `name` is a *prefix* of
            `corpus_name` wins, and a copy of it is returned.
            Returns None for an empty name and False when nothing matches.
        '''
        if not corpus_name:
            return None

        # index/mongo...
        found = next((entry for entry in cls._corpus_data
                      if corpus_name.startswith(entry['name'])), None)
        return False if found is None else found.copy()

    @classmethod
    def get_all(cls):
        ''' Return the list of all corpus descriptions (the list itself, not a copy). '''
        return cls._corpus_data

Ancestors

Static methods

def get(corpus_name)
Source code
@classmethod
def get(cls, corpus_name):
    ''' Look a corpus description up by name.

        The first entry of `_corpus_data` whose `name` is a *prefix* of
        `corpus_name` wins, and a copy of it is returned.
        Returns None for an empty name and False when nothing matches.
    '''
    if not corpus_name:
        return None

    # index/mongo...
    found = next((entry for entry in cls._corpus_data
                  if corpus_name.startswith(entry['name'])), None)
    return False if found is None else found.copy()
def get_all()
Source code
@classmethod
def get_all(cls):
    ''' Return the list of all corpus descriptions (the list itself, not a copy). '''
    registry = cls._corpus_data
    return registry
class ExpDesign (*args, **kwargs)

An Ensemble composed of ExpTensors and ExpVectors. It is the base class to write group of specification.

NOTES

Special attribute meaning: _alias : dict use when self._name is called to translate keywords

Source code
class ExpDesign(dict, BaseObject):
    ''' A collection of ExpTensors and ExpVectors.
        Base class used to declare groups of specifications.

        NOTES
        -----
        Special attribute meaning:
            _alias : dict
                translation table used by `_name` to rename keywords
    '''

    def __init__(self,  *args, **kwargs):
        dict.__init__(self, *args, **kwargs)

        # Mirror every public, non-callable attribute into the mapping, so
        # that a design can be declared with plain class attributes.
        # (Not an ultimate solution, but keeps the declaration flexible.)
        for attr in dir(self):
            if attr.startswith('_'):
                continue
            value = getattr(self, attr)
            if callable(value):
                continue
            self[attr] = value

        # @debug: add callable in reserved keyword
        private = [name for name in dir(self) if name.startswith('_')]
        reserved = set(private + ['_reserved_keywords'] + dir(dict) + dir(BaseObject))
        self._reserved_keywords = list(reserved)

        BaseObject.__init__(self)

    def _specs(self):
        ''' Return the keys of the design that are not reserved keywords. '''
        reserved = self._reserved_keywords
        return [key for key in self.keys() if key not in reserved]

    @classmethod
    def _name(cls, l):
        ''' Translate `l` through the class `_alias` table, if any.

            Without a (non-empty) `_alias`, `l` is returned untouched.
            * set/list/tuple : a list with every item translated.
            * dict/ExpSpace  : a dict copy whose string *values* are translated.
            * anything else  : the translation of `l` itself.
            Entries missing from `_alias` are kept as they are.
        '''
        _alias = getattr(cls, '_alias', None)
        if not _alias:
            return l

        if isinstance(l, (set, list, tuple)):
            return [_alias.get(item, item) for item in l]

        if isinstance(l, (dict, ExpSpace)):
            return {
                key: (_alias[val] if isinstance(val, basestring) and val in _alias else val)
                for key, val in dict(l).items()
            }

        return _alias.get(l, l)

Ancestors

class ExpGroup (args, **kwargs)

A List of elements of an ExpTensor.

Source code
class ExpGroup(list, BaseObject):
    ''' A List of elements of an ExpTensor.

        Parameters
        ----------
        args : list or dict
            The elements of the group. A single dict is wrapped in a list.
        **kwargs
            Settings written into every dict found in the group
            (recursively through nested lists). When given, the group is
            built from a deep copy of `args`, so the caller's objects are
            left untouched.
    '''

    def __init__(self, args, **kwargs):
        if kwargs:
            # The elements are modified by update_all() below.
            args = deepcopy(args)

        if isinstance(args, dict):
            args = [args]

        # NOTE: deep-copying the individual dict/ExpGroup elements here was
        # tried by the original author and reported as not working well.

        list.__init__(self, args)
        # Initialise the mixin on the instance itself (the receiver used to
        # be `args`, i.e. the raw input instead of `self`).
        BaseObject.__init__(self, **kwargs)

        # Recursively update value if kwargs found.
        if kwargs:
            self.update_all(self, kwargs)

    def update_all(self, l, d):
        ''' Write every item of `d` into each dict of `l`, in place,
            descending recursively into nested lists. Returns None.
        '''
        for o in l:
            if isinstance(o, list):
                self.update_all(o, d)
            elif isinstance(o, dict):
                for k, v in d.items():
                    o[k] = v
        return

    def __add__(self, other):
        ''' Concatenation that keeps the result an instance of this class. '''
        return self.__class__(list.__add__(self, other))

    def __sub__(self, other):
        ''' Elements of self that are not in `other`, as this class. '''
        return self.__class__([item for item in self if item not in other])

Ancestors

Methods

def update_all(self, l, d)
Source code
def update_all(self, l, d):
    ''' Write every item of `d` into each dict of `l`, in place,
        descending recursively into nested lists. Returns None.
    '''
    for element in l:
        if isinstance(element, list):
            self.update_all(element, d)
            continue
        if not isinstance(element, dict):
            continue
        for key, value in d.items():
            element[key] = value
    return
class ExpSpace (*args, **kwargs)

A dictionary with dot notation access. Used for the expe settings stream.

Source code
class ExpSpace(dict):
    """ A dictionary with dot notation access.
        Used for the **expe** settings stream.

        Attribute assignment and deletion are redirected to the mapping:
        `e.x = 1` is `e['x'] = 1` and `del e.x` is `del e['x']`.
        Reading a missing attribute raises AttributeError, except for
        `_write` which defaults to False.
    """

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
    #__builtins__.hasattr = hasattr

    def __init__(self, *args, **kwargs):
        super(ExpSpace, self).__init__(*args, **kwargs)
        # Items are also written one by one through item assignment.
        for arg in args:
            if isinstance(arg, dict):
                for k, v in arg.items():
                    self[k] = v

        if kwargs:
            for k, v in kwargs.items():
                self[k] = v

    def copy(self):
        ''' Return a shallow copy, of the same class as self. '''
        return type(self)(self)

    def __copy__(self):
        # Built from the mapping itself rather than from `**self`, which
        # raises TypeError as soon as one key is not a string.
        return self.__class__(self)

    def __deepcopy__(self, memo):
        # NOTE(review): this returns a *shallow* copy; nested containers
        # stay shared with the original. Kept as is, callers may rely on it.
        return self.copy()

    def __getattr__(self, key):
        # Only called when the regular attribute lookup fails.
        try:
            return self[key]
        except KeyError:
            # Default pmk settings
            if key == '_write':
                return False

            lgg.debug('an ExpSpace request exceptions occured for key: %s ' % (key))
            raise AttributeError(key)

    # Scratch method because __hasattr__ catch an error in getattr.
    def hasattr(self, key):
        ''' Return True if `key` is a key of the mapping. '''
        return key in self

    # For pickling
    def __getstate__(self):
        return self

    def __setstate__(self, state):
        # The former `self.__dict__ = self` is dropped on purpose: since
        # __setattr__ is dict.__setitem__, it did not rebind the instance
        # dict but inserted a '__dict__' key pointing to the object itself
        # into every unpickled ExpSpace.
        self.update(state)

Ancestors

  • builtins.dict

Methods

def copy(self)

D.copy() -> a shallow copy of D

Source code
def copy(self):
    ''' Return a shallow copy, of the same class as self. '''
    klass = type(self)
    return klass(self)
def hasattr(self, key)
Source code
def hasattr(self, key):
    ''' Return True if `key` is a key of the mapping (no getattr involved). '''
    present = key in self
    return present
class ExpTensor (*args, **kwargs)

Represent a set of Experiences (expe).

Source code
class ExpTensor(OrderedDict, BaseObject):
    ''' Represent a set of Experiences (**expe**).

        The tensor maps each setting name to the vector (list) of values it
        takes. `_size` holds the number of expe once it has been set from
        outside (see `get_size`).
    '''
    def __init__(self,  *args, **kwargs):
        OrderedDict.__init__(self, *args, **kwargs)
        BaseObject.__init__(self)

        self._size = 0

    @classmethod
    def from_expe(cls, conf=None, expe=None, parser=None):
        ''' Return the tensor, which is an OrderedDict of iterables.
            Non list values are wrapped in a list.

            Parameters
            ----------
            conf : dict-like, optional
                Settings applied on top of `expe` through `update_from_dict`.
                Also used as `expe` when `expe` is None.
            expe : (ExpDesign, ExpSpace or dict)
                A design of experiment.
            parser : optional
                Forwarded to `update_from_dict` when `conf` is applied.

            Raises
            ------
            ValueError
                If `expe` (or `conf` standing for it) is of an unknown type.
        '''
        # `conf` defaults to None: do not call .copy() on it blindly.
        _conf = conf.copy() if conf is not None else {}
        if expe is None:
            expe = conf

        if not issubclass(type(expe), (cls, ExpSpace, dict, ExpVector)):
            raise ValueError('Expe not understood: %s' % type(expe))

        if issubclass(type(expe), Corpus):
            tensor = cls(corpus=expe)
        elif issubclass(type(expe), Model):
            tensor = cls(model=expe)
        elif issubclass(type(expe), ExpVector):
            tensor = cls((str(i),j) for i,j in enumerate(expe))
        elif isinstance(expe, ExpTensor):
            tensor = expe.copy()
        elif isinstance(expe, (dict, ExpSpace)):
            tensor = cls()
            tensor.update_from_dict(expe)
        else:
            raise NotImplementedError('input type of ExpVector unknow %s' % (expe))

        # Only existing keys are reassigned, so iterating items() is safe.
        for k, v in tensor.items():
            if not issubclass(type(v), (list, set, tuple)):
                tensor[k] = [v]

        if _conf:
            tensor.update_from_dict(_conf, parser=parser)

        return tensor

    def update_from_dict(self, d, parser=None):
        ''' Update a tensor from a dict

            Parameters
            ----------
            d : dict
                the dict that updates the tensor
            parser : optional
                When given, `get_dest_opt_filled(parser)` is queried and a
                setting of `d` is skipped if it is not among the returned
                destinations while already present in self.

            Notes
            -----
            * The key '_id_expe' is always ignored.
            * ExpVector values are stored as they are; any other value is
              wrapped in a one-element list.
            * Should inherit the _reserved keywords (left unfinished in the
              original note).
        '''

        if parser is not None:
            dests_filled = get_dest_opt_filled(parser)

        for k, v in d.items():
            if k in ['_id_expe']:
                continue

            if parser is not None:
                if k not in dests_filled and k in self:
                    continue

            if issubclass(type(v), ExpVector):
                self[k] = v
            else:
                self[k] = [v]

    def get_size(self, virtual=False):
        ''' Return the stored `_size`, or, if `virtual`, the product of the
            lengths of all the vectors of the tensor.
        '''
        if virtual:
            return  np.prod([len(x) for x in self.values()])
        else:
            return self._size


    def push_dict(self, d):
        ''' Push one dict inside an ExpTensor.
            It extends the _bind rule to filter the tensor.

            Returns
            -------
            bool
                True if `d` was merged. False, with self left unchanged, if
                `d` holds a key (other than '_id_expe') that the non-empty
                tensor does not know or holds with an empty vector.
        '''
        if len(self) == 0:
            self.update_from_dict(d)
            return True

        _need_bind = False
        _up_dict = {}
        for k, v in d.items():
            if k in ['_id_expe']:
                continue

            # Work on a copy: self is only modified if the whole dict fits.
            vector = self.get(k, []).copy()
            if v not in vector:
                if len(vector) == 0:
                    _need_bind = True
                    #lgg.debug('setting to bind: (%s : %s)' % (k, v))
                    break
                vector.append(v)
            _up_dict[k] = vector

        if _need_bind:
            #raise NotImplementedError('Need to push bind value to build a tensor from non-overlaping settings.')
            return False
        else:
            self.update(_up_dict)
            return True


    def table(self, extra=[]):
        ''' Tabulate `extra` rows followed by the items sorted by key.
            `extra` is only read, never modified.
        '''
        return tabulate(extra+sorted(self.items(), key=lambda x:x[0]),
                               headers=['Params','Values'])

Ancestors

  • collections.OrderedDict
  • builtins.dict
  • BaseObject

Static methods

def from_expe(conf=None, expe=None, parser=None)

Return the tensor, which is an OrderedDict of iterables. Assume conf is an expe. Non-list values will be wrapped in a list.

Parameters

expe : (ExpDesign, ExpSpace or dict)
A design of experiment.
Source code
@classmethod
def from_expe(cls, conf=None, expe=None, parser=None):
    ''' Return the tensor, which is an OrderedDict of iterables.
        Non list values are wrapped in a list.

        Parameters
        ----------
        conf : dict-like, optional
            Settings applied on top of `expe` through `update_from_dict`.
            Also used as `expe` when `expe` is None.
        expe : (ExpDesign, ExpSpace or dict)
            A design of experiment.
        parser : optional
            Forwarded to `update_from_dict` when `conf` is applied.

        Raises
        ------
        ValueError
            If `expe` (or `conf` standing for it) is of an unknown type.
    '''
    # `conf` defaults to None: do not call .copy() on it blindly.
    _conf = conf.copy() if conf is not None else {}
    if expe is None:
        expe = conf

    if not issubclass(type(expe), (cls, ExpSpace, dict, ExpVector)):
        raise ValueError('Expe not understood: %s' % type(expe))

    if issubclass(type(expe), Corpus):
        tensor = cls(corpus=expe)
    elif issubclass(type(expe), Model):
        tensor = cls(model=expe)
    elif issubclass(type(expe), ExpVector):
        tensor = cls((str(i),j) for i,j in enumerate(expe))
    elif isinstance(expe, ExpTensor):
        tensor = expe.copy()
    elif isinstance(expe, (dict, ExpSpace)):
        tensor = cls()
        tensor.update_from_dict(expe)
    else:
        raise NotImplementedError('input type of ExpVector unknow %s' % (expe))

    # Only existing keys are reassigned, so iterating items() is safe.
    for k, v in tensor.items():
        if not issubclass(type(v), (list, set, tuple)):
            tensor[k] = [v]

    if _conf:
        tensor.update_from_dict(_conf, parser=parser)

    return tensor

Methods

def get_size(self, virtual=False)
Source code
def get_size(self, virtual=False):
    ''' Return the stored `_size`, or, if `virtual`, the product of the
        lengths of all the vectors of the tensor.
    '''
    if not virtual:
        return self._size

    lengths = [len(vector) for vector in self.values()]
    return np.prod(lengths)
def push_dict(self, d)

Push one dict inside an ExpTensor. It extends the _bind rule to filter the tensor.

Source code
def push_dict(self, d):
    ''' Push one dict inside an ExpTensor.
        It extends the _bind rule to filter the tensor.

        Returns
        -------
        bool
            True if `d` was merged. False, with self left unchanged, if
            `d` holds a key (other than '_id_expe') that the non-empty
            tensor does not know or holds with an empty vector.
    '''
    # (The product of the vector lengths used to be computed here and was
    #  never used; that dead computation is removed.)
    if len(self) == 0:
        self.update_from_dict(d)
        return True

    _up_dict = {}
    for k, v in d.items():
        if k in ['_id_expe']:
            continue

        # Work on a copy: self is only modified if the whole dict fits.
        vector = self.get(k, []).copy()
        if v not in vector:
            if len(vector) == 0:
                # Setting unknown to this tensor: it would need a bind rule.
                #raise NotImplementedError('Need to push bind value to build a tensor from non-overlaping settings.')
                return False
            vector.append(v)
        _up_dict[k] = vector

    self.update(_up_dict)
    return True
def table(self, extra=[])
Source code
def table(self, extra=[]):
    ''' Tabulate `extra` rows followed by the items sorted by key.
        `extra` is only read, never modified.
    '''
    ordered = sorted(self.items(), key=lambda pair: pair[0])
    rows = extra + ordered
    return tabulate(rows, headers=['Params','Values'])
def update_from_dict(self, d, parser=None)

Update a tensor from a dict

Parameters

d : dict
the dict that updates the tensor
from_argv : bool
if True, the dict is assumed to come from a CLI argparser, if the following conditions are true: * the settings in {d} are specified in the CLI (@check already filtered in GramExp.parseargs) * the settings in {d} are not in the CLI, and not in self.

Notes

SHould inherit _reserved keyword to prevent

Source code
def update_from_dict(self, d, parser=None):
    ''' Update a tensor from a dict

        Parameters
        ----------
        d : dict
            the dict that updates the tensor
        parser : optional
            When given, `get_dest_opt_filled(parser)` is queried and a
            setting of `d` is skipped if it is not among the returned
            destinations while already present in self.

        Notes
        -----
        * The key '_id_expe' is always ignored.
        * ExpVector values are stored as they are; any other value is
          wrapped in a one-element list.
        * Should inherit the _reserved keywords (left unfinished in the
          original note).
    '''
    use_parser = parser is not None
    if use_parser:
        dests_filled = get_dest_opt_filled(parser)

    for key, value in d.items():
        if key in ['_id_expe']:
            continue

        if use_parser and key not in dests_filled and key in self:
            continue

        self[key] = value if issubclass(type(value), ExpVector) else [value]
class ExpTensorV2 (private_keywords=[])

Represent a set of Experiences (expe) of type ExpTensor…

Source code
class ExpTensorV2(BaseObject):
    ''' Represent a set of Experiences (**expe**) of type ExpTensor...

        The object holds a list of ExpTensor (`_tensors`) and, once
        `make_lod` has run, the flat list of expe dicts (`_lod`) obtained
        from the cartesian product of each tensor, filtered by the `_bind`
        rules collected by `check_bind`.
    '''
    def __init__(self, private_keywords=[]):
        BaseObject.__init__(self)
        # Only read, never modified here.
        self._private_keywords = private_keywords

        # --- Those are aligned ---
        self._tensors = [] # list of ExpTensor
        self._bind = []
        self._null = defaultdict(list)
        self._hash = []
        self._ds_ = [] # ExpDesign class per tensor
        #
        self._lod = [] # list of dict
        self._ds = [] # ExpDesign class per expe
        # --- meta ---
        self._conf = {}
        self._size = None

    @classmethod
    def from_conf(cls, conf, _max_expe=2e6, private_keywords=[], expdesign=None):
        ''' Build the tensors from a configuration.

            `conf` is modified in place: its '_spec' entry is popped. Without
            a spec, a single tensor named '_default_expe' is built from
            `conf` itself. Otherwise every element of the spec becomes one
            tensor (ExpGroup elements are expanded in place), completed by
            `conf`.

            NOTE(review): `name` and `_type` are only bound when a
            (name, object, type) tuple is met; a spec starting with a non
            tuple element raises UnboundLocalError.
        '''
        gt = cls(private_keywords=private_keywords)
        _spec = conf.pop('_spec', None)
        if not _spec:
            if not expdesign:
                expdesign = ExpDesign
            conf['_name_expe'] = '_default_expe'
            conf['_expe_hash'] = hash_objects(dict((k,v) for k,v in conf.items() if k not in private_keywords))
            gt._tensors.append(ExpTensor.from_expe(conf))
            gt._ds_.append(expdesign)
            return gt

        exp = []
        size_expe = len(_spec)
        consume_expe = 0
        while consume_expe < size_expe:
            o = _spec[consume_expe]
            if isinstance(o, tuple):
                #_type => expdesign
                name, o, _type = o

            if isinstance(o, ExpGroup):
                # Replace the group by its elements and visit them next;
                # they inherit the current `name` and `_type`.
                size_expe += len(o) -1
                _spec = _spec[:consume_expe] + o + _spec[consume_expe+1:]
            elif isinstance(o, list): # ExpVector
                exp.append(o)
                gt._ds_.append(_type)
                consume_expe += 1
            else:
                o['_name_expe'] = name
                o['_expe_hash'] = hash_objects(dict((k,v) for k,v in o.items() if k not in private_keywords))
                if hasattr(_type, '_alias'):
                    o['_alias'] = getattr(_type, '_alias')

                exp.append(o)
                gt._ds_.append(_type)
                consume_expe += 1

            if size_expe > _max_expe:
                lgg.warning('Number of experiences exceeds the hard limit of %d (please review ExpTensor).' % _max_expe)

        gt._tensors.extend([ExpTensor.from_expe(conf, spec) for spec in exp])
        return gt

    def __iter__(self):
        for tensor in self._tensors:
            yield tensor

    def __len__(self):
        return self.get_size()

    def remove_all(self, key):
        ''' Remove `key` from the conf, from every tensor and every expe. '''
        if key in self._conf:
            self._conf.pop(key)

        for tensor in self._tensors:
            if key in tensor:
                tensor.pop(key)

        for d in self._lod:
            if key in d:
                d.pop(key)

    def update_all(self, **kwargs):
        ''' Set the given settings in the conf, every tensor and every expe. '''
        self._conf.update(kwargs)

        for tensor in self._tensors:
            tensor.update_from_dict(kwargs)

        for d in self._lod:
            d.update(kwargs)

    def set_default_all(self, defconf):
        ''' set default value in exp '''

        # Update current spec with _default_expe
        for k, v in defconf.items():

            for tensor in self._tensors:
                if k not in tensor:
                    tensor[k] = [v]
            for expe in self._lod:
                if k not in expe:
                    expe[k] = v
            if k in self._conf:
                # @debug: dont test if all the group have this unique value.
                self._conf[k] = v

    def get_all(self, key, default=[]):
        ''' Get all values associated to a given key.

            Values are read from the expe list when it has been built,
            otherwise from the tensors. `default` is returned when nothing
            is found.
        '''

        vec = []

        # `_lod` always exists (set in __init__): test its content, not its
        # presence, otherwise the tensor branch can never run.
        if self._lod:
            for d in self._lod:
                if key in d:
                    vec.append(d[key])
        else:
            for tensor in self._tensors:
                vec.extend(tensor.get(key, []))

        if not vec:
            return default
        else:
            return vec

    def get_nounique_keys(self):
        ''' Return key that has gridded (different value occurence in the set of tensor). '''
        keys = defaultdict(set)
        for tensor in self._tensors:
            for k in tensor:
                o = tensor.get(k, [])
                for v in o:
                    # Only public string values are taken into account.
                    if isinstance(v, str) and not v.startswith('_'):
                        keys[k].add(v)

        nounique_keys = []
        for k, _set in keys.items():
            if len(_set) > 1:
                nounique_keys.append(k)

        return nounique_keys


    def get_conf(self):
        ''' Compute, store and return the settings that hold one single
            value, identical in every tensor that defines them.
        '''
        _conf = {}
        # Keys already seen with several or conflicting values. Without
        # this memory, a later tensor holding the key with a single value
        # would put it back into the conf.
        rejected = set()
        for tensor in self._tensors:
            for k, v in tensor.items():
                if k in rejected:
                    continue

                if len(v) != 1 or (k in _conf and v[0] != _conf[k]):
                    _conf.pop(k, None)
                    rejected.add(k)
                else:
                    _conf[k] = v[0]

        self._conf = _conf
        return self._conf

    def get_size(self):
        ''' Sum, store and return the `get_size()` of every tensor. '''
        size = 0
        for tensor in self._tensors:
            size += tensor.get_size()
        self._size = size
        return self._size

    def check_bind(self):
        ''' Rules Filter

            Pop the '_bind' entry of every tensor and append it, normalised
            to a list of rules, to `self._bind` (an empty list for a tensor
            without rules).
        '''

        for tensor in self._tensors:

            if '_bind' in tensor:
                _bind = tensor.pop('_bind')
                if not isinstance(_bind, list):
                    _bind = [_bind]
                elif len(_bind) ==1 and isinstance(_bind[0], list):
                    _bind = _bind[0]
            else:
                #_bind = getattr(self, '_bind', [])
                _bind = []

            self._bind.append(_bind)

    def check_model_typo(self):
        ''' Assume default module is pymake

            Prefix, in place, every model name without a dot with a prefix
            derived from the 'default_model' pymake setting.
        '''
        for tensor in self._tensors:
            models = tensor.get('model', [])
            for i, m in enumerate(models):

                if '.' not in m:
                    # Set the model ref name
                    pkg = get_pymake_settings('default_model')
                    if len(pkg) > 8:
                        # Long names: 3 first letters + the initial of
                        # each sub-package.
                        prefix = pkg[:3]
                        if '.' in pkg:
                            prefix  += ''.join(map(lambda x:x[0], pkg.split('.')[1:]))
                    else:
                        prefix = pkg.split('.')[0]

                    models[i] = '%s.%s'%(prefix, m)

    def check_null(self):
        ''' Filter _null

            Move every tensor entry whose vector contains '_null' into
            `self._null[key]`.
        '''
        for tensor in self._tensors:
            # Iterate on a copy of the keys: entries are popped on the way.
            for k in list(tensor.keys()):
                if '_null' in tensor.get(k, []):
                    v = tensor.pop(k)
                    self._null[k].append(v)

    def make_lod(self, skip_check=False):
        ''' Make a list of Expe from tensor, with filtering '''

        self._lod = []
        # `_ds` is aligned with `_lod`: reset it too, otherwise it keeps
        # growing each time the list of expe is rebuilt.
        self._ds = []
        for _id, tensor in enumerate(self._tensors):
            lods = self._make_lod(tensor, _id)
            tensor._size = len(lods)
            self._lod.extend(lods)
            self._ds.extend([self._ds_[_id]]*len(lods))

        self._make_hash(skip_check)
        return self._lod

    def _make_lod(self, tensor, _id):
        ''' 1. make dol to lod
            2. filter _bind rule
            3. add special parameter (expe_id)

            Bind rules are dot separated strings:
              * 'a.b'   : drop the expe where value a occurs without value b
              * 'a.!b'  : drop the expe where values a and b both occur
              * 'a.b.c' : drop the expe where value a occurs and key b is not c
              * 'a.b.!c': drop the expe where value a occurs and key b is c
            String values are compared on their last dot separated part.
        '''
        if len(tensor) == 0:
            lod =  []
        else:
            keys = sorted(tensor)
            lod = [dict(zip(keys, prod)) for prod in product(*(tensor[key] for key in keys))]

        # POSTFILTERING
        # Bind Rules
        idtoremove = set()
        for expe_id, d in enumerate(lod):
            for rule in self._bind[_id]:
                _bind = rule.split('.')
                values = list(d.values())

                # This is only for  last dot separator process
                for j, e in enumerate(values):
                    if type(e) is str:
                        values[j] = e.split('.')[-1]


                if len(_bind) == 2:
                    # remove all occurence if this bind don't occur
                    # simltaneous in each expe.
                    a, b = _bind
                    if b.startswith('!'):
                        # Exclusif Rule
                        b = b[1:]
                        if a in values and b in values:
                            idtoremove.add(expe_id)
                    else:
                        # Inclusif Rule
                        if a in values and b not in values:
                            idtoremove.add(expe_id)

                elif len(_bind) == 3:
                    # remove occurence of this specific key:value if
                    # it does not comply with this bind.
                    a, b, c = _bind
                    # Get the type of this key:value, to cast the rule
                    # value before the comparison.
                    _type = type(d[b])
                    if _type is bool:
                        _type = lambda x: True if x in ['True', 'true', '1'] else False

                    if c.startswith('!'):
                        # Exclusif Rule
                        c = c[1:]
                        if a in values and _type(c) == d[b]:
                            idtoremove.add(expe_id)
                    else:
                        # Inclusif Rule
                        if a in values and _type(c) != d[b]:
                            idtoremove.add(expe_id)


        lod = [d for i,d in enumerate(lod) if i not in idtoremove]
        # Save true size of tensor (_bind remove)
        self._tensors[_id]._size = len(lod)

        # Add extra information in lod expes: a global expe id, shifted by
        # the number of expe of the previous tensors.
        n_last_expe = sum([t._size for t in self._tensors[:_id]])
        for local_id, expe in enumerate(lod):
            expe['_id_expe'] = local_id + n_last_expe

        return lod

    # @todo; lhs for clustering expe applications.
    def _make_hash(self, skip_check=False):
        ''' Hash every expe (private keywords, except '_repeat', excluded)
            into `self._hash`. Duplicates trigger a warning and a
            confirmation prompt unless `skip_check` is set.
        '''
        _hash = []
        n_duplicate = 0
        for _d in self._lod:
            d = _d.copy()
            for k in self._private_keywords:
                if k in d and k != '_repeat':
                    d.pop(k)
            o = hash_objects(d)
            if o in _hash:
                n_duplicate += 1
            _hash.append(o)

        if n_duplicate > 0 and not skip_check:
            lgg.warning('Duplicate experience: %d' % (n_duplicate))
            ask_sure_exit('Continue [y/n]?')
        self._hash = _hash

    def remake(self, indexs):
        ''' Update the curent tensors by selecting the ${indexs} '''

        self._lod = [self._lod[i] for i in indexs]
        self._tensors = []

        new_tensor = ExpTensor()
        consume_expe = 0
        self._tensors.append(new_tensor)
        while consume_expe < len(self._lod):
            d = self._lod[consume_expe]
            res = new_tensor.push_dict(d)
            if res is False:
                # The expe does not fit: retry it on a fresh tensor.
                new_tensor = ExpTensor()
                self._tensors.append(new_tensor)
            else:
                consume_expe += 1
                new_tensor._size += 1

    def get_gt(self):
        ''' get Global Tensors.
            No _binding here...
        '''
        gt = {}
        for tensor in self._tensors:
            for k, v in tensor.items():
                _v = gt.get(k,[])
                gt[k] = _v + v
        return gt

    def get_keys(self):
        ''' Return the list of keys met in any tensor. '''
        return list(self.get_gt())

    def table(self):
        ''' Return one titled table of settings per tensor, joined by newlines. '''
        tables = []
        for idx, group in enumerate(self._tensors):
            # NOTE(review): `_ds` is documented as per *expe* while `idx` is
            # a tensor index; `_ds_` may be what is meant here -- confirm.
            src = self._ds[idx].__name__
            spec = group.get('_name_expe', ['void'])[0]
            h = '=== %s > %s > %s expe ===' % (src, spec, group.get_size())
            tables.append(h)

            # `extra` must be defined even when no bind rule was collected.
            extra = [('_bind', self._bind[idx])] if self._bind else []

            # Column headers are only printed for the first table.
            headers = ['Params','Values'] if idx == 0 else ''

            tables.append(tabulate(extra+sorted(group.items(), key=lambda x:x[0]), headers=headers))

        return '\n'.join(tables)

Ancestors

Static methods

def from_conf(conf, private_keywords=[], expdesign=None)
Source code
@classmethod
def from_conf(cls, conf, _max_expe=2e6, private_keywords=[], expdesign=None):
    ''' Build the tensors from a configuration.

        `conf` is modified in place: its '_spec' entry is popped. Without a
        spec, a single tensor named '_default_expe' is built from `conf`
        itself. Otherwise every element of the spec becomes one tensor
        (ExpGroup elements are expanded in place), completed by `conf`.
    '''
    gt = cls(private_keywords=private_keywords)
    _spec = conf.pop('_spec', None)

    if not _spec:
        # No design given: the conf alone is the experiment.
        expdesign = expdesign or ExpDesign
        conf['_name_expe'] = '_default_expe'
        public = dict((k, v) for k, v in conf.items() if k not in private_keywords)
        conf['_expe_hash'] = hash_objects(public)
        gt._tensors.append(ExpTensor.from_expe(conf))
        gt._ds_.append(expdesign)
        return gt

    exp = []
    total = len(_spec)
    cursor = 0
    while cursor < total:
        o = _spec[cursor]
        if isinstance(o, tuple):
            # (name, object, expdesign); kept for the elements that follow.
            name, o, _type = o

        if isinstance(o, ExpGroup):
            # Splice the group elements in place of the group, and visit
            # them at the next iterations.
            total += len(o) -1
            _spec = _spec[:cursor] + o + _spec[cursor+1:]
        else:
            if not isinstance(o, list): # i.e. not an ExpVector
                o['_name_expe'] = name
                public = dict((k, v) for k, v in o.items() if k not in private_keywords)
                o['_expe_hash'] = hash_objects(public)
                if hasattr(_type, '_alias'):
                    o['_alias'] = getattr(_type, '_alias')

            exp.append(o)
            gt._ds_.append(_type)
            cursor += 1

        if total > _max_expe:
            lgg.warning('Number of experiences exceeds the hard limit of %d (please review ExpTensor).' % _max_expe)

    for spec in exp:
        gt._tensors.append(ExpTensor.from_expe(conf, spec))
    return gt

Methods

def check_bind(self)

Rules Filter

Source code
def check_bind(self):
    ''' Extract the '_bind' rules of every tensor.

        The '_bind' entry is removed from each tensor and normalised to a
        flat list, which is appended to `self._bind` (an empty list when
        the tensor has no '_bind' entry).
    '''
    for tensor in self._tensors:
        if '_bind' not in tensor:
            self._bind.append([])
            continue

        rules = tensor.pop('_bind')
        if isinstance(rules, list):
            # A list wrapping exactly one list is unwrapped.
            if len(rules) == 1 and isinstance(rules[0], list):
                rules = rules[0]
        else:
            # A lone rule becomes a one-element list.
            rules = [rules]

        self._bind.append(rules)
def check_model_typo(self)

Assume default module is pymake

Source code
def check_model_typo(self):
    ''' Qualify bare model names with a prefix.

        Each entry of a tensor's 'model' list that contains no '.' is
        rewritten in place as '<prefix>.<name>'. The prefix is derived from
        the 'default_model' pymake setting.
    '''
    for tensor in self._tensors:
        names = tensor.get('model', [])
        for pos, name in enumerate(names):
            if '.' in name:
                # Already qualified.
                continue

            # Set the model ref name
            package = get_pymake_settings('default_model')
            if len(package) <= 8:
                prefix = package.split('.')[0]
            else:
                # Long package name: first three characters, followed by the
                # initial of every dotted component after the first one.
                prefix = package[:3]
                if '.' in package:
                    prefix += ''.join(part[0] for part in package.split('.')[1:])

            names[pos] = '%s.%s' % (prefix, name)
def check_null(self)

Filter _null

Source code
def check_null(self):
    ''' Move every tensor entry whose value contains '_null' into `self._null`.

        The entry is popped from the tensor and its value appended to
        `self._null[key]`.
    '''
    for tensor in self._tensors:
        # Iterate over a snapshot of the keys since entries are removed.
        for key in list(tensor.keys()):
            if '_null' not in tensor.get(key, []):
                continue
            removed = tensor.pop(key)
            self._null[key].append(removed)
def get_all(self, key, default=[])

Get all values associated to a given key.

Source code
def get_all(self, key, default=None):
    ''' Get all values associated to a given key.

        When `self._lod` exists, one value is collected per expe that holds
        `key`; otherwise the `key` lists of every tensor are concatenated.

        Parameters
        ----------
        key : the key to look up.
        default : value returned when nothing is found. When omitted (or
            None) a fresh empty list is returned, so that callers mutating
            the result cannot affect later calls.

        Returns
        -------
        The list of collected values, or `default` if that list is empty.
    '''
    if default is None:
        # A new list per call: a shared mutable default would leak state.
        default = []

    if hasattr(self, '_lod'):
        vec = [d[key] for d in self._lod if key in d]
    else:
        vec = []
        for tensor in self._tensors:
            vec.extend(tensor.get(key, []))

    return vec if vec else default
def get_conf(self)
Source code
def get_conf(self):
    ''' Collect the settings that hold one single value across the tensors.

        A key is kept when every tensor that defines it gives it exactly one
        value and that value is the same everywhere. A key that is missing
        from some tensors is still kept.

        The result is stored in `self._conf` and returned.
    '''
    shared = {}
    # Keys already found to be non-unique. Without this memory a later
    # tensor would silently re-insert a key that was discarded earlier.
    rejected = set()

    for tensor in self._tensors:
        for key, values in tensor.items():
            if key in rejected:
                continue

            conflict = len(values) != 1 or (key in shared and values[0] != shared[key])
            if conflict:
                shared.pop(key, None)
                rejected.add(key)
            else:
                shared[key] = values[0]

    self._conf = shared
    return self._conf
def get_gt(self)

get Global Tensors. No _binding here…

Source code
def get_gt(self):
    ''' Build the global tensor.

        Returns a dict mapping every key found in the tensors to the
        concatenation of its value lists, in tensor order. Binding rules
        are not applied here.
    '''
    merged = {}
    for tensor in self._tensors:
        for key, values in tensor.items():
            previous = merged[key] if key in merged else []
            # `+` builds a new list, leaving the tensor's own list untouched.
            merged[key] = previous + values
    return merged
def get_keys(self)
Source code
def get_keys(self):
    ''' Return the keys of the global tensor (see `get_gt`) as a list. '''
    global_tensor = self.get_gt()
    return [key for key in global_tensor]
def get_nounique_keys(self)

Return the keys that are gridded (i.e. that take different values across the set of tensors).

Source code
def get_nounique_keys(self):
    ''' Return the keys that take more than one distinct value over the tensors.

        Only string values that do not start with '_' are counted.
    '''
    seen = {}
    for tensor in self._tensors:
        for key in tensor:
            for value in tensor.get(key, []):
                if not isinstance(value, str) or value.startswith('_'):
                    continue
                seen.setdefault(key, set()).add(value)

    return [key for key, values in seen.items() if len(values) > 1]
def get_size(self)
Source code
def get_size(self):
    ''' Sum the sizes of all tensors, cache the total in `self._size` and return it. '''
    self._size = sum(tensor.get_size() for tensor in self._tensors)
    return self._size
def make_lod(self, skip_check=False)

Make a list of Expe from tensor, with filtering

Source code
def make_lod(self, skip_check=False):
    ''' Expand every tensor into its list of expes.

        Rebuilds `self._lod` from scratch, records the number of expes of
        each tensor in `tensor._size`, and appends to `self._ds` one design
        per generated expe. `self._make_hash(skip_check)` is called once the
        list is complete.

        Returns the new `self._lod`.
    '''
    self._lod = []
    for position, tensor in enumerate(self._tensors):
        expes = self._make_lod(tensor, position)
        count = len(expes)
        tensor._size = count
        self._lod.extend(expes)
        # One design entry per expe produced by this tensor.
        design = self._ds_[position]
        self._ds.extend([design for _ in range(count)])

    self._make_hash(skip_check)
    return self._lod
def remake(self, indexs)

Update the current tensors by selecting the ${indexs}

Source code
def remake(self, indexs):
    ''' Keep only the expes at positions `indexs` and rebuild the tensors.

        `self._lod` is reduced to the selected expes, then each expe is
        pushed into the current tensor; whenever `push_dict` returns False
        a new tensor is started and the same expe is pushed again.
    '''
    self._lod = [self._lod[i] for i in indexs]

    current = ExpTensor()
    self._tensors = [current]
    for expe in self._lod:
        while current.push_dict(expe) is False:
            # The expe was refused: open a new tensor and retry.
            current = ExpTensor()
            self._tensors.append(current)
        current._size += 1
def remove_all(self, key)
Source code
def remove_all(self, key):
    ''' Remove `key` from `self._conf`, from every tensor and from every expe of `self._lod`. '''

    def _drop(mapping):
        # Pop only when present, so a missing key is not an error.
        if key in mapping:
            mapping.pop(key)

    _drop(self._conf)

    for tensor in self._tensors:
        _drop(tensor)

    for expe in self._lod:
        _drop(expe)
def set_default_all(self, defconf)

set default value in exp

Source code
def set_default_all(self, defconf):
    ''' Fill in missing settings from `defconf`.

        For each (key, value) of `defconf`: tensors lacking the key get
        `[value]`, expes of `self._lod` lacking the key get `value`, and
        `self._conf[key]` is overwritten when the key is already there.
    '''
    for key, value in defconf.items():
        for tensor in (t for t in self._tensors if key not in t):
            tensor[key] = [value]

        for expe in (e for e in self._lod if key not in e):
            expe[key] = value

        # @debug: dont test if all the group have this unique value.
        if key in self._conf:
            self._conf[key] = value
def table(self)
Source code
def table(self):
    ''' Render every tensor as a text table.

        For each tensor a title line ('=== design > spec name > size expe ===')
        is followed by a two-column table of its sorted items. When binding
        rules are recorded in `self._bind`, a leading '_bind' row is added.
        Column headers are only printed for the first tensor.

        Returns the tables joined by newlines.
    '''
    tables = []
    for idx, group in enumerate(self._tensors):
        # NOTE(review): `self._ds` is indexed by tensor position here --
        # confirm it holds one entry per tensor at this point.
        src = self._ds[idx].__name__
        spec = group.get('_name_expe', ['void'])[0]
        tables.append('=== %s > %s > %s expe ===' % (src, spec, group.get_size()))

        # Always bind `extra`: it used to be left undefined when there were
        # no binding rules, which made the concatenation below fail.
        extra = [('_bind', self._bind[idx])] if self._bind else []

        headers = ['Params', 'Values'] if idx == 0 else ''

        rows = extra + sorted(group.items(), key=lambda x: x[0])
        tables.append(tabulate(rows, headers=headers))

    return '\n'.join(tables)
def update_all(self, **kwargs)
Source code
def update_all(self, **kwargs):
    ''' Apply the given keyword settings to `self._conf`, to every tensor and to every expe. '''
    settings = kwargs

    # Cached configuration first, then the tensors, then the expanded expes.
    self._conf.update(settings)

    for tensor in self._tensors:
        tensor.update_from_dict(settings)

    for expe in self._lod:
        expe.update(settings)
class ExpVector (*args, **kwargs)

A List of elements of an ExpTensor.

Source code
class ExpVector(list, BaseObject):
    ''' A list of elements of an ExpTensor.

        `+` and `-` return an instance of the same class as the left operand.
    '''
    def __add__(self, other):
        joined = list.__add__(self, other)
        return self.__class__(joined)

    def __sub__(self, other):
        # Keep, in order, the items that do not appear in `other`.
        kept = []
        for item in self:
            if item not in other:
                kept.append(item)
        return self.__class__(kept)

Ancestors

Subclasses

class Model (*args, **kwargs)

A List of elements of an ExpTensor.

Source code
class Model(ExpVector):
    ''' An ExpVector of models, with helpers querying the 'model' index. '''

    @staticmethod
    def get(model_name):
        ''' Import and return the object indexed under surname `model_name`.

            Returns None when the index lookup yields nothing, or when the
            imported module has no attribute with the indexed name.
        '''
        ix = IX(default_index='model')
        docir = ix.getfirst(model_name, field='surname')
        if not docir:
            return None

        mn = importlib.import_module(docir['module'])
        return getattr(mn, docir['name'], None)

    @staticmethod
    def get_all(_type='short'):
        ''' List the models of the 'model' index.

            _type : 'short' returns the result of the surname query as is;
                    'topos' returns surnames with the entry's category
                    words inserted between the first two name components.

            Raises ValueError for any other `_type` (the result used to be
            left undefined in that case).
        '''
        if _type not in ('short', 'topos'):
            raise ValueError("Unknown _type: %r (expected 'short' or 'topos')" % (_type,))

        ix = IX(default_index='model')
        if _type == 'short':
            return ix.query(field='surname')

        res = []
        for elt in ix.query(field='surname', terms=True):
            if len(elt['category']) > 0:
                # means that len(surname.split('.')) > 1
                names = elt['surname'].split('.')
                topos = '.'.join(elt['category'].split())
                surname = '.'.join((names[0], topos, names[1]))
            else:
                surname = elt['surname']
            res.append(surname)
        return res

    @classmethod
    def table(cls, _type='short'):
        ''' Return the models listed by `get_all(_type)` formatted as a text table. '''
        # `_table_` expects a sequence of tables: wrap the single list.
        tables = (cls.get_all(_type),)
        return _table_(tables, headers=['Models'])

Ancestors

Static methods

def get(model_name)
Source code
@staticmethod
def get(model_name):
    ''' Import and return the object indexed under surname `model_name`.

        Returns None when the 'model' index lookup yields nothing, or when
        the imported module has no attribute with the indexed name.
    '''
    index = IX(default_index='model')

    entry = index.getfirst(model_name, field='surname')
    if not entry:
        return None

    module = importlib.import_module(entry['module'])
    #mn = importlib.import_module(docir['module'], package=local_package)
    return getattr(module, entry['name'], None)
def get_all()
Source code
@staticmethod
def get_all(_type='short'):
    ''' List the models of the 'model' index.

        _type : 'short' returns the result of the surname query as is;
                'topos' returns surnames with the entry's category words
                inserted between the first two name components.

        Raises ValueError for any other `_type` (the result used to be left
        undefined in that case).
    '''
    if _type not in ('short', 'topos'):
        raise ValueError("Unknown _type: %r (expected 'short' or 'topos')" % (_type,))

    ix = IX(default_index='model')
    if _type == 'short':
        return ix.query(field='surname')

    res = []
    for elt in ix.query(field='surname', terms=True):
        if len(elt['category']) > 0:
            # means that len(surname.split('.')) > 1
            names = elt['surname'].split('.')
            topos = '.'.join(elt['category'].split())
            surname = '.'.join((names[0], topos, names[1]))
        else:
            surname = elt['surname']
        res.append(surname)
    return res
def table()
Source code
@classmethod
def table(cls, _type='short'):
    ''' Return the models listed by `get_all(_type)` formatted as a text table. '''
    # `_table_` is given a one-element tuple holding the list of models.
    model_list = cls.get_all(_type)
    return _table_((model_list,), headers=['Models'])
class Script (*args, **kwargs)

Notes: Avoid method conflicts by ALWAYS putting this class last in class definitions.

Source code
class Script(BaseObject):
    ''' Helpers to look up, import and tabulate entries of the 'script' index. '''

    @staticmethod
    def find(script, field='method'):
        ''' Look `script` up in the 'script' index on `field` and return the `getfirst` result. '''
        ix = IX(default_index='script')
        return ix.getfirst(script, field=field)

    @staticmethod
    def get_all(_type='flat'):
        ''' Query the 'script' index.

            _type : 'flat' queries the 'method' field;
                    'hierarchical' queries 'scriptsurname' with terms=True.

            Raises ValueError for any other `_type` (the result used to be
            left undefined in that case).
        '''
        if _type not in ('flat', 'hierarchical'):
            raise ValueError("Unknown _type: %r (expected 'flat' or 'hierarchical')" % (_type,))

        ix = IX(default_index='script')
        if _type == 'flat':
            return ix.query(field='method')
        return ix.query(field='scriptsurname', terms=True)

    @staticmethod
    def get(scriptname, arguments, field='scriptsurname'):
        ''' Resolve `scriptname` to a script object.

            The name is first looked up on `field`; failing that, on the
            'method' field, in which case `scriptname` is prepended to
            `arguments`.

            Returns a `(script, arguments)` tuple, or None (not a tuple)
            when neither lookup succeeds.
        '''
        ix = IX(default_index='script')
        topmethod = ix.getfirst(scriptname, field=field)
        if not topmethod:
            # get the first method that have this name
            topmethod = ix.getfirst(scriptname, field='method')
            if not topmethod:
                return None

            # The name designates a method: it becomes the first argument.
            arguments = [scriptname] + arguments

        module = importlib.import_module(topmethod['module'])
        script = getattr(module, topmethod['scriptname'])
        return script, arguments

    @classmethod
    def table(cls):
        ''' Return a text table of the indexed methods, grouped by script name. '''
        ix = IX(default_index='script')
        t = OrderedDict()
        for elt in ix.query(index='script', terms=True):
            name = elt['scriptname']
            methods = t.get(name, []) + [elt['method']]
            t[name] = sorted(methods)
        return _table_(t, headers='keys', name=cls.__name__)

Ancestors

Static methods

def find(script, field='method')
Source code
@staticmethod
def find(script, field='method'):
    ''' Look `script` up in the 'script' index on `field` and return the `getfirst` result. '''
    index = IX(default_index='script')
    return index.getfirst(script, field=field)
def get(scriptname, arguments, field='scriptsurname')
Source code
@staticmethod
def get(scriptname, arguments, field='scriptsurname'):
    ''' Resolve `scriptname` to a script object.

        The name is first looked up on `field`; failing that, on the
        'method' field, in which case `scriptname` is prepended to
        `arguments`.

        Returns a `(script, arguments)` tuple, or None (not a tuple) when
        neither lookup succeeds.
    '''
    index = IX(default_index='script')

    entry = index.getfirst(scriptname, field=field)
    if not entry:
        # Fall back on the first method carrying this name.
        entry = index.getfirst(scriptname, field='method')
        if not entry:
            return None

        # The name designates a method: it becomes the first argument.
        arguments = [scriptname] + arguments

    module = importlib.import_module(entry['module'])
    return getattr(module, entry['scriptname']), arguments
def get_all()
Source code
@staticmethod
def get_all(_type='flat'):
    ''' Query the 'script' index.

        _type : 'flat' queries the 'method' field;
                'hierarchical' queries 'scriptsurname' with terms=True.

        Raises ValueError for any other `_type` (the result used to be left
        undefined in that case).
    '''
    if _type not in ('flat', 'hierarchical'):
        raise ValueError("Unknown _type: %r (expected 'flat' or 'hierarchical')" % (_type,))

    ix = IX(default_index='script')
    if _type == 'flat':
        return ix.query(field='method')
    return ix.query(field='scriptsurname', terms=True)
def table()
Source code
@classmethod
def table(cls):
    ''' Return a text table of the indexed methods, grouped by script name. '''
    index = IX(default_index='script')
    by_script = OrderedDict()
    for entry in index.query(index='script', terms=True):
        script_name = entry['scriptname']
        known = by_script.get(script_name, [])
        # Each group is kept sorted as methods are added.
        by_script[script_name] = sorted(known + [entry['method']])
    return _table_(by_script, headers='keys', name=cls.__name__)
class Spec (*args, **kwargs)

Notes: Avoid method conflicts by ALWAYS putting this class last in class definitions.

Source code
class Spec(BaseObject):
    ''' Helpers to look up, load and tabulate entries of the 'spec' index. '''

    @staticmethod
    def find(spec, field='expe_name'):
        ''' Look `spec` up in the 'spec' index on `field` and return the `getfirst` result. '''
        index = IX(default_index='spec')
        return index.getfirst(spec, field=field)

    @staticmethod
    def get(scriptname, *expe):
        ''' Not implemented: raises NotImplementedError after opening the 'spec' index. '''
        _index = IX(default_index='spec')
        raise NotImplementedError

    @staticmethod
    def get_all():
        ''' Return the 'spec' index query on 'expe_name' with terms='module_name'. '''
        index = IX(default_index='spec')
        return index.query(field='expe_name', terms='module_name')

    @staticmethod
    def load(expe_name, expe_module):
        ''' Import a spec from its dotted location.

            `expe_module` is split on its last dot: the left part is
            imported as a module, the right part is fetched from it as the
            design, and `expe_name` is fetched from that design.

            Returns the `(exp, expdesign)` pair.
            Raises IndexChangedError if the import or an attribute lookup fails.
        '''
        # debug to load from module or expe_name !
        *package_parts, design_name = expe_module.split('.')
        package = '.'.join(package_parts)
        try:
            expdesign = getattr(importlib.import_module(package), design_name)
            exp = getattr(expdesign, expe_name)
        except (AttributeError, ModuleNotFoundError) as e:
            raise IndexChangedError("Fatal Error: unable to load spec (%s:%s):  try `pmk update' or try again."% (expe_name, e))

        return exp, expdesign

    @classmethod
    def table(cls):
        ''' Return a text table of the indexed expe names, grouped by the last component of their module name.

            Only specs that load as ExpSpace, ExpTensor or ExpGroup are listed.
        '''
        index = IX(default_index='spec')
        by_design = OrderedDict()
        for entry in index.query(index='spec', terms=True):
            design = entry['module_name'].split('.')[-1]
            loaded, _ = cls.load(entry['expe_name'], entry['module_name'])
            if not isinstance(loaded, (ExpSpace, ExpTensor, ExpGroup)):
                continue
            known = by_design.get(design, [])
            by_design[design] = sorted(known + [entry['expe_name']])
        return _table_(by_design, headers='keys', name=cls.__name__)

    # no more complex.
    # @sortbytype
    @classmethod
    def table_topos(cls, _spec):
        ''' Return a text table of the expe names of `_spec`, one column per kind of object.

            `_spec` maps expe names to module names. Each loaded object goes
            to the first matching category, or to the last one ('Unknown')
            when none matches.
        '''
        # Order matters: the first category that matches wins.
        categories = OrderedDict([
            ('Corpuses', Corpus),
            ('Models', Model),
            ('Vector', ExpVector),
            ('Exp', (ExpSpace, ExpTensor, ExpGroup)),
            ('Unknown', str),
        ])
        kinds = list(categories.values())
        tables = [[] for _ in kinds]

        for expe_name, expe_module in _spec.items():
            expe, _ = cls.load(expe_name, expe_module)
            pos = len(kinds) - 1
            for rank, kind in enumerate(kinds):
                if isinstance(expe, kind):
                    pos = rank
                    break
            tables[pos].append(expe_name)

        return _table_(tables, headers=list(categories.keys()))

Ancestors

Static methods

def find(spec, field='expe_name')
Source code
@staticmethod
def find(spec, field='expe_name'):
    ''' Look `spec` up in the 'spec' index on `field` and return the `getfirst` result. '''
    index = IX(default_index='spec')
    return index.getfirst(spec, field=field)
def get(scriptname, *expe)
Source code
@staticmethod
def get(scriptname, *expe):
    ''' Not implemented: raises NotImplementedError after opening the 'spec' index. '''
    _index = IX(default_index='spec')
    raise NotImplementedError
def get_all()
Source code
@staticmethod
def get_all():
    ''' Return the 'spec' index query on 'expe_name' with terms='module_name'. '''
    index = IX(default_index='spec')
    return index.query(field='expe_name', terms='module_name')
def load(expe_name, expe_module)
Source code
@staticmethod
def load(expe_name, expe_module):
    ''' Import a spec from its dotted location.

        `expe_module` is split on its last dot: the left part is imported
        as a module, the right part is fetched from it as the design, and
        `expe_name` is fetched from that design.

        Returns the `(exp, expdesign)` pair.
        Raises IndexChangedError if the import or an attribute lookup fails.
    '''
    # debug to load from module or expe_name !

    *package_parts, design_name = expe_module.split('.')
    package = '.'.join(package_parts)
    try:
        expdesign = getattr(importlib.import_module(package), design_name)
        exp = getattr(expdesign, expe_name)
    except (AttributeError, ModuleNotFoundError) as e:
        raise IndexChangedError("Fatal Error: unable to load spec (%s:%s):  try `pmk update' or try again."% (expe_name, e))

    return exp, expdesign
def table()
Source code
@classmethod
def table(cls):
    ''' Return a text table of the indexed expe names, grouped by the last component of their module name.

        Only specs that load as ExpSpace, ExpTensor or ExpGroup are listed.
    '''
    index = IX(default_index='spec')
    by_design = OrderedDict()
    for entry in index.query(index='spec', terms=True):
        design = entry['module_name'].split('.')[-1]
        loaded, _ = cls.load(entry['expe_name'], entry['module_name'])
        if not isinstance(loaded, (ExpSpace, ExpTensor, ExpGroup)):
            continue
        known = by_design.get(design, [])
        by_design[design] = sorted(known + [entry['expe_name']])
    return _table_(by_design, headers='keys', name=cls.__name__)
def table_topos(_spec)
Source code
@classmethod
def table_topos(cls, _spec):
    ''' Return a text table of the expe names of `_spec`, one column per kind of object.

        `_spec` maps expe names to module names. Each loaded object goes to
        the first matching category, or to the last one ('Unknown') when
        none matches.
    '''
    # Order matters: the first category that matches wins.
    categories = OrderedDict([
        ('Corpuses', Corpus),
        ('Models', Model),
        ('Vector', ExpVector),
        ('Exp', (ExpSpace, ExpTensor, ExpGroup)),
        ('Unknown', str),
    ])
    kinds = list(categories.values())
    tables = [[] for _ in kinds]

    for expe_name, expe_module in _spec.items():
        expe, _ = cls.load(expe_name, expe_module)
        pos = len(kinds) - 1
        for rank, kind in enumerate(kinds):
            if isinstance(expe, kind):
                pos = rank
                break
        tables[pos].append(expe_name)

    return _table_(tables, headers=list(categories.keys()))