Module pymake.frontend.frontend

Source code
import os
import logging

from pymake import GramExp



class DataBase(object):
    r""" Root class for frontend manipulation over corpora and models.

        Given data Y and a model M = {\theta, \Phi}:
        E[Y] = \theta \phi^T

        The functionality of the frontend is organized as:
        1. Frontend for model/algorithm I/O,
        2. Frontend for corpus information, and result gathering for
            machine learning models,
        3. Data analysis and prediction.

        load_corpus -> load_text_corpus -> text_loader
        (frontend)  ->   (choice)       -> (adapt preprocessing)

    """

    log = logging.getLogger('root')

    # Default value so that `configure` is a safe no-op as long as no data
    # has been attached to the instance.
    data = None

    def __init__(self, expe):
        ''' Store the experiment settings and read the I/O flags from it.

        `expe` must provide a dict-like `get(key, default)` method.
        '''
        self.expe = expe

        self._force_load_data = expe.get('_force_load_data', True)
        self._force_save_data = expe.get('_force_save_data', True)

        self.corpus_name = expe.get('corpus')

    #
    # I/O Methods
    #

    @classmethod
    def from_expe(cls):
        ''' Not implemented in this base class. '''
        raise NotImplementedError

    @classmethod
    def _extract_data(cls):
        ''' Raw data parsing/extraction. Not implemented in this base class. '''
        raise NotImplementedError

    @staticmethod
    def get_input_path(expe):
        ''' Return the input path of `expe`, computing and caching it if needed.

        When `expe` has no '_input_path' entry, the path is built with
        `GramExp.make_input_path(expe)` and stored under that key.
        '''
        if '_input_path' in expe:
            return expe['_input_path']

        input_path = GramExp.make_input_path(expe)
        expe['_input_path'] = input_path
        return input_path

    @classmethod
    def _resolve_filename(cls, expe):
        ''' Return the path of the `.pk` file of the corpus of `expe`.

        The file name is `expe.corpus`, with a '.pk' suffix appended when
        missing, joined to `expe._input_path`.

        Returns None, after logging an error, when `expe._input_path` does
        not exist on disk.
        '''
        input_path = expe._input_path

        if not os.path.exists(input_path):
            cls.log.error("Corpus `%s' Not found.", input_path)
            print('please run "fetch_networks"')
            # There is no instance inside a classmethod, so nothing can be
            # assigned to `self.data` here; the class-level default
            # `data = None` conveys the "no data" state instead.
            return None

        basename = expe.corpus
        if not basename.endswith('.pk'):
            basename += '.pk'

        return os.path.join(input_path, basename)

    @classmethod
    def _load_data(cls, *args, **kwargs):
        ''' Load preprocessed data (forwards all arguments to `pymake.io.load`). '''
        from pymake.io import load
        return load(*args, **kwargs)

    @classmethod
    def _save_data(cls, *args, **kwargs):
        ''' Save preprocessed data (forwards all arguments to `pymake.io.save`). '''
        from pymake.io import save
        return save(*args, **kwargs)

    def configure(self):
        ''' Configure the frontend data.

        Does nothing when `self.data` is None. Otherwise, depending on the
        settings found in `self.expe`:
            1. exponentiate and clip the edge weights (expe.exponentiate),
            2. sample the corpus (expe.N, unless it is 'all'),
            3. build a testset/validation set (expe.testset_ratio),
            4. add noise (expe.noise).

        `sample`, `make_testset` and `make_noise` are not defined in this
        class; they are expected to be provided elsewhere (presumably by
        subclasses).
        '''
        if self.data is None:
            return

        if self.expe.get('exponentiate'):
            MAX = 300
            self.data.ep['weights'].a = 2**self.data.ep['weights'].a
            # Clip large values to MAX. Negative values are also mapped to
            # MAX (presumably they come from integer overflow -- to confirm).
            self.data.ep['weights'].a[self.data.ep['weights'].a > MAX] = int(MAX)
            self.data.ep['weights'].a[self.data.ep['weights'].a < 0] = int(MAX)

        N = self.expe.get('N')
        if N is not None and N != 'all':
            N = int(N)
            self.log.debug('sampling dataset to N=%d ...', N)
            self.sample(N)

        testset_ratio = self.expe.get('testset_ratio')
        if testset_ratio is not None:
            self.log.debug('Building testset ...')
            self.make_testset()

        if self.expe.get('noise'):
            self.make_noise()

Classes

class DataBase (expe)

Root Class for Frontend Manipulation over Corpuses and Models.

Given data Y and a model M = {\theta, \Phi}: E[Y] = \theta \phi^T

The functionality of the frontend is organized as: 1. Frontend for model/algorithm I/O, 2. Frontend for Corpus Information, and Result Gathering for Machine Learning Models. 3. Data Analysis and Prediction.

load_corpus -> load_text_corpus -> text_loader (frontend) -> (choice) -> (adapt preprocessing)

Source code
class DataBase(object):
    r""" Root class for frontend manipulation over corpora and models.

        Given data Y and a model M = {\theta, \Phi}:
        E[Y] = \theta \phi^T

        The functionality of the frontend is organized as:
        1. Frontend for model/algorithm I/O,
        2. Frontend for corpus information, and result gathering for
            machine learning models,
        3. Data analysis and prediction.

        load_corpus -> load_text_corpus -> text_loader
        (frontend)  ->   (choice)       -> (adapt preprocessing)

    """

    log = logging.getLogger('root')

    # Default value so that `configure` is a safe no-op as long as no data
    # has been attached to the instance.
    data = None

    def __init__(self, expe):
        ''' Store the experiment settings and read the I/O flags from it.

        `expe` must provide a dict-like `get(key, default)` method.
        '''
        self.expe = expe

        self._force_load_data = expe.get('_force_load_data', True)
        self._force_save_data = expe.get('_force_save_data', True)

        self.corpus_name = expe.get('corpus')

    #
    # I/O Methods
    #

    @classmethod
    def from_expe(cls):
        ''' Not implemented in this base class. '''
        raise NotImplementedError

    @classmethod
    def _extract_data(cls):
        ''' Raw data parsing/extraction. Not implemented in this base class. '''
        raise NotImplementedError

    @staticmethod
    def get_input_path(expe):
        ''' Return the input path of `expe`, computing and caching it if needed.

        When `expe` has no '_input_path' entry, the path is built with
        `GramExp.make_input_path(expe)` and stored under that key.
        '''
        if '_input_path' in expe:
            return expe['_input_path']

        input_path = GramExp.make_input_path(expe)
        expe['_input_path'] = input_path
        return input_path

    @classmethod
    def _resolve_filename(cls, expe):
        ''' Return the path of the `.pk` file of the corpus of `expe`.

        The file name is `expe.corpus`, with a '.pk' suffix appended when
        missing, joined to `expe._input_path`.

        Returns None, after logging an error, when `expe._input_path` does
        not exist on disk.
        '''
        input_path = expe._input_path

        if not os.path.exists(input_path):
            cls.log.error("Corpus `%s' Not found.", input_path)
            print('please run "fetch_networks"')
            # There is no instance inside a classmethod, so nothing can be
            # assigned to `self.data` here; the class-level default
            # `data = None` conveys the "no data" state instead.
            return None

        basename = expe.corpus
        if not basename.endswith('.pk'):
            basename += '.pk'

        return os.path.join(input_path, basename)

    @classmethod
    def _load_data(cls, *args, **kwargs):
        ''' Load preprocessed data (forwards all arguments to `pymake.io.load`). '''
        from pymake.io import load
        return load(*args, **kwargs)

    @classmethod
    def _save_data(cls, *args, **kwargs):
        ''' Save preprocessed data (forwards all arguments to `pymake.io.save`). '''
        from pymake.io import save
        return save(*args, **kwargs)

    def configure(self):
        ''' Configure the frontend data.

        Does nothing when `self.data` is None. Otherwise, depending on the
        settings found in `self.expe`:
            1. exponentiate and clip the edge weights (expe.exponentiate),
            2. sample the corpus (expe.N, unless it is 'all'),
            3. build a testset/validation set (expe.testset_ratio),
            4. add noise (expe.noise).

        `sample`, `make_testset` and `make_noise` are not defined in this
        class; they are expected to be provided elsewhere (presumably by
        subclasses).
        '''
        if self.data is None:
            return

        if self.expe.get('exponentiate'):
            MAX = 300
            self.data.ep['weights'].a = 2**self.data.ep['weights'].a
            # Clip large values to MAX. Negative values are also mapped to
            # MAX (presumably they come from integer overflow -- to confirm).
            self.data.ep['weights'].a[self.data.ep['weights'].a > MAX] = int(MAX)
            self.data.ep['weights'].a[self.data.ep['weights'].a < 0] = int(MAX)

        N = self.expe.get('N')
        if N is not None and N != 'all':
            N = int(N)
            self.log.debug('sampling dataset to N=%d ...', N)
            self.sample(N)

        testset_ratio = self.expe.get('testset_ratio')
        if testset_ratio is not None:
            self.log.debug('Building testset ...')
            self.make_testset()

        if self.expe.get('noise'):
            self.make_noise()

Subclasses

Class variables

var log

Static methods

def from_expe()
Source code
@classmethod
def from_expe(cls):
    ''' Placeholder constructor hook: always raises NotImplementedError here. '''
    raise NotImplementedError()
def get_input_path(expe)
Source code
@staticmethod
def get_input_path(expe):
    ''' Return the '_input_path' entry of `expe`, creating it when absent.

    A missing entry is computed with `GramExp.make_input_path(expe)` and
    written back into `expe` before being returned.
    '''
    # Fast path: the path has already been resolved and cached.
    if '_input_path' in expe:
        return expe['_input_path']

    resolved = GramExp.make_input_path(expe)
    expe['_input_path'] = resolved
    return resolved

Methods

def configure(self)

Configure the frontend data. Try the following steps: 1. Sample the corpus (expe.N); 2. Build a testset/validation set (expe.testset_ratio & mask).

Source code
def configure(self):
    ''' Apply the optional preparation steps requested in `self.expe`.

    Nothing happens when `self.data` is None. Otherwise, in order:
        - 'exponentiate': replace the edge weights w by 2**w, capped at 300,
        - 'N' (set and not 'all'): sample the dataset down to int(N),
        - 'testset_ratio' (set): build the testset,
        - 'noise': add noise.
    '''
    if self.data is None:
        return

    settings = self.expe

    if settings.get('exponentiate'):
        ceiling = 300
        weights = self.data.ep['weights']
        weights.a = 2**weights.a
        # Values above the ceiling, and negative ones, are set to the ceiling.
        weights.a[weights.a > ceiling] = int(ceiling)
        weights.a[weights.a < 0] = int(ceiling)

    size = settings.get('N')
    if size is not None and size != 'all':
        size = int(size)
        self.log.debug('sampling dataset to N=%d ...' % size)
        self.sample(size)

    if settings.get('testset_ratio') is not None:
        self.log.debug('Building testset ...')
        self.make_testset()

    if settings.get('noise'):
        self.make_noise()