Module pymake.frontend.drivers
Source code
from numpy import ma
import numpy as np
import logging
try:
import pandas as pd
except Exception as e:
print('Error while importing pandas: %s' % e)
class DatasetDriver(object):
    ''' Parse dataset files into a dense adjacency matrix, using pandas
        where convenient.

        Every ``parse_*`` classmethod returns a dict whose ``data`` key
        holds an (N, N) numpy array of edge weights.
    '''

    # Character that starts a comment line in the input files.
    _comment = '%'
    log = logging.getLogger('root')

    # No pandas here....
    @classmethod
    def parse_tnet(cls, fn, sep=' '):
        ''' Parse an edge list (grammar retro-engineered from fb/emaileu.txt;
            it is unclear whether the tnet format is official).

            Two layouts are recognized from the first non-empty line:
              * 2 fields: 'i j'           -- each occurrence adds weight 1
              * 5 fields: '"date" i j w'  -- explicit weights summed per dyad

            Returns dict(data=g) with g a dense (N, N) weight matrix; vertex
            ids are re-based so the smallest id maps to row/column 0.
        '''
        cls.log.debug('opening file: %s' % fn)
        with open(fn) as f:
            content = f.read()

        lines = list(filter(None, content.split('\n')))
        first_row = lines[0].strip().split(sep)
        edges = {}
        if len(first_row) == 2:
            # format 'i j': count repeated occurrences of a dyad.
            for line in lines:
                dyad = '.'.join(line.strip().split(sep))
                edges[dyad] = edges.get(dyad, 0) + 1
        elif len(first_row) == 5:
            # format '"date" i j weight': sum the explicit weights.
            for line in lines:
                _line = line.strip().split(sep)
                dyad = '.'.join(_line[-3:-1])
                edges[dyad] = edges.get(dyad, 0) + int(_line[-1])
        else:
            # Fail early instead of crashing later on an empty edge dict.
            raise ValueError('Unrecognized row layout in file: %s' % fn)

        # The (w+1, then -1) pair shifts the vertex ids down by one while
        # leaving the accumulated weight w unchanged.
        edges = np.array([(e.split('.')[0], e.split('.')[1], w + 1)
                          for e, w in edges.items()], dtype=int) - 1
        edges[:, 0:2] -= edges[:, 0:2].min()
        N = edges[:, 0:2].max() + 1
        g = np.zeros((N, N))
        g[tuple(edges[:, :2].T)] = edges[:, 2]
        return dict(data=g)

    # No pandas here....
    @classmethod
    def parse_csv(cls, fn, sep=';'):
        ''' Parse a csv edge list (grammar retro-engineered from
            manufacturing.csv).  The first line is a header and is skipped;
            the first two fields of every other line are the dyad, and each
            occurrence adds a weight of 1.

            Returns dict(data=g) with g a dense (N, N) weight matrix.
        '''
        cls.log.debug('opening file: %s' % fn)
        with open(fn, 'r') as f:
            content = f.read()

        # Drop the header line, then count dyad occurrences.
        lines = list(filter(None, content.split('\n')))[1:]
        edges = {}
        for line in lines:
            dyad = '.'.join(line.strip().split(sep)[0:2])
            edges[dyad] = edges.get(dyad, 0) + 1

        # The (w+1, then -1) pair re-bases vertex ids, not weights.
        edges = np.array([(e.split('.')[0], e.split('.')[1], w + 1)
                          for e, w in edges.items()], dtype=int) - 1
        edges[:, 0:2] -= edges[:, 0:2].min()
        N = edges[:, 0:2].max() + 1
        g = np.zeros((N, N))
        g[tuple(edges[:, :2].T)] = edges[:, 2]
        return dict(data=g)

    @classmethod
    def parse_dancer(cls, fn, sep=';'):
        """ Parse a DANCER file.

            Vertex rows look like 'n;f1|f2|...;cluster'; edge rows reuse the
            first two columns as the endpoints and leave the cluster field
            empty (which is how the two kinds of rows are told apart).

            Returns dict(data=y, clusters=..., features=...).
        """
        cls.log.debug('opening file: %s' % fn)
        data = pd.read_csv(fn, sep=sep, names=['n', 'feat', 'cluster'],
                           comment=cls._comment)

        # Complete rows are vertex declarations -> clusters and features.
        vertex_rows = data.dropna()
        clusters = vertex_rows['cluster'].values.astype(int)
        features = np.array([list(map(float, f.split('|')))
                             for f in vertex_rows['feat'].values])

        # Rows without a cluster are edges.  (.ix was removed from pandas;
        # .loc is the supported equivalent.)
        data = data.loc[data['cluster'].isna()].copy()
        data['cluster'] = 1  # <= the weight
        # Keep rows whose first field is numeric.  (.as_matrix() was removed
        # from pandas; .to_numpy() is the supported replacement.)
        num_idx = pd.to_numeric(data['n'], errors='coerce').dropna().index
        data = data.loc[num_idx].to_numpy().astype(int)

        data[:, 0:2] -= data[:, 0:2].min()
        N = data[:, 0:2].max() + 1
        y = np.zeros((N, N))
        e_l = data[:, 2] > 0
        e_ix = data[:, 0:2][e_l]
        # Index with explicit (rows, cols) arrays: the old list-of-tuples
        # fancy index was removed from numpy and now means something else.
        y[e_ix[:, 0], e_ix[:, 1]] = data[:, 2][e_l]
        return dict(data=y, clusters=clusters, features=features)

    @classmethod
    def parse_dat(cls, fn, sep=r"\s+"):
        """ Parse a whitespace-separated .dat network file.

            The row width (2 = 's r', 3 = 's r weight') is sniffed with
            _row_len, then the file is loaded with pandas; non-numeric rows
            (section headers such as 'DATA:') are filtered out afterwards.

            Returns dict(data=y) with y a dense (N, N) weight matrix.
        """
        cls.log.debug('opening file: %s' % fn)

        def _row_len(fn):
            ''' Seek for the length of the csv row, then break quickly. '''
            inside = {'vertices': False, 'edges': False}
            data = []
            for _line in open(fn):
                line = _line.strip()
                if line.startswith(('ROW LABELS:', '*vertices')) or inside['vertices']:
                    if not inside['vertices']:
                        inside['vertices'] = True
                        continue
                    if line.startswith('#') or not line.strip():
                        inside['vertices'] = False  # break
                    elif line.startswith(('DATA', '*edges')):
                        inside['vertices'] = False  # break
                        inside['edges'] = True
                    else:
                        continue
                elif line.startswith(('DATA', '*edges')) or inside['edges']:
                    if not inside['edges']:
                        inside['edges'] = True  # break
                        continue
                    if line.startswith('#') or not line.strip() or len(line.split()) < 2:
                        inside['edges'] = False
                    else:
                        # First real edge row: its width decides the format.
                        data.append(line.split())
                        break
            return len(data[0])

        # Sender, Receiver[, weight]
        row_len = _row_len(fn)
        if row_len == 3:
            cols = ['s', 'r', 'weight']
        elif row_len == 2:
            cols = ['s', 'r']
        else:
            raise ValueError('I/O error for dataset file: %s' % fn)

        data = pd.read_csv(fn, sep=sep, names=cols, comment=cls._comment)
        if len(cols) == 2:
            # No explicit weight column: every edge counts once.
            data['weight'] = np.ones(data.shape[0])
            cols = ['s', 'r', 'weight']
        # Keep rows where both endpoints are numeric.  (`Index & Index` and
        # .as_matrix() were removed from pandas; use the supported API.)
        cond = pd.to_numeric(data['s'], errors='coerce').dropna().index.intersection(
            pd.to_numeric(data['r'], errors='coerce').dropna().index)
        data = data.loc[cond].to_numpy().astype(int)

        data[:, 0:2] -= data[:, 0:2].min()
        N = data[:, 0:2].max() + 1
        y = np.zeros((N, N))
        e_l = data[:, 2] > 0
        e_ix = data[:, 0:2][e_l]
        # Explicit (rows, cols) indexing; see parse_dancer for the rationale.
        y[e_ix[:, 0], e_ix[:, 1]] = data[:, 2][e_l]
        return dict(data=y)
class OnlineDatasetDriver(object):
    ''' Streaming counterparts of the dataset parsers: each ``parse_*``
        classmethod is a generator yielding one record per input row.
    '''

    # Character that starts a comment line in the input files.
    _comment = '%'
    log = logging.getLogger('root')

    @classmethod
    def parse_tnet(cls, fn, sep=' '):
        """Stream edges from a tnet-like file.

        2-field rows ('i j') get weight 1; 5-field rows ('"date" i j w')
        carry an explicit weight, and zero-weight rows are skipped.
        Yields (v1, v2, weight, None) tuples.
        """
        cls.log.debug('opening file: %s' % fn)
        for raw in open(fn):
            stripped = raw.strip()
            if not stripped:
                continue
            fields = stripped.split(sep)
            if len(fields) == 2:
                src, dst = fields
                yield int(src), int(dst), 1, None
            elif len(fields) == 5:
                src, dst = fields[-3:-1]
                weight = int(fields[-1])
                if weight != 0:
                    yield int(src), int(dst), weight, None

    @classmethod
    def parse_csv(cls, fn, sep=';'):
        """Stream edges from a csv file, skipping the first (header) line.

        Yields (v1, v2, 1, None) for every data row.
        """
        cls.log.debug('opening file: %s' % fn)
        for lineno, raw in enumerate(open(fn)):
            if lineno == 0:
                # Ignore first status line
                continue
            src, dst = raw.strip().split(sep)[0:2]
            yield int(src), int(dst), 1, None

    @classmethod
    def parse_dancer(cls, fn, sep=';'):
        """Stream a DANCER file.

        Vertex rows yield dicts {'cluster', 'features', 'index'}; edge rows
        yield (v1, v2, 1, None) tuples.
        """
        cls.log.debug('opening file: %s' % fn)
        in_vertices = False
        in_edges = False
        for raw in open(fn):
            stripped = raw.strip()
            if stripped.startswith('# Vertices') or in_vertices:
                if not in_vertices:
                    in_vertices = True
                    continue
                if stripped.startswith('#') or not stripped:
                    in_vertices = False  # section ends
                else:
                    parts = stripped.split(sep)
                    yield {'cluster': int(parts[-1]),
                           'features': list(map(float, parts[-2].split('|'))),
                           'index': int(parts[0])}
            elif stripped.startswith('# Edges') or in_edges:
                if not in_edges:
                    in_edges = True
                    continue
                if stripped.startswith('#') or not stripped:
                    in_edges = False  # section ends
                else:
                    src, dst = stripped.split(sep)
                    yield int(src), int(dst), 1, None

    @classmethod
    def parse_dat(cls, fn, sep=" "):
        """Stream edges from a .dat/pajek-like file.

        Edge rows have 2 ('i j', weight 1) or 3 ('i j w') columns; rows with
        a zero weight are skipped.  Yields (v1, v2, weight, None) tuples.
        """
        cls.log.debug('opening file: %s' % fn)
        in_vertices = False
        in_edges = False
        for raw in open(fn):
            stripped = raw.strip()
            if stripped.startswith(('ROW LABELS:', '*vertices')) or in_vertices:
                if not in_vertices:
                    in_vertices = True
                    continue
                if stripped.startswith('#') or not stripped:
                    in_vertices = False  # section ends
                elif stripped.startswith(('DATA', '*edges')):
                    # the vertex section ends where the edge section begins
                    in_vertices = False
                    in_edges = True
                else:
                    continue
            elif stripped.startswith(('DATA', '*edges')) or in_edges:
                if not in_edges:
                    in_edges = True
                    continue
                if stripped.startswith('#') or not stripped or len(stripped.split()) < 2:
                    in_edges = False  # section ends
                else:
                    fields = stripped.split(sep)
                    if len(fields) == 2:
                        # like .txt
                        src, dst = fields
                        yield int(src), int(dst), 1, None
                    elif len(fields) == 3:
                        src, dst = fields[0:2]
                        weight = int(fields[2])
                        if weight != 0:
                            yield int(src), int(dst), weight, None
                    else:
                        raise NotImplementedError
class RawDatasetDriver(object):
    ''' Parse dataset files with plain python loops (deprecated).

        Every ``parse_*`` classmethod returns a dict whose ``data`` key
        holds an (N, N) numpy array of edge weights.
    '''

    # Character that starts a comment line in the input files.
    _comment = '%'
    log = logging.getLogger('root')

    @classmethod
    def parse_tnet(cls, fn, sep=' '):
        ''' Parse an edge list (grammar retro-engineered from fb/emaileu.txt).

            Two layouts are recognized from the first non-empty line:
              * 2 fields: 'i j'           -- each occurrence adds weight 1
              * 5 fields: '"date" i j w'  -- explicit weights summed per dyad

            Returns dict(data=g) with g a dense (N, N) weight matrix.
        '''
        cls.log.debug('opening file: %s' % fn)
        with open(fn) as f:
            content = f.read()

        lines = list(filter(None, content.split('\n')))
        first_row = lines[0].strip().split(sep)
        edges = {}
        if len(first_row) == 2:
            # format 'i j': count repeated occurrences of a dyad.
            for line in lines:
                dyad = '.'.join(line.strip().split(sep))
                edges[dyad] = edges.get(dyad, 0) + 1
        elif len(first_row) == 5:
            # format '"date" i j weight': sum the explicit weights.
            for line in lines:
                _line = line.strip().split(sep)
                dyad = '.'.join(_line[-3:-1])
                edges[dyad] = edges.get(dyad, 0) + int(_line[-1])
        else:
            # Fail early instead of crashing later on an empty edge dict.
            raise ValueError('Unrecognized row layout in file: %s' % fn)

        # (w+1, then -1): shift vertex ids down by one, keep weights intact.
        edges = np.array([(e.split('.')[0], e.split('.')[1], w + 1)
                          for e, w in edges.items()], dtype=int) - 1
        # NOTE: unlike DatasetDriver, ids are not re-based to the minimum;
        # N is the largest value in the array (weights included) plus one,
        # which matches the historical behaviour of this deprecated driver.
        N = edges.max() + 1
        g = np.zeros((N, N))
        g[tuple(edges[:, :2].T)] = edges[:, 2]
        return dict(data=g)

    @classmethod
    def parse_csv(cls, fn, sep=';'):
        ''' Parse a csv edge list (grammar retro-engineered from
            manufacturing.csv).  The first line is a header and is skipped;
            the first two fields of every other line are the dyad, and each
            occurrence adds a weight of 1.

            Returns dict(data=g) with g a dense (N, N) weight matrix.
        '''
        cls.log.debug('opening file: %s' % fn)
        with open(fn) as f:
            content = f.read()

        # Drop the header line, then count dyad occurrences.
        lines = list(filter(None, content.split('\n')))[1:]
        edges = {}
        for line in lines:
            dyad = '.'.join(line.strip().split(sep)[0:2])
            edges[dyad] = edges.get(dyad, 0) + 1

        # (w+1, then -1): shift vertex ids down by one, keep weights intact.
        edges = np.array([(e.split('.')[0], e.split('.')[1], w + 1)
                          for e, w in edges.items()], dtype=int) - 1
        N = edges.max() + 1
        g = np.zeros((N, N))
        g[tuple(edges[:, :2].T)] = edges[:, 2]
        return dict(data=g)

    @classmethod
    def parse_dancer(cls, fn, sep=';'):
        """ Parse a DANCER file with a '# Vertices' and a '# Edges' section.

            Vertex rows look like 'n;f1|f2|...;cluster'; edge rows 'n;m'.
            Edges are stored symmetrically with weight 1.

            Returns dict(data=g, clusters=..., features=...).
        """
        cls.log.debug('opening file: %s' % fn)
        data = []
        inside = {'vertices': False, 'edges': False}
        clusters = []
        features = []
        for line in open(fn):
            if line.startswith('# Vertices') or inside['vertices']:
                if not inside['vertices']:
                    inside['vertices'] = True
                    N = 0
                    continue
                if line.startswith('#') or not line.strip():
                    inside['vertices'] = False  # break
                else:
                    # Parsing assignation
                    elements = line.strip().split(sep)
                    clusters.append(int(elements[-1]))
                    features.append(list(map(float, elements[-2].split('|'))))
                    N += 1
            elif line.startswith('# Edges') or inside['edges']:
                if not inside['edges']:
                    inside['edges'] = True
                    continue
                if line.startswith('#') or not line.strip():
                    inside['edges'] = False  # break
                else:
                    # Parsing assignation
                    data.append(line.strip())

        # N was set while reading the '# Vertices' section; a file without
        # that section leaves it unbound (pre-existing limitation).
        edges = np.array([tuple(row.split(sep)) for row in data]).astype(int)
        g = np.zeros((N, N))
        g[edges[:, 0], edges[:, 1]] = 1
        g[edges[:, 1], edges[:, 0]] = 1  # symmetric adjacency

        # The obsolete side-file parsing of 'parameters' was dropped: it
        # relied on names (os, parse_file_conf) that are not defined in this
        # module -- so it always raised NameError -- and its result was never
        # used (it was marked @Obsolete and only assigned to a dead local).
        features = np.array(features)
        return dict(data=g, clusters=clusters, features=features)

    @classmethod
    def parse_dat(cls, fn, sep=' '):
        """ Parse a .dat/pajek-like network file with plain python loops.

            Edge rows have either 2 ('i j') or 3 ('i j w') columns; the
            width of the first collected edge row decides the format.

            Returns dict(data=g) with g a dense (N, N) weight matrix.
        """
        cls.log.debug('opening file: %s' % fn)
        data = []
        inside = {'vertices': False, 'edges': False}
        for _line in open(fn):
            line = _line.strip()
            if line.startswith(('ROW LABELS:', '*vertices')) or inside['vertices']:
                if not inside['vertices']:
                    inside['vertices'] = True
                    continue
                if line.startswith('#') or not line.strip():
                    inside['vertices'] = False  # break
                elif line.startswith(('DATA', '*edges')):
                    inside['vertices'] = False  # break
                    inside['edges'] = True
                else:
                    # todo if needed
                    continue
            elif line.startswith(('DATA', '*edges')) or inside['edges']:
                if not inside['edges']:
                    inside['edges'] = True  # break
                    continue
                if line.startswith('#') or not line.strip() or len(line.split(sep)) < 2:
                    inside['edges'] = False
                else:
                    # Parsing assignation
                    data.append(line.strip())

        row_size = len(data[0].split(sep))
        # (A dead pre-parse of `data` into an array was removed here: its
        # result was immediately overwritten by the dict accumulation below.)
        edges = {}
        if row_size == 2:
            # like .txt: each occurrence of a dyad adds weight 1.
            for line in data:
                dyad = '.'.join(line.strip().split(sep))
                edges[dyad] = edges.get(dyad, 0) + 1
        elif row_size == 3:
            # explicit weights (which can be zero) are summed per dyad.
            for line in data:
                _line = line.strip().split(sep)
                dyad = '.'.join(_line[0:2])
                edges[dyad] = edges.get(dyad, 0) + int(_line[-1])
        else:
            raise NotImplementedError

        # (w+1, then -1): shift vertex ids down by one, keep weights intact.
        edges = np.array([(e.split('.')[0], e.split('.')[1], w + 1)
                          for e, w in edges.items()], dtype=int) - 1
        N = edges.max() + 1
        g = np.zeros((N, N))
        g[tuple(edges[:, :2].T)] = edges[:, 2]
        return dict(data=g)
Classes
class DatasetDriver (*args, **kwargs)
-
Parse dataset file using pandas
Source code
class DatasetDriver(object): ''' Parse dataset file using pandas''' _comment = '%' log = logging.getLogger('root') # No pandas here.... @classmethod def parse_tnet(cls, fn, sep=' '): ''' Grammar retro-ingennired from fb/emaileu.txt. tnet format is official ? ''' cls.log.debug('opening file: %s' % fn) with open(fn) as f: content = f.read() lines = list(filter(None, content.split('\n'))) line1_length = lines[0].strip().split(sep) edges = {} if len(line1_length) == 2: # format 'i j' if edges. data_file_format = 'txt' for line in lines: dyad = line.strip().split(sep) dyad = '.'.join(dyad) edges[dyad] = edges.get(dyad, 0) + 1 #edges = [l.strip().split(sep) for l in lines] elif len(line1_length) == 5: # format '"date" i j weight'. data_file_format = 'tnet' for line in lines: _line = line.strip().split(sep) dyad = _line[-3:-1] dyad = '.'.join(dyad) w = int(_line[-1]) edges[dyad] = edges.get(dyad, 0) + w #edges = [l.strip().split(sep)[-3:-1] for l in lines] edges = np.array([ (e.split('.')[0], e.split('.')[1], w+1) for e, w in edges.items()], dtype=int) -1 edges[:, 0:2] -= edges[:, 0:2].min() N = edges[:, 0:2].max()+1 g = np.zeros((N,N)) g[tuple(edges[:, :2].T)] = edges[:, 2] data = dict(data=g) return data # No pandas here.... 
@classmethod def parse_csv(cls, fn, sep=';'): ''' Grammar retro-ingennired from manufacturing.csv ''' cls.log.debug('opening file: %s' % fn) with open(fn, 'r') as f: content = f.read() lines = list(filter(None, content.split('\n')))[1:] edges = {} for line in lines: dyad = line.strip().split(sep)[0:2] dyad = '.'.join(dyad) edges[dyad] = edges.get(dyad, 0) + 1 #edges = [l.strip().split(sep)[0:2] for l in lines] #edges = np.array([ (e[0], e[1]) for e in edges], dtype=int) -1 edges = np.array([ (e.split('.')[0], e.split('.')[1], w+1) for e, w in edges.items()], dtype=int) -1 edges[:, 0:2] -= edges[:, 0:2].min() N = edges[:, 0:2].max()+1 g = np.zeros((N,N)) g[tuple(edges[:, :2].T)] = edges[:, 2] data = dict(data=g) return data @classmethod def parse_dancer(cls, fn, sep=';'): """ Parse Network data depending on type/extension """ cls.log.debug('opening file: %s' % fn) data = pd.read_csv(fn, sep=sep, names=['n', 'feat', 'cluster' ], comment=cls._comment) parameters = data.dropna() clusters = parameters['cluster'].values.astype(int) features = np.array([list(map(float, f.split('|'))) for f in parameters['feat'].values]) data = data.ix[data['cluster'].isna()] data['cluster'] = 1 # <= the weight data = data.loc[pd.to_numeric(data['n'], errors='coerce').dropna().index].as_matrix().astype(int) data[:, 0:2] -= data[:, 0:2].min() N = data[:, 0:2].max()+1 y = np.zeros((N,N)) e_l = data[:,2] > 0 e_ix = data[:, 0:2][e_l] ix = list(zip(*e_ix)) y[ix] = data[:,2][e_l] data = dict(data=y, clusters=clusters, features=features) return data @classmethod def parse_dat(cls, fn, sep="\s+"): """ Parse Network data depending on type/extension """ cls.log.debug('opening file: %s' % fn) def _row_len(fn): ''' Seek for the length of the csv row, then break quicly ''' inside = {'vertices':False, 'edges':False } data = [] for _line in open(fn): line = _line.strip() if line.startswith(('ROW LABELS:', '*vertices')) or inside['vertices']: if not inside['vertices']: inside['vertices'] = True continue 
if line.startswith('#') or not line.strip(): inside['vertices'] = False # break elif line.startswith(('DATA','*edges' )): inside['vertices'] = False # break inside['edges'] = True else: continue elif line.startswith(('DATA','*edges' )) or inside['edges']: if not inside['edges']: inside['edges'] = True # break continue if line.startswith('#') or not line.strip() or len(line.split()) < 2 : inside['edges'] = False else: # Parsing assignation data.append( line.split() ) break return len(data[0]) # Sender, Reiceiver, Edges row_len = _row_len(fn) if row_len == 3: cols = ['s', 'r', 'weight'] elif row_len == 2: cols = ['s', 'r'] else: raise ValueError('I/O error for dataset file: %s' % fn) data = pd.read_csv(fn, sep=sep, names=cols, comment=cls._comment) if len(cols) == 2: data['weight'] = np.ones(data.shape[0]) cols = ['s', 'r', 'weight'] cond = pd.to_numeric(data['s'], errors='coerce').dropna().index & pd.to_numeric(data['r'], errors='coerce').dropna().index data = data.loc[cond].as_matrix().astype(int) data[:, 0:2] -= data[:, 0:2].min() N = data[:, 0:2].max()+1 y = np.zeros((N,N)) e_l = data[:,2] > 0 e_ix = data[:, 0:2][e_l] ix = list(zip(*e_ix)) y[ix] = data[:,2][e_l] data = dict(data=y) return data
Subclasses
Class variables
var log
Static methods
def parse_csv(fn, sep=';')
-
Grammar retro-engineered from manufacturing.csv
Source code
@classmethod def parse_csv(cls, fn, sep=';'): ''' Grammar retro-ingennired from manufacturing.csv ''' cls.log.debug('opening file: %s' % fn) with open(fn, 'r') as f: content = f.read() lines = list(filter(None, content.split('\n')))[1:] edges = {} for line in lines: dyad = line.strip().split(sep)[0:2] dyad = '.'.join(dyad) edges[dyad] = edges.get(dyad, 0) + 1 #edges = [l.strip().split(sep)[0:2] for l in lines] #edges = np.array([ (e[0], e[1]) for e in edges], dtype=int) -1 edges = np.array([ (e.split('.')[0], e.split('.')[1], w+1) for e, w in edges.items()], dtype=int) -1 edges[:, 0:2] -= edges[:, 0:2].min() N = edges[:, 0:2].max()+1 g = np.zeros((N,N)) g[tuple(edges[:, :2].T)] = edges[:, 2] data = dict(data=g) return data
def parse_dancer(fn, sep=';')
-
Parse Network data depending on type/extension
Source code
@classmethod def parse_dancer(cls, fn, sep=';'): """ Parse Network data depending on type/extension """ cls.log.debug('opening file: %s' % fn) data = pd.read_csv(fn, sep=sep, names=['n', 'feat', 'cluster' ], comment=cls._comment) parameters = data.dropna() clusters = parameters['cluster'].values.astype(int) features = np.array([list(map(float, f.split('|'))) for f in parameters['feat'].values]) data = data.ix[data['cluster'].isna()] data['cluster'] = 1 # <= the weight data = data.loc[pd.to_numeric(data['n'], errors='coerce').dropna().index].as_matrix().astype(int) data[:, 0:2] -= data[:, 0:2].min() N = data[:, 0:2].max()+1 y = np.zeros((N,N)) e_l = data[:,2] > 0 e_ix = data[:, 0:2][e_l] ix = list(zip(*e_ix)) y[ix] = data[:,2][e_l] data = dict(data=y, clusters=clusters, features=features) return data
def parse_dat(fn, sep='\\s+')
-
Parse Network data depending on type/extension
Source code
@classmethod def parse_dat(cls, fn, sep="\s+"): """ Parse Network data depending on type/extension """ cls.log.debug('opening file: %s' % fn) def _row_len(fn): ''' Seek for the length of the csv row, then break quicly ''' inside = {'vertices':False, 'edges':False } data = [] for _line in open(fn): line = _line.strip() if line.startswith(('ROW LABELS:', '*vertices')) or inside['vertices']: if not inside['vertices']: inside['vertices'] = True continue if line.startswith('#') or not line.strip(): inside['vertices'] = False # break elif line.startswith(('DATA','*edges' )): inside['vertices'] = False # break inside['edges'] = True else: continue elif line.startswith(('DATA','*edges' )) or inside['edges']: if not inside['edges']: inside['edges'] = True # break continue if line.startswith('#') or not line.strip() or len(line.split()) < 2 : inside['edges'] = False else: # Parsing assignation data.append( line.split() ) break return len(data[0]) # Sender, Reiceiver, Edges row_len = _row_len(fn) if row_len == 3: cols = ['s', 'r', 'weight'] elif row_len == 2: cols = ['s', 'r'] else: raise ValueError('I/O error for dataset file: %s' % fn) data = pd.read_csv(fn, sep=sep, names=cols, comment=cls._comment) if len(cols) == 2: data['weight'] = np.ones(data.shape[0]) cols = ['s', 'r', 'weight'] cond = pd.to_numeric(data['s'], errors='coerce').dropna().index & pd.to_numeric(data['r'], errors='coerce').dropna().index data = data.loc[cond].as_matrix().astype(int) data[:, 0:2] -= data[:, 0:2].min() N = data[:, 0:2].max()+1 y = np.zeros((N,N)) e_l = data[:,2] > 0 e_ix = data[:, 0:2][e_l] ix = list(zip(*e_ix)) y[ix] = data[:,2][e_l] data = dict(data=y) return data
def parse_tnet(fn, sep=' ')
-
Grammar retro-engineered from fb/emaileu.txt. It is unclear whether the tnet format is official.
Source code
@classmethod def parse_tnet(cls, fn, sep=' '): ''' Grammar retro-ingennired from fb/emaileu.txt. tnet format is official ? ''' cls.log.debug('opening file: %s' % fn) with open(fn) as f: content = f.read() lines = list(filter(None, content.split('\n'))) line1_length = lines[0].strip().split(sep) edges = {} if len(line1_length) == 2: # format 'i j' if edges. data_file_format = 'txt' for line in lines: dyad = line.strip().split(sep) dyad = '.'.join(dyad) edges[dyad] = edges.get(dyad, 0) + 1 #edges = [l.strip().split(sep) for l in lines] elif len(line1_length) == 5: # format '"date" i j weight'. data_file_format = 'tnet' for line in lines: _line = line.strip().split(sep) dyad = _line[-3:-1] dyad = '.'.join(dyad) w = int(_line[-1]) edges[dyad] = edges.get(dyad, 0) + w #edges = [l.strip().split(sep)[-3:-1] for l in lines] edges = np.array([ (e.split('.')[0], e.split('.')[1], w+1) for e, w in edges.items()], dtype=int) -1 edges[:, 0:2] -= edges[:, 0:2].min() N = edges[:, 0:2].max()+1 g = np.zeros((N,N)) g[tuple(edges[:, :2].T)] = edges[:, 2] data = dict(data=g) return data
class OnlineDatasetDriver (*args, **kwargs)
-
Parse dataset file using pandas
Source code
class OnlineDatasetDriver(object): ''' Parse dataset file using pandas''' _comment = '%' log = logging.getLogger('root') @classmethod def parse_tnet(cls, fn, sep=' '): ''' Grammar retro-ingennired from fb/emaileu.txt. tnet format is official ? ''' cls.log.debug('opening file: %s' % fn) for line in open(fn): line = line.strip() if not line: continue line1_length = line.split(sep) if len(line1_length) == 2: # format 'i j' if edges. data_file_format = 'txt' v1, v2 = line.strip().split(sep) w = 1 yield int(v1), int(v2), w, None elif len(line1_length) == 5: # format '"date" i j weight'. data_file_format = 'tnet' _line = line.strip().split(sep) v1, v2 = _line[-3:-1] w = int(_line[-1]) if w == 0: continue else: yield int(v1), int(v2), w, None @classmethod def parse_csv(cls, fn, sep=';'): ''' Grammar retro-ingennired from manufacturing.csv ''' cls.log.debug('opening file: %s' % fn) cpt = 0 for line in open(fn): if cpt == 0: # Ignore first status line cpt += 1 continue v1, v2 = line.strip().split(sep)[0:2] w = 1 yield int(v1), int(v2), w, None @classmethod def parse_dancer(cls, fn, sep=';'): cls.log.debug('opening file: %s' % fn) inside = {'vertices':False, 'edges':False } for line in open(fn): line = line.strip() if line.startswith('# Vertices') or inside['vertices']: if not inside['vertices']: inside['vertices'] = True continue if line.startswith('#') or not line.strip() : inside['vertices'] = False # break else: # Parsing assignation elements = line.strip().split(sep) index = int(elements[0]) clust = int(elements[-1]) feats = list(map(float, elements[-2].split('|'))) obj = {'cluster': clust, 'features': feats, 'index':index} yield obj elif line.startswith('# Edges') or inside['edges']: if not inside['edges']: inside['edges'] = True continue if line.startswith('#') or not line.strip() : inside['edges'] = False # break else: # Parsing assignation v1, v2 = line.split(sep) w = 1 yield int(v1), int(v2), w, None @classmethod def parse_dat(cls, fn, sep=" "): """ Parse Network 
data depending on type/extension """ cls.log.debug('opening file: %s' % fn) inside = {'vertices':False, 'edges':False } for line in open(fn): line = line.strip() if line.startswith(('ROW LABELS:', '*vertices')) or inside['vertices']: if not inside['vertices']: inside['vertices'] = True continue if line.startswith('#') or not line.strip(): inside['vertices'] = False # break elif line.startswith(('DATA','*edges' )): inside['vertices'] = False # break inside['edges'] = True else: continue elif line.startswith(('DATA','*edges' )) or inside['edges']: if not inside['edges']: inside['edges'] = True # break continue if line.startswith('#') or not line.strip() or len(line.split()) < 2 : inside['edges'] = False else: # Parsing assignation splitline = line.split(sep) row_size = len(splitline) if row_size == 2: # like .txt v1, v2 = splitline w = 1 yield int(v1), int(v2), w, None elif row_size == 3: v1, v2 = splitline[0:2] w = int(splitline[2]) if w == 0: continue else: yield int(v1), int(v2), w, None else: raise NotImplementedError
Subclasses
Class variables
var log
Static methods
def parse_csv(fn, sep=';')
-
Grammar retro-engineered from manufacturing.csv
Source code
@classmethod def parse_csv(cls, fn, sep=';'): ''' Grammar retro-ingennired from manufacturing.csv ''' cls.log.debug('opening file: %s' % fn) cpt = 0 for line in open(fn): if cpt == 0: # Ignore first status line cpt += 1 continue v1, v2 = line.strip().split(sep)[0:2] w = 1 yield int(v1), int(v2), w, None
def parse_dancer(fn, sep=';')
-
Source code
@classmethod def parse_dancer(cls, fn, sep=';'): cls.log.debug('opening file: %s' % fn) inside = {'vertices':False, 'edges':False } for line in open(fn): line = line.strip() if line.startswith('# Vertices') or inside['vertices']: if not inside['vertices']: inside['vertices'] = True continue if line.startswith('#') or not line.strip() : inside['vertices'] = False # break else: # Parsing assignation elements = line.strip().split(sep) index = int(elements[0]) clust = int(elements[-1]) feats = list(map(float, elements[-2].split('|'))) obj = {'cluster': clust, 'features': feats, 'index':index} yield obj elif line.startswith('# Edges') or inside['edges']: if not inside['edges']: inside['edges'] = True continue if line.startswith('#') or not line.strip() : inside['edges'] = False # break else: # Parsing assignation v1, v2 = line.split(sep) w = 1 yield int(v1), int(v2), w, None
def parse_dat(fn, sep=' ')
-
Parse Network data depending on type/extension
Source code
@classmethod def parse_dat(cls, fn, sep=" "): """ Parse Network data depending on type/extension """ cls.log.debug('opening file: %s' % fn) inside = {'vertices':False, 'edges':False } for line in open(fn): line = line.strip() if line.startswith(('ROW LABELS:', '*vertices')) or inside['vertices']: if not inside['vertices']: inside['vertices'] = True continue if line.startswith('#') or not line.strip(): inside['vertices'] = False # break elif line.startswith(('DATA','*edges' )): inside['vertices'] = False # break inside['edges'] = True else: continue elif line.startswith(('DATA','*edges' )) or inside['edges']: if not inside['edges']: inside['edges'] = True # break continue if line.startswith('#') or not line.strip() or len(line.split()) < 2 : inside['edges'] = False else: # Parsing assignation splitline = line.split(sep) row_size = len(splitline) if row_size == 2: # like .txt v1, v2 = splitline w = 1 yield int(v1), int(v2), w, None elif row_size == 3: v1, v2 = splitline[0:2] w = int(splitline[2]) if w == 0: continue else: yield int(v1), int(v2), w, None else: raise NotImplementedError
def parse_tnet(fn, sep=' ')
-
Grammar retro-engineered from fb/emaileu.txt. It is unclear whether the tnet format is official.
Source code
@classmethod def parse_tnet(cls, fn, sep=' '): ''' Grammar retro-ingennired from fb/emaileu.txt. tnet format is official ? ''' cls.log.debug('opening file: %s' % fn) for line in open(fn): line = line.strip() if not line: continue line1_length = line.split(sep) if len(line1_length) == 2: # format 'i j' if edges. data_file_format = 'txt' v1, v2 = line.strip().split(sep) w = 1 yield int(v1), int(v2), w, None elif len(line1_length) == 5: # format '"date" i j weight'. data_file_format = 'tnet' _line = line.strip().split(sep) v1, v2 = _line[-3:-1] w = int(_line[-1]) if w == 0: continue else: yield int(v1), int(v2), w, None
class RawDatasetDriver (*args, **kwargs)
-
Parse dataset file using python loop (deprecated)
Source code
class RawDatasetDriver(object): ''' Parse dataset file using python loop (deprecated) ''' _comment = '%' log = logging.getLogger('root') @classmethod def parse_tnet(cls, fn, sep=' '): ''' Grammar retro-ingennired from fb/emaileu.txt ''' cls.log.debug('opening file: %s' % fn) with open(fn) as f: content = f.read() lines = list(filter(None, content.split('\n'))) line1_length = lines[0].strip().split(sep) edges = {} if len(line1_length) == 2: # format 'i j' if edges. data_file_format = 'txt' for line in lines: dyad = line.strip().split(sep) dyad = '.'.join(dyad) edges[dyad] = edges.get(dyad, 0) + 1 #edges = [l.strip().split(sep) for l in lines] elif len(line1_length) == 5: # format '"date" i j weight'. data_file_format = 'tnet' for line in lines: _line = line.strip().split(sep) dyad = _line[-3:-1] dyad = '.'.join(dyad) w = int(_line[-1]) edges[dyad] = edges.get(dyad, 0) + w #edges = [l.strip().split(sep)[-3:-1] for l in lines] edges = np.array([ (e.split('.')[0], e.split('.')[1], w+1) for e, w in edges.items()], dtype=int) -1 N = edges.max() +1 #N = max(list(itertools.chain(*edges))) + 1 g = np.zeros((N,N)) g[tuple(edges[:, :2].T)] = edges[:, 2] data = dict(data=g) return data @classmethod def parse_csv(cls, fn, sep=';'): ''' Grammar retro-ingennired from manufacturing.csv ''' cls.log.debug('opening file: %s' % fn) with open(fn) as f: content = f.read() lines = list(filter(None, content.split('\n')))[1:] edges = {} for line in lines: dyad = line.strip().split(sep)[0:2] dyad = '.'.join(dyad) edges[dyad] = edges.get(dyad, 0) + 1 #edges = [l.strip().split(sep)[0:2] for l in lines] #edges = np.array([ (e[0], e[1]) for e in edges], dtype=int) -1 edges = np.array([ (e.split('.')[0], e.split('.')[1], w+1) for e, w in edges.items()], dtype=int) -1 N = edges.max() +1 #N = max(list(itertools.chain(*edges))) + 1 g = np.zeros((N,N)) g[tuple(edges[:, :2].T)] = edges[:, 2] data = dict(data=g) return data @classmethod def parse_dancer(cls, fn, sep=';'): """ Parse Network data 
depending on type/extension """ cls.log.debug('opening file: %s' % fn) data = [] inside = {'vertices':False, 'edges':False } clusters = [] features = [] for line in open(fn): if line.startswith('# Vertices') or inside['vertices']: if not inside['vertices']: inside['vertices'] = True N = 0 continue if line.startswith('#') or not line.strip() : inside['vertices'] = False # break else: # Parsing assignation elements = line.strip().split(sep) clust = int(elements[-1]) feats = list(map(float, elements[-2].split('|'))) clusters.append(clust) features.append(feats) N += 1 elif line.startswith('# Edges') or inside['edges']: if not inside['edges']: inside['edges'] = True continue if line.startswith('#') or not line.strip() : inside['edges'] = False # break else: # Parsing assignation data.append( line.strip() ) edges = np.array([tuple(row.split(sep)) for row in data]).astype(int) g = np.zeros((N,N)) g[[e[0] for e in edges], [e[1] for e in edges]] = 1 g[[e[1] for e in edges], [e[0] for e in edges]] = 1 # ?! .T try: parameters = parse_file_conf(os.path.join(os.path.dirname(fn), 'parameters')) parameters['devs'] = list(map(float, parameters['devs'].split(sep))) except IOError: parameters = {} finally: # @Obsolete ! 
parameters_ = parameters clusters = clusters features = np.array(features) data = dict(data=g, clusters=clusters, features=features) return data @classmethod def parse_dat(cls, fn, sep=' '): """ Parse Network data depending on type/extension """ cls.log.debug('opening file: %s' % fn) data = [] inside = {'vertices':False, 'edges':False } for _line in open(fn): line = _line.strip() if line.startswith(('ROW LABELS:', '*vertices')) or inside['vertices']: if not inside['vertices']: inside['vertices'] = True continue if line.startswith('#') or not line.strip(): inside['vertices'] = False # break elif line.startswith(('DATA','*edges' )): inside['vertices'] = False # break inside['edges'] = True else: # todo if needed continue elif line.startswith(('DATA','*edges' )) or inside['edges']: if not inside['edges']: inside['edges'] = True # break continue if line.startswith('#') or not line.strip() or len(line.split(sep)) < 2 : inside['edges'] = False else: # Parsing assignation data.append( line.strip() ) row_size = len(data[0].split(sep)) edges = np.array([tuple(row.split(sep)) for row in data]).astype(int)-1 edges = {} if row_size == 2: # like .txt for line in data: dyad = line.strip().split(sep) dyad = '.'.join(dyad) edges[dyad] = edges.get(dyad, 0) + 1 elif row_size == 3: for line in data: _line = line.strip().split(sep) dyad = _line[0:2] dyad = '.'.join(dyad) w = int(_line[-1]) # can be zeros edges[dyad] = edges.get(dyad, 0) + int(w) else: raise NotImplementedError edges = np.array([ (e.split('.')[0], e.split('.')[1], w+1) for e, w in edges.items()], dtype=int) -1 N = edges.max() +1 g = np.zeros((N,N)) g[tuple(edges[:, :2].T)] = edges[:, 2] data = dict(data=g) return data
Class variables
var log
Static methods
def parse_csv(fn, sep=';')
-
Grammar reverse-engineered from manufacturing.csv
Source code
@classmethod
def parse_csv(cls, fn, sep=';'):
    ''' Parse a csv edge list into a weighted adjacency matrix.

        Grammar reverse-engineered from manufacturing.csv: the first
        line is a header (skipped); each remaining line starts with
        'source<sep>target' using 1-based node ids.

        Parameters
        ----------
        fn : str
            Path of the csv file.
        sep : str
            Field separator (default ';').

        Returns
        -------
        dict with key 'data' holding an (N, N) numpy array whose entry
        [i, j] counts the occurrences of the dyad (i, j).
    '''
    cls.log.debug('opening file: %s' % fn)
    with open(fn) as f:
        content = f.read()

    # Drop the header line and any empty lines.
    lines = list(filter(None, content.split('\n')))[1:]

    # Count multi-edges: key 'i.j' -> number of occurrences.
    edges = {}
    for line in lines:
        dyad = line.strip().split(sep)[0:2]
        dyad = '.'.join(dyad)
        edges[dyad] = edges.get(dyad, 0) + 1

    # Rows (i, j, count); the trailing '-1' shifts 1-based node ids to
    # 0-based (w+1-1 == w leaves the count unchanged).
    edges = np.array([(e.split('.')[0], e.split('.')[1], w + 1)
                      for e, w in edges.items()], dtype=int) - 1

    # BUG FIX: size the matrix from the node columns only; the old
    # `edges.max()` also scanned the count column and could oversize N.
    N = edges[:, :2].max() + 1
    g = np.zeros((N, N))
    g[tuple(edges[:, :2].T)] = edges[:, 2]

    data = dict(data=g)
    return data
def parse_dancer(fn, sep=';')
-
Parse Network data depending on type/extension
Source code
@classmethod
def parse_dancer(cls, fn, sep=';'):
    """ Parse Network data depending on type/extension.

        Reads a 'dancer' file made of two sections: a '# Vertices'
        section whose rows end with '...<sep>f1|f2|...<sep>cluster',
        and a '# Edges' section of '<sep>'-separated node pairs.
        Returns dict(data=g, clusters=clusters, features=features)
        where g is a symmetric (N, N) 0/1 adjacency matrix, N being
        the number of vertex rows seen.

        NOTE(review): edge node ids look 0-based (they index g
        directly, no shift) -- confirm against the dancer generator.
    """
    cls.log.debug('opening file: %s' % fn)
    data = []                                    # raw edge rows, parsed after the loop
    inside = {'vertices':False, 'edges':False }  # section state machine
    clusters = []                                # one cluster id per vertex
    features = []                                # one float vector per vertex
    # NOTE(review): file handle is never closed -- consider `with open(fn)`.
    for line in open(fn):
        if line.startswith('# Vertices') or inside['vertices']:
            if not inside['vertices']:
                # Section header: enter the vertex section, start counting.
                inside['vertices'] = True
                N = 0
                continue
            if line.startswith('#') or not line.strip() :
                inside['vertices'] = False # end of the vertex section
            else:
                # Vertex row: cluster is the last field, '|'-joined
                # float features the one before it.
                elements = line.strip().split(sep)
                clust = int(elements[-1])
                feats = list(map(float, elements[-2].split('|')))
                clusters.append(clust)
                features.append(feats)
                N += 1
        elif line.startswith('# Edges') or inside['edges']:
            if not inside['edges']:
                inside['edges'] = True
                continue
            if line.startswith('#') or not line.strip() :
                inside['edges'] = False # end of the edge section
            else:
                # Edge row kept verbatim; split below.
                data.append( line.strip() )

    edges = np.array([tuple(row.split(sep)) for row in data]).astype(int)
    g = np.zeros((N,N))
    # Symmetrize: set both (i, j) and (j, i) to 1.
    g[[e[0] for e in edges], [e[1] for e in edges]] = 1
    g[[e[1] for e in edges], [e[0] for e in edges]] = 1 # ?! .T

    try:
        # Side-car 'parameters' file next to fn; parse_file_conf is
        # defined elsewhere in the project.
        parameters = parse_file_conf(os.path.join(os.path.dirname(fn), 'parameters'))
        parameters['devs'] = list(map(float, parameters['devs'].split(sep)))
    except IOError:
        parameters = {}
    finally:
        # @Obsolete ! -- dead store; 'parameters' is not returned.
        parameters_ = parameters

    clusters = clusters  # no-op self-assignment, kept as-is (doc-only review)
    features = np.array(features)
    data = dict(data=g, clusters=clusters, features=features)
    return data
def parse_dat(fn, sep=' ')
-
Parse Network data depending on type/extension
Source code
@classmethod
def parse_dat(cls, fn, sep=' '):
    """ Parse a sectioned network file into a weighted adjacency matrix.

        Recognizes two header styles for the vertex/edge sections:
        'ROW LABELS:' / 'DATA' and '*vertices' / '*edges'.  Edge rows
        are either 'i j' (each occurrence counts one) or 'i j w'
        (occurrences accumulate the weight w).  Node ids are assumed
        1-based -- TODO confirm against the datasets.

        Parameters
        ----------
        fn : str
            Path of the data file.
        sep : str
            Field separator (default ' ').

        Returns
        -------
        dict with key 'data' holding an (N, N) numpy adjacency matrix.

        Raises
        ------
        NotImplementedError
            If edge rows have neither 2 nor 3 fields.
    """
    cls.log.debug('opening file: %s' % fn)
    data = []
    inside = {'vertices': False, 'edges': False}
    # BUG FIX: context manager so the file handle is always closed.
    with open(fn) as fh:
        for _line in fh:
            line = _line.strip()
            if line.startswith(('ROW LABELS:', '*vertices')) or inside['vertices']:
                if not inside['vertices']:
                    inside['vertices'] = True
                    continue
                if line.startswith('#') or not line.strip():
                    inside['vertices'] = False
                elif line.startswith(('DATA', '*edges')):
                    # The vertex section ends where the edge section begins.
                    inside['vertices'] = False
                    inside['edges'] = True
                else:
                    # Vertex labels are not used; skip them.
                    continue
            elif line.startswith(('DATA', '*edges')) or inside['edges']:
                if not inside['edges']:
                    inside['edges'] = True
                    continue
                if line.startswith('#') or not line.strip() or len(line.split(sep)) < 2:
                    inside['edges'] = False
                else:
                    data.append(line.strip())

    row_size = len(data[0].split(sep))

    # Accumulate multi-edges: key 'i.j' -> total weight.
    # (An unused numpy array was built here before; removed as dead code.)
    edges = {}
    if row_size == 2:
        # 'i j' rows: every occurrence counts one.
        for line in data:
            dyad = line.strip().split(sep)
            dyad = '.'.join(dyad)
            edges[dyad] = edges.get(dyad, 0) + 1
    elif row_size == 3:
        # 'i j w' rows: accumulate the weight.
        for line in data:
            _line = line.strip().split(sep)
            dyad = _line[0:2]
            dyad = '.'.join(dyad)
            w = int(_line[-1])  # can be zeros
            edges[dyad] = edges.get(dyad, 0) + int(w)
    else:
        raise NotImplementedError

    # Rows (i, j, weight); the trailing '-1' shifts 1-based node ids to
    # 0-based (w+1-1 == w leaves the weight unchanged).
    edges = np.array([(e.split('.')[0], e.split('.')[1], w + 1)
                      for e, w in edges.items()], dtype=int) - 1

    # BUG FIX: size the matrix from the node columns only; the old
    # `edges.max()` also scanned the weight column and could oversize N.
    N = edges[:, :2].max() + 1
    g = np.zeros((N, N))
    g[tuple(edges[:, :2].T)] = edges[:, 2]

    data = dict(data=g)
    return data
def parse_tnet(fn, sep=' ')
-
Grammar reverse-engineered from fb/emaileu.txt
Source code
@classmethod
def parse_tnet(cls, fn, sep=' '):
    ''' Parse a tnet-like edge list into a weighted adjacency matrix.

        Grammar reverse-engineered from fb/emaileu.txt.  Two row
        widths are supported:
        * 2 fields: 'i j'          -- each occurrence counts one;
        * 5 fields: '"date" i j w' -- the dyad comes from the two
          fields before last, the weight from the last field, and
          occurrences accumulate the weight.

        Parameters
        ----------
        fn : str
            Path of the data file.
        sep : str
            Field separator (default ' ').

        Returns
        -------
        dict with key 'data' holding an (N, N) numpy adjacency matrix.

        Raises
        ------
        ValueError
            If the first line has neither 2 nor 5 fields.
    '''
    cls.log.debug('opening file: %s' % fn)
    with open(fn) as f:
        content = f.read()
    lines = list(filter(None, content.split('\n')))
    line1_length = lines[0].strip().split(sep)

    # Accumulate multi-edges: key 'i.j' -> total weight.
    edges = {}
    if len(line1_length) == 2:
        # format 'i j' if edges.
        for line in lines:
            dyad = line.strip().split(sep)
            dyad = '.'.join(dyad)
            edges[dyad] = edges.get(dyad, 0) + 1
    elif len(line1_length) == 5:
        # format '"date" i j weight'.
        for line in lines:
            _line = line.strip().split(sep)
            dyad = _line[-3:-1]
            dyad = '.'.join(dyad)
            w = int(_line[-1])
            edges[dyad] = edges.get(dyad, 0) + w
    else:
        # ROBUSTNESS FIX: fail loudly instead of crashing later on an
        # empty edge array.
        raise ValueError('unknown tnet row format in %s' % fn)

    # Rows (i, j, weight); the trailing '-1' shifts 1-based node ids to
    # 0-based (w+1-1 == w leaves the weight unchanged).
    edges = np.array([(e.split('.')[0], e.split('.')[1], w + 1)
                      for e, w in edges.items()], dtype=int) - 1

    # CONSISTENCY FIX (matches the module-level source of this file):
    # re-base node ids to start at 0, and size the matrix from the node
    # columns only -- the old `edges.max()` also scanned the weights.
    edges[:, 0:2] -= edges[:, 0:2].min()
    N = edges[:, 0:2].max() + 1
    g = np.zeros((N, N))
    g[tuple(edges[:, :2].T)] = edges[:, 2]

    data = dict(data=g)
    return data