from .tree import *
from mb.util.tabular import roll_toks, augment_cols, merge_tables, censor, partition
#####################################
#
# ABSTRACT TYPES
#
#####################################
[docs]class TokMeasures(MBType):
SUFFIX = '.tokmeasures'
FILE_TYPE = 'table'
DESCR_SHORT = 'tokmeasures'
DESCR_LONG = "Abstract base class for token-by-token measures (tokmeasures) types.\n"
@classmethod
def is_abstract(cls):
return cls.__name__ == 'TokMeasures'
[docs]class ItemMeasures(MBType):
SUFFIX = '.itemmeasures'
FILE_TYPE = 'table'
DESCR_SHORT = 'itemmeasures'
DESCR_LONG = "Abstract base class for item-by-item measures (itemmeasures) types.\n"
@classmethod
def is_abstract(cls):
return cls.__name__ == 'ItemMeasures'
[docs]class EvMeasures(MBType):
SUFFIX = '.evmeasures'
FILE_TYPE = 'table'
DESCR_SHORT = 'evmeasures'
DESCR_LONG = "Abstract base class for event measures (evmeasures) types.\n"
@classmethod
def is_abstract(cls):
return cls.__name__ == 'EvMeasures'
[docs]class PrdMeasures(MBType):
SUFFIX = '.prdmeasures'
FILE_TYPE = 'table'
DESCR_SHORT = 'prdmeasures'
DESCR_LONG = "Abstract base class for predictor measures (prdmeasures) types.\n"
@classmethod
def is_abstract(cls):
return cls.__name__ == 'PrdMeasures'
[docs]class ResMeasures(MBType):
SUFFIX = '.resmeasures'
FILE_TYPE = 'table'
DESCR_SHORT = 'resmeasures'
DESCR_LONG = "Abstract base class for response measures (resmeasures) types.\n"
@classmethod
def is_abstract(cls):
return cls.__name__ == 'ResMeasures'
#####################################
#
# TOKMEASURES TYPES
#
#####################################
[docs]class TokMeasuresDLT(TokMeasures):
MANIP = '.dlt'
STEM_PREREQ_TYPES = [GoldLineTrees]
STATIC_PREREQ_TYPES = ['scripts/dlt.py']
DESCR = 'DLT measures'
DESCR_LONG = 'Compute DLT (integration cost) measures from linetrees'
@classmethod
def augment_prereq(cls, i, path):
return '.gold'
def body(self):
out = "cat %s | python3 -m mb.static_resources.scripts.dlt > %s" % (
self.stem_prereqs()[0].path,
self.path
)
return out
#####################################
#
# ITEMMEASURES TYPES
#
#####################################
[docs]class ItemmeasuresRolled(ItemMeasures):
STEM_PREREQ_TYPES = [TokMeasures]
DESCR_SHORT = 'rolled itemmeasures'
DESCR_LONG = 'Itemmeasures rolled from tokmeasures'
@classmethod
def other_prereq_paths(cls, path):
if path is None:
return ['(DIR/)<LINEITEMS>.lineitems']
basename = '.'.join(path.split('.')[:-2])
out = [basename + '.itemmeasures']
return out
@classmethod
def other_prereq_type(cls, i, path):
return ItemMeasures
def body(self):
def out(tokmeasures, lineitems):
outputs = roll_toks(tokmeasures, lineitems, skip_cols=['sentid', 'embddepthMin', 'timestamp'])
return outputs
return out
[docs]class ItemmeasuresConcat(ItemMeasures):
MANIP = '.concat'
STEM_PREREQ_TYPES = [ItemMeasures]
REPEATABLE_PREREQ = True
DESCR_SHORT = 'concatenated itemmeasures'
DESCR_LONG = 'Itemmeasures from (column) concatenation of tables'
def body(self):
def out(*args):
same_rows = True
n_rows = None
for x in args:
if n_rows is None:
n_rows = x.shape[0]
else:
if n_rows != x.shape[0]:
same_rows = False
break
assert same_rows, 'All inputs must have the same number of rows. Got: %s.' % [x.shape[0] for x in args]
args = [x.reset_index(drop=True) for x in args]
colset = set()
coldrop = {'word', 'sentpos', 'sentid', 'docid', 'rolled'}
def col_mapper(x):
if x in colset:
i = 1
colname = x + str(i)
while colname in colset:
i += 1
out = colname
else:
out = x
return out
new_args = []
for i, x in enumerate(args):
coldrop_cur = colset & coldrop
x = x[[y for y in x.columns if not y in coldrop_cur]]
x = x.rename(col_mapper, axis=1)
colset |= set(x.columns)
new_args.append(x)
outputs = pd.concat(new_args, axis=1)
outputs = augment_cols(outputs)
return outputs
return out
#####################################
#
# EVMEASURES TYPES
#
#####################################
[docs]class EvMeasuresMerged(EvMeasures):
STEM_PREREQ_TYPES = [ItemMeasures]
DESCR_SHORT = 'merged evmeasures'
DESCR_LONG = 'Merge of evmeasures with itemmeasures'
@classmethod
def augment_prereq(cls, i, path):
return '.concat'
@classmethod
def other_prereq_paths(cls, path):
if path is None:
return ['(DIR/)<CORPUS>.evmeasures']
base = os.path.basename(path).split('.')[0]
out = [os.path.join(os.path.dirname(path), '%s.evmeasures' % base)]
return out
@classmethod
def other_prereq_type(cls, i, path):
return EvMeasures
def body(self):
def out(itemmeasures, evmeasures):
itemmeasures = itemmeasures.copy()
evmeasures = evmeasures.copy()
outputs = merge_tables(evmeasures, itemmeasures, ['sentid', 'sentpos'])
return outputs
return out
#####################################
#
# PRDMEASURES TYPES
#
#####################################
#####################################
#
# RESMEASURES TYPES
#
#####################################
[docs]class ResMeasuresReg(ResMeasures):
STEM_PREREQ_TYPES = [EvMeasures]
ARG_TYPES = [
Arg(
'cens_params_file',
dtype=str,
positional=True,
descr='Basename of *.ini file in local directory ``prm`` providing censorship instructions.'
),
Arg(
'part_params_file',
dtype=str,
positional=True,
descr='Basename of *.ini file in local directory ``prm`` providing partitioning instructions.'
),
Arg(
'partition_name',
dtype=str,
positional=True,
descr='Name of partition element to use. One of ["fit", "expl", "held"].'
)
]
DESCR_LONG = 'resmeasures for regression analysis'
@classmethod
def other_prereq_paths(cls, path):
if path is None:
return ['prm/<CENS-PARAMS>.partprm.ini', 'prm/<CENS-PARAMS>.censprm.ini']
out = []
args = cls.parse_args(path)
out.append('prm/%s.partprm.ini' % args['part_params_file'])
out.append('prm/%s.censprm.ini' % args['cens_params_file'])
return out
@classmethod
def other_prereq_type(cls, i, path):
return ParamFile
def body(self):
def out(*args):
evmeasures = args[0].copy()
other_prereqs = self.other_prereqs()
part_params_file = other_prereqs[0].path
cens_params_file = other_prereqs[1].path
part_name = self.args['partition_name'].split(DELIM[2])
evmeasures = censor(evmeasures, cens_params_file)
evmeasures = partition(evmeasures, part_params_file, part_name)
return evmeasures
return out