# Source code for cmapPy.set_io.gmt

"""
gmt.py

IO methods for handling GMT files.

A GMT is stored as a list of dictionaries.
Each line is its own dictionary.
Each dictionary has the following keys:
    - head (string): identifier for the set
    - desc (string): longer description of the set
    - entry (list): members of the set

AUTHOR: Corey Flynn, Broad Institute, 2012
MODIFIED: Lev Litichevskiy, 2017

"""
import os

# Keys of the dictionary that represents one set (one line of a GMT file).
# Value stored under this key: the set identifier (first tab-delimited field).
SET_IDENTIFIER_FIELD = "head"
# Value stored under this key: the set description (second tab-delimited field).
SET_DESC_FIELD = "desc"
# Value stored under this key: list of set members (remaining non-empty fields).
SET_MEMBERS_FIELD = "entry"


def read(file_path):
    """ Read a gmt file at the path specified by file_path.

    Each line must hold at least three tab-delimited items: the set
    identifier, the set description, and then the set members. Trailing
    whitespace on the line is removed and empty member fields are dropped.

    Args:
        file_path (string): path to gmt file; a leading "~" is expanded to
            the user's home directory

    Returns:
        gmt (GMT object): list of dicts, where each dict corresponds to one
            line of the GMT file

    Raises:
        AssertionError: if a line has fewer than 3 tab-delimited items, if a
            set lists the same member more than once, or if
            verify_gmt_integrity rejects the parsed sets

    """
    actual_file_path = os.path.expanduser(file_path)

    # Create GMT object
    gmt = []

    with open(actual_file_path, 'r') as f:

        # Iterate over each line
        for line_num, line in enumerate(f):

            # Separate along tabs
            fields = line.split('\t')

            # Raise explicitly instead of using `assert`, so that the check
            # is not stripped when Python runs with -O. The exception type
            # and message are the same as an assert would produce.
            if len(fields) <= 2:
                raise AssertionError((
                    "Each line must have at least 3 tab-delimited items. " +
                    "line_num: {}, fields: {}").format(line_num, fields))

            # Get rid of trailing whitespace (including the newline)
            fields[-1] = fields[-1].rstrip()

            # Collect entries, skipping empty ones
            entries = [x for x in fields[2:] if x]

            if len(set(entries)) != len(entries):
                raise AssertionError((
                    "There should not be duplicate entries for the same set. " +
                    "line_num: {}, entries: {}").format(line_num, entries))

            # Store this line as a dictionary
            line_dict = {SET_IDENTIFIER_FIELD: fields[0],
                         SET_DESC_FIELD: fields[1],
                         SET_MEMBERS_FIELD: entries}
            gmt.append(line_dict)

    # Checks that apply across lines (e.g. unique set identifiers)
    verify_gmt_integrity(gmt)

    return gmt
def verify_gmt_integrity(gmt):
    """ Make sure that set ids are unique.

    Args:
        gmt (GMT object): list of dicts

    Returns:
        None

    Raises:
        AssertionError: if two or more sets share the same identifier

    """
    set_ids = [d[SET_IDENTIFIER_FIELD] for d in gmt]

    # Raise explicitly instead of using `assert`, so that the check is not
    # stripped when Python runs with -O. The exception type and message are
    # the same as an assert would produce.
    if len(set(set_ids)) != len(set_ids):
        raise AssertionError(
            "Set identifiers should be unique. set_ids: {}".format(set_ids))
def write(gmt, out_path):
    """ Write a GMT to a text file.

    Each set becomes one line: identifier, description, and then the members,
    all separated by tabs. Members are converted with str() before writing.
    A set with no members is written with a trailing tab after the
    description.

    Args:
        gmt (GMT object): list of dicts
        out_path (string): output path; a leading "~" is expanded to the
            user's home directory

    Returns:
        None

    """
    # Expand "~" the same way read() does, so both accept the same paths
    actual_out_path = os.path.expanduser(out_path)

    with open(actual_out_path, 'w') as f:
        for each_dict in gmt:
            set_id = each_dict[SET_IDENTIFIER_FIELD]
            set_desc = each_dict[SET_DESC_FIELD]
            members = '\t'.join(
                [str(entry) for entry in each_dict[SET_MEMBERS_FIELD]])

            # One write per set rather than four small ones
            f.write(set_id + '\t' + set_desc + '\t' + members + '\n')