Skip to content
Snippets Groups Projects
Commit cdf5d6db authored by GEOFFREY CHARLES BILLY
Browse files

Updated CWPair2 XML input parameters (WIP) and CWPAIR2_util.

parent bf9740cd
No related branches found
No related tags found
No related merge requests found
......@@ -2,7 +2,7 @@
cwpair2.py
Input: gff format
.gff format: chromosome (chr##), strand (+/-), start (index), end (index), value (read count)
.gff format: chromosome (chr##), strand (+/-), start (index), end (index), value (read count)
Output: list of matched pairs and list of unmatched orphans
Files: S (simple), D (detailed), O (orphans), P (frequency preview plot), F (final frequency plot), statistics.txt
......@@ -15,7 +15,7 @@ import cwpair2_util
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--input', dest='inputs', action='append', help="Input datasets")
parser.add_argument('--mode', action='store', type='string', dest='method', default='mode', help='Method of finding match.')
parser.add_argument('--method', action='store', type='string', dest='method', default='mode', help='Method of finding match.')
parser.add_argument('--up_distance', action='store', type='int', dest='up_distance', default=50, help='Distance upstream of a pair. Default 50.')
parser.add_argument('--down_distance', action='store', type='int', dest='down_distance', default=100, help='Distance downstream of a pair. Default 100.')
parser.add_argument('--bin_size', action='store', type='int', dest='binsize', default=1, help='Width of bins for plots and mode.')
......
<?xml version="1.0"?>
<tool id="pughlab_genetrack" name="Genetrack" version="@WRAPPER_VERSION@.0">
<description>converts data to standard-peak GFF</description>
<tool id="pughlab_cwpair2" name="CWPAIR2" version="@WRAPPER_VERSION@.0">
<description>matches multiple GFF peak files together</description>
<macros>
<import>genetrack_macros.xml</import>
<import>cwpair2_macros.xml</import>
</macros>
<expand macro="requirements" />
<command>
<![CDATA[
mkdir output &&
python $__tool_directory__/genetrack.py
--input_format $input_format_cond.input_format
#if str($input_format_cond.input_format) == "genetracktool":
#for $i in $input_format_cond.input_genetracktool:
--input "${i}"
#end for
#elif str($input_format_cond.input_format) == "gff":
#for $i in $input_format_cond.input_gff:
--input "${i}"
#end for
python $__tool_directory__/cwpair2.py
--method $method
--down_distance $down_distance
--bin_size $bin_size
--threshold_format $threshold_format_cond.threshold_format
#if str($threshold_format_cond.threshold_format) == "absolute_threshold":
--threshold "${threshold_format_cond.absolute_threshold}"
#elif str($threshold_format_cond.threshold_format) == "relative_threshold":
--threshold "${threshold_format_cond.relative_threshold}"
#end if
--sigma $sigma
--exclusion $exclusion
--up_width $up_width
--down_width $down_width
--filter $filter
--chromosome $chromosome
--chunk_size $chunk_size
--output $output
--output_format $output_format
]]>
</command>
<inputs>
<param name="input" type="data" format="gff" multiple="True" label="Convert gff files" />
<param name="sigma" type="integer" value="5" min="1" label="Sigma to use when smoothing reads to call peaks" help="Higher values increase computation but produce more smoothing." />
<param name="exclusion" type="integer" value="20" min="1" label="Peak exclusion zone" help="Exclusion zone around each peak that prevents others from being called." />
<param name="up_width" type="integer" value="0" min="0" label="Upstream width of called peaks" />
<param name="down_width" type="integer" value="0" min="0" label="Downstream width of called peaks" />
<param name="filter" type="integer" value="1" min="0" label="Absolute read filter" help="Removes peaks with lower peak height." />
<param name="chunk_size" type="integer" value="10" min="1" label="Chunk each chromosome into" help="Value is millions of base pairs where each size increment uses about 20MB of memory." />
<conditional name="threshold_format_cond">
<param name="threshold_format" type="select" label="Format of the threshold of filtering pairs">
<option value="absolute_threshold">Absolute</option>
<option value="relative_threshold" selected="True">Relative</option>
</param>
<when value="absolute_threshold">
<param name="absolute_threshold" type="integer" label="Threshold to filter by" />
</when>
<when value="relative_threshold">
<param name="relative_threshold" type="float" label="Percentile threshold" />
</when>
</conditional>
<param name="method" type="select" label="Method of finding a match">
<option value="mode" selected="True">Mode</option>
<option value="closest">Closest</option>
<option value="largest">Largest</option>
</param>
<param name="down_distance" type="integer" value="100" min="1" label="Distance downstream of a pair" />
<param name="bin_size" type="integer" value="1" min="1" label="Width of bins" />
<param name="plot_format" type="string" value="pdf" label="Format of output graph" />
<param name="output_format" type="string" value="gff" label="Output format of data" />
<param name="chrom_sort" type="string" value="asc" label="Sorting by chromosome"/>
<param name="score_sort" type="string" value ="none" label="Sorting by score"/>
<param name="output_format" type="select" label="Format of converted files">
<option value="gff" selected="True">Gff</option>
<option value="genetracktool">GenetrackTool</option>
......
......@@ -40,9 +40,9 @@ def parse_chromosomes(reader):
if len(line) == 9:
# gff3 format
cname, junk, junk, start, end, value, strand, junk, junk = line
else:
# txt format
cname, strand, start, end, value = line
else:
# txt format
cname, strand, start, end, value = line
start = int(start)
end = int(end)
value = float(value)
......@@ -313,8 +313,10 @@ def perform_process(path, match_func, threshold, options):
watson, crick = split_strands(chromosome)
logging.debug('%d watson, %s crick peaks' % (len(watson), len(crick)))
watson.sort(key=lambda data: -float(data[3])) # Sort by value of each peak
crick.sort(key=lambda data: float(data[1])) # Sort by position to facilitate binary search
# Sort by value of each peak
watson.sort(key=lambda data: -float(data[3]))
# Sort by position to facilitate binary search
crick.sort(key=lambda data: float(data[1]))
keys = make_keys(crick)
for peak in watson:
......@@ -326,8 +328,10 @@ def perform_process(path, match_func, threshold, options):
match = METHODS[match_func](window, peak)
if match:
# Write output
# (chr, start+, end+, value+, +, start-, end-, value-, -, dist, coord)
'''
Write output
(chr, start+, end+, value+, +, start-, end-, value-, -, dist, coord)
'''
midpoint = (match[1] + match[2] + peak[1] + peak[2]) // 4
d = distance(peak, match)
dist.add(d)
......@@ -396,11 +400,15 @@ def perform_process(path, match_func, threshold, options):
def write_statistics(statistics):
''' Writes a list of statistics to the file(s) specified by them'''
'''
Writes a list of statistics to the file(s) specified by them
'''
logging.info('Writing statistics')
by_file = {}
for stats in statistics: # Collect all the stats together by destination file
if not stats: # Skip "None" statistics from failed files
# Collect all the stats together by destination file
for stats in statistics:
# Skip "None" statistics from failed files
if not stats:
continue
path = stats['stats_path']
if path not in by_file:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment