Skip to content

Commit 9bb4d2b

Browse files
committed
r1290: support ds in Python
1 parent bd0cba5 commit 9bb4d2b

7 files changed

Lines changed: 35 additions & 14 deletions

File tree

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ ksw2_exts2_neon.o:ksw2_exts2_sse.c ksw2.h kalloc.h
102102
# other non-file targets
103103

104104
clean:
105-
rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mappy*.so mappy.c python/mappy.c mappy.egg*
105+
rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mappy*.so mappy.c python/mappy.c mappy.egg* .eggs
106106

107107
depend:
108108
(LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(CPPFLAGS) -- *.c)

format.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,24 +361,34 @@ static void write_cs_ds_or_MD(void *km, kstring_t *s, const mm_idx_t *mi, const
361361
kfree(km, qseq); kfree(km, tseq); kfree(km, tmp);
362362
}
363363

364-
int mm_gen_cs_or_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int is_MD, int no_iden, int is_qstrand)
364+
int mm_gen_cs_ds_or_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int is_MD, int is_ds, int no_iden, int is_qstrand)
365365
{
366366
mm_bseq1_t t;
367367
kstring_t str;
368368
str.s = *buf, str.l = 0, str.m = *max_len;
369369
t.l_seq = strlen(seq);
370370
t.seq = (char*)seq;
371-
write_cs_ds_or_MD(km, &str, mi, &t, r, no_iden, is_MD, 0, 0, is_qstrand);
371+
write_cs_ds_or_MD(km, &str, mi, &t, r, no_iden, is_MD, is_ds, 0, is_qstrand);
372372
*max_len = str.m;
373373
*buf = str.s;
374374
return str.l;
375375
}
376376

377+
int mm_gen_cs_or_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int is_MD, int no_iden, int is_qstrand)
378+
{
379+
return mm_gen_cs_ds_or_MD(km, buf, max_len, mi, r, seq, is_MD, 0, no_iden, is_qstrand);
380+
}
381+
377382
int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden)
378383
{
379384
return mm_gen_cs_or_MD(km, buf, max_len, mi, r, seq, 0, no_iden, 0);
380385
}
381386

387+
int mm_gen_ds(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden)
388+
{
389+
return mm_gen_cs_ds_or_MD(km, buf, max_len, mi, r, seq, 0, 1, no_iden, 0);
390+
}
391+
382392
int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq)
383393
{
384394
return mm_gen_cs_or_MD(km, buf, max_len, mi, r, seq, 1, 0, 0);

lib/simde

Submodule simde deleted from b30129b

minimap.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include <stdio.h>
66
#include <sys/types.h>
77

8-
#define MM_VERSION "2.30-r1287"
8+
#define MM_VERSION "2.30-r1290-dirty"
99

1010
#define MM_F_NO_DIAG (0x001LL) // no exact diagonal hit
1111
#define MM_F_NO_DUAL (0x002LL) // skip pairs where query name is lexicographically larger than target name
@@ -408,6 +408,7 @@ int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_
408408
* @return the length of cs
409409
*/
410410
int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden);
411+
int mm_gen_ds(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden);
411412
int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq);
412413

413414
// query sequence name and sequence in the minimap2 index

python/cmappy.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ cdef extern from "minimap.h":
112112
void mm_tbuf_destroy(mm_tbuf_t *b)
113113
void *mm_tbuf_get_km(mm_tbuf_t *b)
114114
int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden)
115+
int mm_gen_ds(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden)
115116
int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq)
116117

117118
#

python/mappy.pyx

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ cdef class Alignment:
1414
cdef int8_t _strand, _trans_strand
1515
cdef uint8_t _mapq, _is_primary
1616
cdef int _seg_id
17-
cdef _ctg, _cigar, _cs, _MD # these are python objects
17+
cdef _ctg, _cigar, _cs, _ds, _MD # these are python objects
1818

19-
def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, mlen, blen, NM, trans_strand, seg_id, cs_str, MD_str):
19+
def __cinit__(self, ctg, cl, cs, ce, strand, qs, qe, mapq, cigar, is_primary, mlen, blen, NM, trans_strand, seg_id, cs_str, ds_str, MD_str):
2020
self._ctg = ctg if isinstance(ctg, str) else ctg.decode()
2121
self._ctg_len, self._r_st, self._r_en = cl, cs, ce
2222
self._strand, self._q_st, self._q_en = strand, qs, qe
@@ -27,6 +27,7 @@ cdef class Alignment:
2727
self._trans_strand = trans_strand
2828
self._seg_id = seg_id
2929
self._cs = cs_str
30+
self._ds = ds_str
3031
self._MD = MD_str
3132

3233
@property
@@ -77,6 +78,9 @@ cdef class Alignment:
7778
@property
7879
def cs(self): return self._cs
7980

81+
@property
82+
def ds(self): return self._ds
83+
8084
@property
8185
def MD(self): return self._MD
8286

@@ -96,6 +100,7 @@ cdef class Alignment:
96100
a = [str(self._q_st), str(self._q_en), strand, self._ctg, str(self._ctg_len), str(self._r_st), str(self._r_en),
97101
str(self._mlen), str(self._blen), str(self._mapq), tp, ts, "cg:Z:" + self.cigar_str]
98102
if self._cs != "": a.append("cs:Z:" + self._cs)
103+
if self._ds != "": a.append("ds:Z:" + self._ds)
99104
if self._MD != "": a.append("MD:Z:" + self._MD)
100105
return "\t".join(a)
101106

@@ -165,7 +170,7 @@ cdef class Aligner:
165170
def __bool__(self):
166171
return (self._idx != NULL)
167172

168-
def map(self, seq, seq2=None, name=None, buf=None, cs=False, MD=False, max_frag_len=None, extra_flags=None):
173+
def map(self, seq, seq2=None, name=None, buf=None, cs=False, ds=False, MD=False, max_frag_len=None, extra_flags=None):
169174
cdef cmappy.mm_reg1_t *regs
170175
cdef cmappy.mm_hitpy_t h
171176
cdef ThreadBuffer b
@@ -206,19 +211,22 @@ cdef class Aligner:
206211
i = 0
207212
while i < n_regs:
208213
cmappy.mm_reg2hitpy(self._idx, &regs[i], &h)
209-
cigar, _cs, _MD = [], '', ''
214+
cigar, _cs, _ds, _MD = [], '', '', ''
210215
for k in range(h.n_cigar32): # convert the 32-bit CIGAR encoding to Python array
211216
c = h.cigar32[k]
212217
cigar.append([c>>4, c&0xf])
213-
if cs or MD: # generate the cs and/or the MD tag, if requested
218+
if cs or ds or MD: # generate the cs/ds and/or the MD tag, if requested
214219
_cur_seq = _seq2 if h.seg_id > 0 and seq2 is not None else _seq
215220
if cs:
216221
l_cs_str = cmappy.mm_gen_cs(km, &cs_str, &m_cs_str, self._idx, &regs[i], _cur_seq, 1)
217222
_cs = cs_str[:l_cs_str] if isinstance(cs_str, str) else cs_str[:l_cs_str].decode()
223+
if ds:
224+
l_cs_str = cmappy.mm_gen_ds(km, &cs_str, &m_cs_str, self._idx, &regs[i], _cur_seq, 1)
225+
_ds = cs_str[:l_cs_str] if isinstance(cs_str, str) else cs_str[:l_cs_str].decode()
218226
if MD:
219227
l_cs_str = cmappy.mm_gen_MD(km, &cs_str, &m_cs_str, self._idx, &regs[i], _cur_seq)
220228
_MD = cs_str[:l_cs_str] if isinstance(cs_str, str) else cs_str[:l_cs_str].decode()
221-
yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.mlen, h.blen, h.NM, h.trans_strand, h.seg_id, _cs, _MD)
229+
yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.mlen, h.blen, h.NM, h.trans_strand, h.seg_id, _cs, _ds, _MD)
222230
cmappy.mm_free_reg1(&regs[i])
223231
i += 1
224232
finally:

python/minimap2.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import mappy as mp
66

77
def main(argv):
8-
opts, args = getopt.getopt(argv[1:], "x:n:m:k:w:r:cM")
8+
opts, args = getopt.getopt(argv[1:], "x:n:m:k:w:r:cdM")
99
if len(args) < 2:
1010
print("Usage: minimap2.py [options] <ref.fa>|<ref.mmi> <query.fq>")
1111
print("Options:")
@@ -16,11 +16,12 @@ def main(argv):
1616
print(" -w INT minimizer window length")
1717
print(" -r INT band width")
1818
print(" -c output the cs tag")
19+
print(" -d output the ds tag")
1920
print(" -M output the MD tag")
2021
sys.exit(1)
2122

2223
preset = min_cnt = min_sc = k = w = bw = None
23-
out_cs = out_MD = False
24+
out_cs = out_ds = out_MD = False
2425
for opt, arg in opts:
2526
if opt == '-x': preset = arg
2627
elif opt == '-n': min_cnt = int(arg)
@@ -29,12 +30,13 @@ def main(argv):
2930
elif opt == '-k': k = int(arg)
3031
elif opt == '-w': w = int(arg)
3132
elif opt == '-c': out_cs = True
33+
elif opt == '-d': out_ds = True
3234
elif opt == '-M': out_MD = True
3335

3436
a = mp.Aligner(args[0], preset=preset, min_cnt=min_cnt, min_chain_score=min_sc, k=k, w=w, bw=bw)
3537
if not a: raise Exception("ERROR: failed to load/build index file '{}'".format(args[0]))
3638
for name, seq, qual in mp.fastx_read(args[1]): # read one sequence
37-
for h in a.map(seq, cs=out_cs, MD=out_MD): # traverse hits
39+
for h in a.map(seq, cs=out_cs, ds=out_ds, MD=out_MD): # traverse hits
3840
print('{}\t{}\t{}'.format(name, len(seq), h))
3941

4042
if __name__ == "__main__":

0 commit comments

Comments
 (0)