Skip to content

Commit

Permalink
Add functions to retrieve all rsIDs from a VariantKey
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolaasuni committed Nov 23, 2018
1 parent 68a8f82 commit b7041d4
Show file tree
Hide file tree
Showing 31 changed files with 447 additions and 21 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5.3.5
5.4.0
2 changes: 1 addition & 1 deletion c/doc/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ PROJECT_NAME = "VariantKey"
# This could be handy for archiving the generated documentation or
# if some version control system is used.

PROJECT_NUMBER = 5.3.5
PROJECT_NUMBER = 5.4.0

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer
Expand Down
21 changes: 21 additions & 0 deletions c/src/variantkey/rsidvar.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,27 @@ static inline uint32_t find_vr_rsid_by_variantkey(rsidvar_cols_t cvr, uint64_t *
return *(cvr.rs + found);
}

/**
 * Return the rsID of the next VR record that carries the given VariantKey.
 * Intended to be called after find_vr_rsid_by_variantkey, typically in a loop,
 * to collect every rsID associated with the same VariantKey.
 *
 * @param cvr  Structure containing the pointers to the VKRS memory mapped file columns (vkrs.bin).
 * @param pos  Pointer to the current item; it is handed to col_has_next_uint64_t,
 *             which is expected to move it to the next record.
 * @param last Element (up to but not including) where to end the search (max value = nitems).
 * @param vk   VariantKey to match.
 *
 * @return The rsID stored at the updated position, or 0 when there is no further match.
 */
static inline uint32_t get_next_vr_rsid_by_variantkey(rsidvar_cols_t cvr, uint64_t *pos, uint64_t last, uint64_t vk)
{
    /* No further record with this VariantKey: report "not found". */
    if (!col_has_next_uint64_t(cvr.vk, pos, last, vk))
    {
        return 0;
    }
    /* The rs column is read at the position just updated by the helper. */
    return cvr.rs[*pos];
}

/**
* Search for the specified CHROM-POS range and returns the first occurrence of rsID in the VR file.
*
Expand Down
7 changes: 6 additions & 1 deletion c/test/test_example.c
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,14 @@ int main()
first = 0;
uint64_t last = 9;
rsid = find_vr_chrompos_range(cvr, &first, &last, 0x14, 0x000256C5, 0x000256CB);
fprintf(stdout, "%" PRIu32 "%" PRIu64 "%" PRIu64 "\n", rsid, first, last);
fprintf(stdout, "%" PRIu32 " %" PRIu64 " %" PRIu64 "\n", rsid, first, last);
// 9973 7 8

fpos = 2;
rsid = get_next_vr_rsid_by_variantkey(cvr, &fpos, 9, 0x80010274003A0000);
fprintf(stdout, "%" PRIu32 " %" PRIu64 "\n", rsid, fpos);
// 97 3

err = munmap_binfile(vr);
if (err != 0)
{
Expand Down
30 changes: 30 additions & 0 deletions c/test/test_rsidvar.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,35 @@ int test_find_vr_rsid_by_variantkey_notfound(rsidvar_cols_t cvr)
return errors;
}

/**
 * Exercise get_next_vr_rsid_by_variantkey for VariantKey 0x80010274003A0000,
 * starting from position 2.
 *
 * The first call is expected to return rsID 97 and leave pos at 3; the second
 * call is expected to return 0 (no further match) and leave pos at 4.
 *
 * @param cvr Structure containing the VKRS columns under test.
 *
 * @return Number of failed checks (0 on success).
 */
int test_get_next_vr_rsid_by_variantkey(rsidvar_cols_t cvr)
{
    int errors = 0;
    uint64_t pos = 2;
    uint32_t rsid = get_next_vr_rsid_by_variantkey(cvr, &pos, cvr.nrows, 0x80010274003A0000);
    if (pos != 3)
    {
        fprintf(stderr, "%s (1) Expected pos 3, got %" PRIu64 "\n", __func__, pos);
        ++errors;
    }
    if (rsid != 97)
    {
        /* Print the actual value in decimal so it is comparable with the expected one. */
        fprintf(stderr, "%s (1) Expected rsid 97, got %" PRIu32 "\n", __func__, rsid);
        ++errors;
    }
    /* Second step: continue from the position left by the first call. */
    rsid = get_next_vr_rsid_by_variantkey(cvr, &pos, cvr.nrows, 0x80010274003A0000);
    if (pos != 4)
    {
        fprintf(stderr, "%s (2) Expected pos 4, got %" PRIu64 "\n", __func__, pos);
        ++errors;
    }
    if (rsid != 0)
    {
        fprintf(stderr, "%s (2) Expected rsid 0, got %" PRIu32 "\n", __func__, rsid);
        ++errors;
    }
    return errors;
}

int test_find_vr_chrompos_range(rsidvar_cols_t cvr)
{
int errors = 0;
Expand Down Expand Up @@ -340,6 +369,7 @@ int main()
errors += test_get_next_rv_variantkey_by_rsid(crv);
errors += test_find_vr_rsid_by_variantkey(cvr);
errors += test_find_vr_rsid_by_variantkey_notfound(cvr);
errors += test_get_next_vr_rsid_by_variantkey(cvr);
errors += test_find_vr_chrompos_range(cvr);
errors += test_find_vr_chrompos_range_notfound(cvr);

Expand Down
2 changes: 1 addition & 1 deletion conda/c.src/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package:
name: variantkey-src
version: 5.3.5
version: 5.4.0

source:
path: ../..
Expand Down
2 changes: 1 addition & 1 deletion conda/c.vk/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package:
name: variantkey-vk
version: 5.3.5
version: 5.4.0

source:
path: ../..
Expand Down
8 changes: 4 additions & 4 deletions conda/python-class/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package:
name: pyvariantkey
version: 5.3.5
version: 5.4.0

source:
path: ../..
Expand All @@ -14,11 +14,11 @@ requirements:
- setuptools
- numpy >=1.15.0
build:
- variantkey >=5.3.5
- variantkey >=5.4.0
- numpy >=1.15.0
run:
- python
- variantkey >=5.3.5
- variantkey >=5.4.0
- numpy >=1.15.0

test:
Expand All @@ -30,7 +30,7 @@ test:
- pytest-cov
- pytest-benchmark
- pycodestyle
- variantkey >=5.3.5
- variantkey >=5.4.0
- numpy >=1.15.0
imports:
- pyvariantkey
Expand Down
2 changes: 1 addition & 1 deletion conda/python/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package:
name: variantkey
version: 5.3.5
version: 5.4.0

source:
path: ../..
Expand Down
2 changes: 1 addition & 1 deletion conda/r/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package:
name: r-variantkey
version: 5.3.5
version: 5.4.0

source:
path: ../..
Expand Down
8 changes: 7 additions & 1 deletion go/example/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,14 @@ func main() {
fmt.Println(vr.FindVRRsidByVariantKey(0, 9, 0x80010274003A0000))
// 97 3

fmt.Println(vr.GetNextVRRsidByVariantKey(2, 9, 0x80010274003A0000))
// 97 3

fmt.Println(vr.FindAllVRRsidByVariantKey(0, 9, 0x80010274003A0000))
// [97]

fmt.Println(vr.FindVRChromPosRange(0, 9, 0x14, 0x000256C5, 0x000256CB))
// 9973 7 8
// 9973 7 9

// /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\

Expand Down
29 changes: 29 additions & 0 deletions go/src/rsidvar_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,35 @@ func BenchmarkFindVRRsidByVariantKey(b *testing.B) {
}
}

// TestGetNextVRRsidByVariantKey steps twice through the VR data from position 2:
// the first step must yield rsID 97 at position 3, the second must yield
// rsID 0 (no further match) at position 4.
func TestGetNextVRRsidByVariantKey(t *testing.T) {
	const key uint64 = 0x80010274003A0000

	// First step.
	gotRsid, cursor := vr.GetNextVRRsidByVariantKey(2, vr.NRows, key)
	if cursor != 3 {
		t.Errorf("(1) Expected pos 3, got %d", cursor)
	}
	if gotRsid != 97 {
		t.Errorf("(1) Expected rsID 97, got %d", gotRsid)
	}

	// Second step, resuming from the position returned by the first.
	gotRsid, cursor = vr.GetNextVRRsidByVariantKey(cursor, vr.NRows, key)
	if cursor != 4 {
		t.Errorf("(2) Expected pos 4, got %d", cursor)
	}
	if gotRsid != 0 {
		t.Errorf("(2) Expected rsID 0, got %d", gotRsid)
	}
}

// TestFindAllVRRsidByVariantKey checks that exactly one rsID (97) is returned
// for VariantKey 0x80010274003A0000.
func TestFindAllVRRsidByVariantKey(t *testing.T) {
	rsid := vr.FindAllVRRsidByVariantKey(0, vr.NRows, 0x80010274003A0000)
	// Stop here on a length mismatch: indexing rsid[0] on an empty result
	// would panic instead of reporting a clean test failure.
	if len(rsid) != 1 {
		t.Fatalf("Expected len 1, got %d", len(rsid))
	}
	if rsid[0] != 97 {
		t.Errorf("Expected rsID 97, got %d", rsid[0])
	}
}

func TestFindVRChromPosRange(t *testing.T) {
rsid, first, last := vr.FindVRChromPosRange(0, vr.NRows, testData[6].chrom, testData[7].pos, testData[8].pos)
if rsid != testData[7].rsid {
Expand Down
23 changes: 23 additions & 0 deletions go/src/variantkey.go
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,29 @@ func (crv RSIDVARCols) FindVRRsidByVariantKey(first uint64, last uint64, vk uint
return rsid, uint64(cfirst)
}

// GetNextVRRsidByVariantKey gets the next rsID for the specified VariantKey in the VR file.
// It wraps the C function get_next_vr_rsid_by_variantkey and returns the rsID (or 0)
// together with the position as updated by that call.
func (cvr RSIDVARCols) GetNextVRRsidByVariantKey(pos, last uint64, vk uint64) (uint32, uint64) {
	// The C side updates the position through this pointer.
	cursor := C.uint64_t(pos)
	found := C.get_next_vr_rsid_by_variantkey(
		castGoRSIDVARColsToC(cvr),
		&cursor,
		C.uint64_t(last),
		C.uint64_t(vk),
	)
	return uint32(found), uint64(cursor)
}

// FindAllVRRsidByVariantKey gets all rsIDs for the specified VariantKey in the VR file.
// It calls the C function find_vr_rsid_by_variantkey once, then
// get_next_vr_rsid_by_variantkey repeatedly, collecting values until one of them returns 0.
// Returns the list of rsIDs (nil when the first lookup returns 0).
func (cvr RSIDVARCols) FindAllVRRsidByVariantKey(first, last uint64, vk uint64) (rsids []uint32) {
	cols := castGoRSIDVARColsToC(cvr)
	cursor := C.uint64_t(first)
	end := C.uint64_t(last)
	key := C.uint64_t(vk)
	// A zero rsID is the "not found" marker and terminates the collection.
	for id := uint32(C.find_vr_rsid_by_variantkey(cols, &cursor, end, key)); id != 0; id = uint32(C.get_next_vr_rsid_by_variantkey(cols, &cursor, end, key)) {
		rsids = append(rsids, id)
	}
	return rsids
}

// FindVRChromPosRange search for the specified CHROM-POS range and returns the first occurrence of RSID in the VR file.
func (crv RSIDVARCols) FindVRChromPosRange(first, last uint64, chrom uint8, posMin, posMax uint32) (uint32, uint64, uint64) {
cfirst := C.uint64_t(first)
Expand Down
2 changes: 1 addition & 1 deletion python-class/.gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
.cache
.coverage*
.coverage
LICENSE
README.md
c/
Expand Down
45 changes: 44 additions & 1 deletion python-class/pyvariantkey/variantkey.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ def find_all_rv_variantkey_by_rsid(self, rsid):
vk = []
rsid_arr = np.array(rsid).astype(np.uint32)
for x in np.nditer(rsid_arr):
vk = vk + pvk.find_all_rv_variantkey_by_rsid(self.rsvk_mc, 0, self.rsvk_nrows, x)
vk = vk + pvk.find_all_rv_variantkey_by_rsid(self.rsvk_mc, 0, self.rsvk_nrows, x.item(0))
return np.array(vk).astype(np.uint64)

def find_vr_rsid_by_variantkey(self, vk):
Expand All @@ -457,6 +457,49 @@ def find_vr_rsid_by_variantkey(self, vk):
self.vkrs_nrows,
np.array(vk).astype(np.uint64))

def get_next_vr_rsid_by_variantkey(self, pos, vk):
    """Get the next rsID for the specified VariantKey in the VR file.

    This function should be used after find_vr_rsid_by_variantkey.
    It can be called in a loop to get all rsIDs that are associated
    with the same VariantKey (if any).

    Parameters
    ----------
    pos : uint64
        Current item position.
    vk : uint64
        VariantKey to search.

    Returns
    -------
    tuple :
        - uint32 : rsID or 0 in case not found.
        - uint64 : Item position in the file.
    """
    lookup = np.vectorize(
        pvk.get_next_vr_rsid_by_variantkey,
        excluded=['mc', 'last'],
        otypes=[np.uint32, np.uint64])
    mapped_cols = self.vkrs_mc
    positions = np.array(pos).astype(np.uint64)
    row_count = self.vkrs_nrows
    keys = np.array(vk).astype(np.uint64)
    return lookup(mapped_cols, positions, row_count, keys)

def find_all_vr_rsid_by_variantkey(self, vk):
    """Search for the specified VariantKey(s) and return all associated rsIDs.

    Each key is looked up over the whole VR file (positions 0 to vkrs_nrows)
    and the per-key results are concatenated in input order.

    Parameters
    ----------
    vk : uint64
        VariantKey(s) to search.

    Returns
    -------
    numpy array of uint32 :
        - rsID(s).
    """
    collected = []
    keys = np.array(vk).astype(np.uint64)
    for key in np.nditer(keys):
        # item(0) converts the numpy scalar to a plain Python int for the binding.
        matches = pvk.find_all_vr_rsid_by_variantkey(self.vkrs_mc, 0, self.vkrs_nrows, key.item(0))
        collected = collected + matches
    return np.array(collected).astype(np.uint32)

def find_vr_chrompos_range(self, chrom, pos_min, pos_max):
"""Search for the specified CHROM-POS range and returns the first occurrence of rsID in the VR file.
Expand Down
4 changes: 2 additions & 2 deletions python-class/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def run(self):

setup(
name='pyvariantkey',
version='5.3.5.1',
version='5.4.0.1',
keywords=('variantkey variant key genetic genomics'),
description="VariantKey Python wrapper class",
long_description=read('../README.md'),
Expand All @@ -51,7 +51,7 @@ def run(self):
],
install_requires=[
'numpy>=1.15.0',
'variantkey>=5.3.5.1',
'variantkey>=5.4.0.1',
],
extras_require={
'test': [
Expand Down
6 changes: 6 additions & 0 deletions python-class/test/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@
vk.find_vr_rsid_by_variantkey(vk=0x80010274003a0000)
# (array(97, dtype=uint32), array(3, dtype=uint64))

vk.get_next_vr_rsid_by_variantkey(2, vk=0x80010274003a0000)
# (array(97, dtype=uint32), array(3, dtype=uint64))

vk.find_all_vr_rsid_by_variantkey(0x80010274003a0000)
# array([97], dtype=uint32)

vk.find_vr_chrompos_range(0x14, 0x000256c5, 0x000256cb)
# (array(9973, dtype=uint32), array(7, dtype=uint64), array(9, dtype=uint64))

Expand Down
18 changes: 15 additions & 3 deletions python-class/test/test_rsidvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ def test_find_rv_variantkey_by_rsid_notfound(self):
np.testing.assert_array_equal(first, 9)

def test_get_next_rv_variantkey_by_rsid(self):
vk, pos = npvk.get_next_rv_variantkey_by_rsid(2, 0x00000061)
np.testing.assert_array_equal(vk, 0x80010274003A0000)
vk, pos = npvk.get_next_rv_variantkey_by_rsid(2, testData[3, 4].astype(np.uint32))
np.testing.assert_array_equal(vk, testData[3, 5].astype(np.uint64))
np.testing.assert_array_equal(pos, 3)
vk, pos = npvk.get_next_rv_variantkey_by_rsid(pos, 0x00000061)
vk, pos = npvk.get_next_rv_variantkey_by_rsid(pos, testData[3, 4].astype(np.uint32))
np.testing.assert_array_equal(vk, 0)
np.testing.assert_array_equal(pos, 4)

Expand All @@ -79,6 +79,18 @@ def test_find_vr_rsid_by_variantkey_notfound(self):
np.testing.assert_array_equal(rx, 0)
np.testing.assert_array_equal(first, 9)

def test_get_next_vr_rsid_by_variantkey(self):
    """Step twice from position 2: expect (rsID 97, pos 3), then (rsID 0, pos 4)."""
    key = testData[3, 5].astype(np.uint64)
    cursor = 2
    for want_rsid, want_pos in ((97, 3), (0, 4)):
        # Each step resumes from the position returned by the previous one.
        got_rsid, cursor = npvk.get_next_vr_rsid_by_variantkey(cursor, key)
        np.testing.assert_array_equal(got_rsid, want_rsid)
        np.testing.assert_array_equal(cursor, want_pos)

def test_find_all_vr_rsid_by_variantkey(self):
    """Check that exactly one rsID, 97, is returned for the test VariantKey."""
    rsids = npvk.find_all_vr_rsid_by_variantkey(testData[3, 5].astype(np.uint64))
    np.testing.assert_array_equal(len(rsids), 1)
    # Also verify the value, not just the count, so a wrong rsID cannot pass.
    np.testing.assert_array_equal(rsids, [97])

def test_find_vr_chrompos_range(self):
xrsid, xfirst, xlast = npvk.find_vr_chrompos_range(0x14, 0x000256c5, 0x000256cb)
np.testing.assert_array_equal(xrsid, 0x000026f5)
Expand Down
2 changes: 1 addition & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def run(self):

setup(
name='variantkey',
version='5.3.5.1',
version='5.4.0.1',
keywords=('variantkey variant key genetic genomics'),
description="VariantKey Bindings for Python",
long_description=read('../README.md'),
Expand Down
6 changes: 6 additions & 0 deletions python/test/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@
vk.find_vr_rsid_by_variantkey(mc, 0, nrows, vk=0X80010274003A0000)
# (97, 3)

vk.get_next_vr_rsid_by_variantkey(mc, 2, nrows, vk=0X80010274003A0000)
# (97, 3)

vk.find_all_vr_rsid_by_variantkey(mc, 0, nrows, vk=0X80010274003A0000)
# [97]

vk.find_vr_chrompos_range(mc, 0, nrows, 0X14, 0X000256C5, 0X000256CB)
# (9973, 7, 9)

Expand Down
Loading

0 comments on commit b7041d4

Please sign in to comment.