Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates for new gene resources #115

Merged
merged 1 commit into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,11 @@ chromosomes are also output.
.. note:: Shared DNA is not computed on the X chromosome with the 1000 Genomes Project genetic
maps since the X chromosome is not included in these genetic maps.

In this example, there are 15,976 shared genes on both chromosomes transcribed from 36 segments
In this example, there are 51,740 shared genes/transcripts on both chromosomes transcribed from 36 segments
of shared DNA:

>>> len(results['two_chrom_shared_genes'])
15976
51740
>>> len(results['two_chrom_shared_dna'])
36

Expand Down
42 changes: 21 additions & 21 deletions tests/test_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ def _generate_test_cytoBand_hg19(self):
def _generate_test_gene_dfs(
self,
chrom="1",
len1=3811,
len2=4000,
len1=11421,
len2=12000,
txStart1=1000000,
txEnd1=2000000,
txStart2=111600000,
Expand Down Expand Up @@ -133,8 +133,8 @@ def run_find_shared_dna_test_X(self, f):
20.837792 + 180.837755,
20.837792 + 180.837755 + 0.347344,
),
len1=54,
len2=3022,
len1=146,
len2=8243,
txStart1=2400000,
txEnd1=2600000,
txStart2=150000000,
Expand All @@ -148,7 +148,7 @@ def run_find_shared_dna_test_1000G(self, f):
pos=(1, 43800001),
rate=(63.0402663602 / (43800001 / 1e6), 0),
map_cMs=(0.0, 63.0402663602),
len1=2188,
len1=6096,
)

def run_find_shared_dna_test(self, f, HapMap2=True, **kwargs):
Expand Down Expand Up @@ -388,8 +388,8 @@ def _test_find_shared_dna_two_chrom_shared(self):

assert len(d["one_chrom_shared_dna"]) == 1
assert len(d["two_chrom_shared_dna"]) == 1
assert len(d["one_chrom_shared_genes"]) == 3811
assert len(d["two_chrom_shared_genes"]) == 3811
assert len(d["one_chrom_shared_genes"]) == 11421
assert len(d["two_chrom_shared_genes"]) == 11421
assert len(d["one_chrom_discrepant_snps"]) == 0
assert len(d["two_chrom_discrepant_snps"]) == 0
np.testing.assert_allclose(
Expand All @@ -416,8 +416,8 @@ def _test_find_shared_dna_two_chrom_shared_1000G(self):

assert len(d["one_chrom_shared_dna"]) == 1
assert len(d["two_chrom_shared_dna"]) == 1
assert len(d["one_chrom_shared_genes"]) == 2188
assert len(d["two_chrom_shared_genes"]) == 2188
assert len(d["one_chrom_shared_genes"]) == 6096
assert len(d["two_chrom_shared_genes"]) == 6096
assert len(d["one_chrom_discrepant_snps"]) == 0
assert len(d["two_chrom_discrepant_snps"]) == 0
np.testing.assert_allclose(
Expand Down Expand Up @@ -447,8 +447,8 @@ def _test_find_shared_dna_two_chrom_shared_three_ind(self):

assert len(d["one_chrom_shared_dna"]) == 1
assert len(d["two_chrom_shared_dna"]) == 1
assert len(d["one_chrom_shared_genes"]) == 3811
assert len(d["two_chrom_shared_genes"]) == 3811
assert len(d["one_chrom_shared_genes"]) == 11421
assert len(d["two_chrom_shared_genes"]) == 11421
assert len(d["one_chrom_discrepant_snps"]) == 0
assert len(d["two_chrom_discrepant_snps"]) == 0
np.testing.assert_allclose(
Expand All @@ -475,8 +475,8 @@ def _test_find_shared_dna_two_chrom_shared_no_output(self):

assert len(d["one_chrom_shared_dna"]) == 1
assert len(d["two_chrom_shared_dna"]) == 1
assert len(d["one_chrom_shared_genes"]) == 3811
assert len(d["two_chrom_shared_genes"]) == 3811
assert len(d["one_chrom_shared_genes"]) == 11421
assert len(d["two_chrom_shared_genes"]) == 11421
assert len(d["one_chrom_discrepant_snps"]) == 0
assert len(d["two_chrom_discrepant_snps"]) == 0
np.testing.assert_allclose(
Expand Down Expand Up @@ -505,7 +505,7 @@ def _test_find_shared_dna_one_chrom_shared(self):

assert len(d["one_chrom_shared_dna"]) == 1
assert len(d["two_chrom_shared_dna"]) == 0
assert len(d["one_chrom_shared_genes"]) == 3811
assert len(d["one_chrom_shared_genes"]) == 11421
assert len(d["two_chrom_shared_genes"]) == 0
assert len(d["one_chrom_discrepant_snps"]) == 0
assert len(d["two_chrom_discrepant_snps"]) == 0
Expand Down Expand Up @@ -535,7 +535,7 @@ def _test_find_shared_dna_one_chrom_shared_three_ind(self):

assert len(d["one_chrom_shared_dna"]) == 1
assert len(d["two_chrom_shared_dna"]) == 0
assert len(d["one_chrom_shared_genes"]) == 3811
assert len(d["one_chrom_shared_genes"]) == 11421
assert len(d["two_chrom_shared_genes"]) == 0
assert len(d["one_chrom_discrepant_snps"]) == 0
assert len(d["two_chrom_discrepant_snps"]) == 0
Expand Down Expand Up @@ -574,8 +574,8 @@ def _test_find_shared_dna_X_chrom_two_individuals_male(self):

assert len(d["one_chrom_shared_dna"]) == 1 # PAR1, non-PAR, PAR2
assert len(d["two_chrom_shared_dna"]) == 1 # PAR1
assert len(d["one_chrom_shared_genes"]) == 3022
assert len(d["two_chrom_shared_genes"]) == 54
assert len(d["one_chrom_shared_genes"]) == 8243
assert len(d["two_chrom_shared_genes"]) == 146
assert len(d["one_chrom_discrepant_snps"]) == 0
assert len(d["two_chrom_discrepant_snps"]) == 0
np.testing.assert_allclose(
Expand Down Expand Up @@ -616,8 +616,8 @@ def _test_find_shared_dna_X_chrom_two_individuals_female(self):

assert len(d["one_chrom_shared_dna"]) == 1 # PAR1, non-PAR, PAR2
assert len(d["two_chrom_shared_dna"]) == 1 # PAR1, non-PAR, PAR2
assert len(d["one_chrom_shared_genes"]) == 3022
assert len(d["two_chrom_shared_genes"]) == 3022
assert len(d["one_chrom_shared_genes"]) == 8243
assert len(d["two_chrom_shared_genes"]) == 8243
assert len(d["one_chrom_discrepant_snps"]) == 0
assert len(d["two_chrom_discrepant_snps"]) == 0
np.testing.assert_allclose(
Expand Down Expand Up @@ -653,8 +653,8 @@ def _test_find_shared_dna_two_chrom_shared_discrepant_snps(self):

assert len(d["one_chrom_shared_dna"]) == 1
assert len(d["two_chrom_shared_dna"]) == 1
assert len(d["one_chrom_shared_genes"]) == 3811
assert len(d["two_chrom_shared_genes"]) == 3811
assert len(d["one_chrom_shared_genes"]) == 11421
assert len(d["two_chrom_shared_genes"]) == 11421
assert len(d["one_chrom_discrepant_snps"]) == 0
assert len(d["two_chrom_discrepant_snps"]) == 2
np.testing.assert_allclose(
Expand Down
12 changes: 6 additions & 6 deletions tests/test_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def f():
assert len(cytoBand_hg19) == 862

def _generate_test_knownGene_hg19_resource(self):
s = "s\ts\ts\t0\t0\t0\t0\t0\ts\ts\ts\ts\n" * 82960
s = "s\ts\ts\t0\t0\t0\t0\t0\ts\ts\ts\ts\n" * 254535

mock = mock_open(read_data=gzip.compress(s.encode()))
with patch("urllib.request.urlopen", mock):
Expand All @@ -207,14 +207,14 @@ def f():
self.resource.get_knownGene_hg19() if self.downloads_enabled else f()
)

assert len(knownGene_hg19) == 82960
assert len(knownGene_hg19) == 254535

# get already loaded resource
knownGene_hg19 = self.resource.get_knownGene_hg19()
assert len(knownGene_hg19) == 82960
assert len(knownGene_hg19) == 254535

def _generate_test_kgXref_hg19_resource(self):
s = "s\ts\ts\ts\ts\ts\ts\ts\n" * 82960
s = "s\ts\ts\ts\ts\ts\ts\ts\n" * 254491

mock = mock_open(read_data=gzip.compress(s.encode()))
with patch("urllib.request.urlopen", mock):
Expand All @@ -228,11 +228,11 @@ def f():

kgXref_hg19 = self.resource.get_kgXref_hg19() if self.downloads_enabled else f()

assert len(kgXref_hg19) == 82960
assert len(kgXref_hg19) == 254491

# get already loaded resource
kgXref_hg19 = self.resource.get_kgXref_hg19()
assert len(kgXref_hg19) == 82960
assert len(kgXref_hg19) == 254491

def test_get_all_resources(self):
def f():
Expand Down