From bcb5bacc2050b32cb902fe0b0884592cb72820d8 Mon Sep 17 00:00:00 2001 From: Andrew Riha Date: Mon, 11 Nov 2024 22:37:22 -0800 Subject: [PATCH] Updates for new gene resources --- README.rst | 4 ++-- tests/test_lineage.py | 42 ++++++++++++++++++++--------------------- tests/test_resources.py | 12 ++++++------ 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/README.rst b/README.rst index c0e1e19..733d25d 100644 --- a/README.rst +++ b/README.rst @@ -227,11 +227,11 @@ chromosomes are also output. .. note:: Shared DNA is not computed on the X chromosome with the 1000 Genomes Project genetic maps since the X chromosome is not included in these genetic maps. -In this example, there are 15,976 shared genes on both chromosomes transcribed from 36 segments +In this example, there are 51,740 shared genes/transcripts on both chromosomes transcribed from 36 segments of shared DNA: >>> len(results['two_chrom_shared_genes']) -15976 +51740 >>> len(results['two_chrom_shared_dna']) 36 diff --git a/tests/test_lineage.py b/tests/test_lineage.py index a7e36ce..95828bb 100644 --- a/tests/test_lineage.py +++ b/tests/test_lineage.py @@ -78,8 +78,8 @@ def _generate_test_cytoBand_hg19(self): def _generate_test_gene_dfs( self, chrom="1", - len1=3811, - len2=4000, + len1=11421, + len2=12000, txStart1=1000000, txEnd1=2000000, txStart2=111600000, @@ -133,8 +133,8 @@ def run_find_shared_dna_test_X(self, f): 20.837792 + 180.837755, 20.837792 + 180.837755 + 0.347344, ), - len1=54, - len2=3022, + len1=146, + len2=8243, txStart1=2400000, txEnd1=2600000, txStart2=150000000, @@ -148,7 +148,7 @@ def run_find_shared_dna_test_1000G(self, f): pos=(1, 43800001), rate=(63.0402663602 / (43800001 / 1e6), 0), map_cMs=(0.0, 63.0402663602), - len1=2188, + len1=6096, ) def run_find_shared_dna_test(self, f, HapMap2=True, **kwargs): @@ -388,8 +388,8 @@ def _test_find_shared_dna_two_chrom_shared(self): assert len(d["one_chrom_shared_dna"]) == 1 assert len(d["two_chrom_shared_dna"]) == 1 - assert len(d["one_chrom_shared_genes"]) == 3811 - assert len(d["two_chrom_shared_genes"]) == 3811 + assert len(d["one_chrom_shared_genes"]) == 11421 + assert len(d["two_chrom_shared_genes"]) == 11421 assert len(d["one_chrom_discrepant_snps"]) == 0 assert len(d["two_chrom_discrepant_snps"]) == 0 np.testing.assert_allclose( @@ -416,8 +416,8 @@ def _test_find_shared_dna_two_chrom_shared_1000G(self): assert len(d["one_chrom_shared_dna"]) == 1 assert len(d["two_chrom_shared_dna"]) == 1 - assert len(d["one_chrom_shared_genes"]) == 2188 - assert len(d["two_chrom_shared_genes"]) == 2188 + assert len(d["one_chrom_shared_genes"]) == 6096 + assert len(d["two_chrom_shared_genes"]) == 6096 assert len(d["one_chrom_discrepant_snps"]) == 0 assert len(d["two_chrom_discrepant_snps"]) == 0 np.testing.assert_allclose( @@ -447,8 +447,8 @@ def _test_find_shared_dna_two_chrom_shared_three_ind(self): assert len(d["one_chrom_shared_dna"]) == 1 assert len(d["two_chrom_shared_dna"]) == 1 - assert len(d["one_chrom_shared_genes"]) == 3811 - assert len(d["two_chrom_shared_genes"]) == 3811 + assert len(d["one_chrom_shared_genes"]) == 11421 + assert len(d["two_chrom_shared_genes"]) == 11421 assert len(d["one_chrom_discrepant_snps"]) == 0 assert len(d["two_chrom_discrepant_snps"]) == 0 np.testing.assert_allclose( @@ -475,8 +475,8 @@ def _test_find_shared_dna_two_chrom_shared_no_output(self): assert len(d["one_chrom_shared_dna"]) == 1 assert len(d["two_chrom_shared_dna"]) == 1 - assert len(d["one_chrom_shared_genes"]) == 3811 - assert len(d["two_chrom_shared_genes"]) == 3811 + assert len(d["one_chrom_shared_genes"]) == 11421 + assert len(d["two_chrom_shared_genes"]) == 11421 assert len(d["one_chrom_discrepant_snps"]) == 0 assert len(d["two_chrom_discrepant_snps"]) == 0 np.testing.assert_allclose( @@ -505,7 +505,7 @@ def _test_find_shared_dna_one_chrom_shared(self): assert len(d["one_chrom_shared_dna"]) == 1 assert len(d["two_chrom_shared_dna"]) == 0 - assert len(d["one_chrom_shared_genes"]) == 3811 + assert len(d["one_chrom_shared_genes"]) == 11421 assert len(d["two_chrom_shared_genes"]) == 0 assert len(d["one_chrom_discrepant_snps"]) == 0 assert len(d["two_chrom_discrepant_snps"]) == 0 @@ -535,7 +535,7 @@ def _test_find_shared_dna_one_chrom_shared_three_ind(self): assert len(d["one_chrom_shared_dna"]) == 1 assert len(d["two_chrom_shared_dna"]) == 0 - assert len(d["one_chrom_shared_genes"]) == 3811 + assert len(d["one_chrom_shared_genes"]) == 11421 assert len(d["two_chrom_shared_genes"]) == 0 assert len(d["one_chrom_discrepant_snps"]) == 0 assert len(d["two_chrom_discrepant_snps"]) == 0 @@ -574,8 +574,8 @@ def _test_find_shared_dna_X_chrom_two_individuals_male(self): assert len(d["one_chrom_shared_dna"]) == 1 # PAR1, non-PAR, PAR2 assert len(d["two_chrom_shared_dna"]) == 1 # PAR1 - assert len(d["one_chrom_shared_genes"]) == 3022 - assert len(d["two_chrom_shared_genes"]) == 54 + assert len(d["one_chrom_shared_genes"]) == 8243 + assert len(d["two_chrom_shared_genes"]) == 146 assert len(d["one_chrom_discrepant_snps"]) == 0 assert len(d["two_chrom_discrepant_snps"]) == 0 np.testing.assert_allclose( @@ -616,8 +616,8 @@ def _test_find_shared_dna_X_chrom_two_individuals_female(self): assert len(d["one_chrom_shared_dna"]) == 1 # PAR1, non-PAR, PAR2 assert len(d["two_chrom_shared_dna"]) == 1 # PAR1, non-PAR, PAR2 - assert len(d["one_chrom_shared_genes"]) == 3022 - assert len(d["two_chrom_shared_genes"]) == 3022 + assert len(d["one_chrom_shared_genes"]) == 8243 + assert len(d["two_chrom_shared_genes"]) == 8243 assert len(d["one_chrom_discrepant_snps"]) == 0 assert len(d["two_chrom_discrepant_snps"]) == 0 np.testing.assert_allclose( @@ -653,8 +653,8 @@ def _test_find_shared_dna_two_chrom_shared_discrepant_snps(self): assert len(d["one_chrom_shared_dna"]) == 1 assert len(d["two_chrom_shared_dna"]) == 1 - assert len(d["one_chrom_shared_genes"]) == 3811 - assert len(d["two_chrom_shared_genes"]) == 3811 + assert len(d["one_chrom_shared_genes"]) == 11421 + assert len(d["two_chrom_shared_genes"]) == 11421 assert len(d["one_chrom_discrepant_snps"]) == 0 assert len(d["two_chrom_discrepant_snps"]) == 2 np.testing.assert_allclose( diff --git a/tests/test_resources.py b/tests/test_resources.py index 524493e..356bdaf 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -191,7 +191,7 @@ def f(): assert len(cytoBand_hg19) == 862 def _generate_test_knownGene_hg19_resource(self): - s = "s\ts\ts\t0\t0\t0\t0\t0\ts\ts\ts\ts\n" * 82960 + s = "s\ts\ts\t0\t0\t0\t0\t0\ts\ts\ts\ts\n" * 254535 mock = mock_open(read_data=gzip.compress(s.encode())) with patch("urllib.request.urlopen", mock): @@ -207,14 +207,14 @@ def f(): self.resource.get_knownGene_hg19() if self.downloads_enabled else f() ) - assert len(knownGene_hg19) == 82960 + assert len(knownGene_hg19) == 254535 # get already loaded resource knownGene_hg19 = self.resource.get_knownGene_hg19() - assert len(knownGene_hg19) == 82960 + assert len(knownGene_hg19) == 254535 def _generate_test_kgXref_hg19_resource(self): - s = "s\ts\ts\ts\ts\ts\ts\ts\n" * 82960 + s = "s\ts\ts\ts\ts\ts\ts\ts\n" * 254491 mock = mock_open(read_data=gzip.compress(s.encode())) with patch("urllib.request.urlopen", mock): @@ -228,11 +228,11 @@ def f(): kgXref_hg19 = self.resource.get_kgXref_hg19() if self.downloads_enabled else f() - assert len(kgXref_hg19) == 82960 + assert len(kgXref_hg19) == 254491 # get already loaded resource kgXref_hg19 = self.resource.get_kgXref_hg19() - assert len(kgXref_hg19) == 82960 + assert len(kgXref_hg19) == 254491 def test_get_all_resources(self): def f():