diff --git a/GeUtilities/Intervals/Functions/HashFunctions.cs b/GeUtilities/Intervals/Functions/HashFunctions.cs
index 56b527f..2e0d89f 100644
--- a/GeUtilities/Intervals/Functions/HashFunctions.cs
+++ b/GeUtilities/Intervals/Functions/HashFunctions.cs
@@ -9,6 +9,12 @@ public static class HashFunctions
private const uint _FNVPrime_32 = 16777619;
private const uint _FNVOffsetBasis_32 = 2166136261;
+ /// <summary>
+ /// Gets the string used as a delimiter between the
+ /// properties that are joined to form a hash seed.
+ /// </summary>
+ public const string HashSeedDelimiter = ";;";
+
public static uint FNVHashFunction(string bytes)
{
uint hash = _FNVOffsetBasis_32;
@@ -20,5 +26,10 @@ public static uint FNVHashFunction(string bytes)
return hash;
}
+
+ public static string GetHashSeed(params string[] properties)
+ {
+ return string.Join(HashSeedDelimiter, properties);
+ }
}
}
diff --git a/GeUtilities/Intervals/Genome/Chromosome.cs b/GeUtilities/Intervals/Genome/Chromosome.cs
index cb0eaa9..69d1999 100644
--- a/GeUtilities/Intervals/Genome/Chromosome.cs
+++ b/GeUtilities/Intervals/Genome/Chromosome.cs
@@ -20,12 +20,20 @@ public Chromosome()
Strands = new Dictionary>();
}
- public void Add(I interval, char strand)
+ public bool TryAdd(I interval, char strand)
{
if (!Strands.ContainsKey(strand))
Strands.Add(strand, new Strand());
+
if (Strands[strand].TryAdd(interval))
+ {
Statistics.Update(interval);
+ return true;
+ }
+ else
+ {
+ return false;
+ }
}
}
}
diff --git a/GeUtilities/Intervals/Model/GeneralFeature.cs b/GeUtilities/Intervals/Model/GeneralFeature.cs
index de4e9f5..d730571 100644
--- a/GeUtilities/Intervals/Model/GeneralFeature.cs
+++ b/GeUtilities/Intervals/Model/GeneralFeature.cs
@@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.
using Genometric.GeUtilities.IGenomics;
+using Genometric.GeUtilities.Intervals.Functions;
namespace Genometric.GeUtilities.Intervals.Model
{
@@ -10,7 +11,7 @@ public class GeneralFeature : Interval, IGeneralFeature
{
public GeneralFeature(int left, int right, string source, string feature, double score,
string frame, string attribute, string hashSeed = "") :
- base(left, right, source + feature + score.ToString() + frame + attribute + hashSeed)
+ base(left, right, HashFunctions.GetHashSeed(source, feature, score.ToString(), frame, attribute, hashSeed))
{
Source = source;
Feature = feature;
diff --git a/GeUtilities/Intervals/Model/Interval.cs b/GeUtilities/Intervals/Model/Interval.cs
index 1733a91..b43b6fd 100644
--- a/GeUtilities/Intervals/Model/Interval.cs
+++ b/GeUtilities/Intervals/Model/Interval.cs
@@ -16,7 +16,7 @@ public Interval(int left, int right, string hashSeed = "")
unchecked
{
- _hashKey = (int)HashFunctions.FNVHashFunction(left.ToString() + right.ToString() + hashSeed);
+ _hashKey = (int)HashFunctions.FNVHashFunction(HashFunctions.GetHashSeed(left.ToString(), right.ToString(), hashSeed));
}
}
diff --git a/GeUtilities/Intervals/Model/Peak.cs b/GeUtilities/Intervals/Model/Peak.cs
index 06b9b33..76ec0c5 100644
--- a/GeUtilities/Intervals/Model/Peak.cs
+++ b/GeUtilities/Intervals/Model/Peak.cs
@@ -3,13 +3,14 @@
// See the LICENSE file in the project root for more information.
using Genometric.GeUtilities.IGenomics;
+using Genometric.GeUtilities.Intervals.Functions;
namespace Genometric.GeUtilities.Intervals.Model
{
public class Peak : Interval, IPeak
{
public Peak(int left, int right, double value, string name = null, int summit = -1, string hashSeed = "") :
- base(left, right, value.ToString() + summit.ToString() + name + hashSeed)
+ base(left, right, HashFunctions.GetHashSeed(value.ToString(), summit.ToString(), name, hashSeed))
{
Value = value;
Summit = summit != -1 ? summit : (right - left) / 2;
diff --git a/GeUtilities/Intervals/Model/RefSeqGene.cs b/GeUtilities/Intervals/Model/RefSeqGene.cs
index 3934541..286daf2 100644
--- a/GeUtilities/Intervals/Model/RefSeqGene.cs
+++ b/GeUtilities/Intervals/Model/RefSeqGene.cs
@@ -3,13 +3,14 @@
// See the LICENSE file in the project root for more information.
using Genometric.GeUtilities.IGenomics;
+using Genometric.GeUtilities.Intervals.Functions;
namespace Genometric.GeUtilities.Intervals.Model
{
public class RefSeqGene : Interval, IRefSeqGene
{
public RefSeqGene(int left, int right, string refSeqID, string geneSymbol, string hashSeed = "") :
- base(left, right, refSeqID + geneSymbol + hashSeed)
+ base(left, right, HashFunctions.GetHashSeed(refSeqID, geneSymbol, hashSeed))
{
RefSeqID = refSeqID;
GeneSymbol = geneSymbol;
diff --git a/GeUtilities/Intervals/Model/Variant.cs b/GeUtilities/Intervals/Model/Variant.cs
index e83ce32..3944ded 100644
--- a/GeUtilities/Intervals/Model/Variant.cs
+++ b/GeUtilities/Intervals/Model/Variant.cs
@@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.
using Genometric.GeUtilities.IGenomics;
+using Genometric.GeUtilities.Intervals.Functions;
namespace Genometric.GeUtilities.Intervals.Model
{
@@ -10,8 +11,8 @@ public class Variant : Interval, IVariant
{
public Variant(int left, int right, string id, Base[] refBase, Base[] altBase, double quality,
string filter, string info, string hashSeed = "") :
- base(left, right, id + (refBase == null ? "" : refBase.ToString())
- + (altBase == null ? "" : altBase.ToString()) + quality.ToString() + filter + info + hashSeed)
+ base(left, right, HashFunctions.GetHashSeed(id, refBase == null ? "" : string.Join("", refBase), // join the elements: Base[].ToString() yields only the array's type name
+ altBase == null ? "" : string.Join("", altBase), quality.ToString(), filter, info, hashSeed))
{
ID = id;
RefBase = refBase;
diff --git a/GeUtilities/Intervals/Parsers/Model/ParsedIntervals.cs b/GeUtilities/Intervals/Parsers/Model/ParsedIntervals.cs
index 41df14d..15b4f17 100644
--- a/GeUtilities/Intervals/Parsers/Model/ParsedIntervals.cs
+++ b/GeUtilities/Intervals/Parsers/Model/ParsedIntervals.cs
@@ -29,12 +29,19 @@ protected ParsedIntervals()
Statistics = new S();
}
- public void Add(I interval, string chr, char strand)
+ public bool TryAdd(I interval, string chr, char strand)
{
if (!Chromosomes.ContainsKey(chr))
Chromosomes.Add(chr, new Chromosome());
- Chromosomes[chr].Add(interval, strand);
- Statistics.Update(interval);
+ if (Chromosomes[chr].TryAdd(interval, strand))
+ {
+ Statistics.Update(interval);
+ return true;
+ }
+ else
+ {
+ return false;
+ }
}
}
}
diff --git a/GeUtilities/Intervals/Parsers/Parser.cs b/GeUtilities/Intervals/Parsers/Parser.cs
index d8d90fd..fc76391 100644
--- a/GeUtilities/Intervals/Parsers/Parser.cs
+++ b/GeUtilities/Intervals/Parsers/Parser.cs
@@ -262,7 +262,7 @@ private void Parse()
continue;
}
- I readingInterval = BuildInterval(left, right, splittedLine, lineCounter, _data.FileHashKey + lineCounter.ToString());
+ I readingInterval = BuildInterval(left, right, splittedLine, lineCounter, HashFunctions.GetHashSeed(_data.FileHashKey.ToString(), lineCounter.ToString()));
if (DropReadingPeak)
continue;
@@ -291,8 +291,13 @@ private void Parse()
(char.TryParse(splittedLine[_strandColumn], out strand) && strand != '+' && strand != '-' && strand != UnspecifiedStrandChar))
strand = UnspecifiedStrandChar;
- _data.Add(readingInterval, chrName, strand);
- _data.IntervalsCount++;
+ if (_data.TryAdd(readingInterval, chrName, strand))
+ _data.IntervalsCount++;
+ else
+ {
+ DropLine("\tLine " + lineCounter.ToString() + "\t:\tPossibly Hash key collision.");
+ continue;
+ }
}
}
}