Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optional Digestion Count Output #2460

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using System.Threading;
using Omics.Modifications;
using System.Collections.Concurrent;

Expand All @@ -26,12 +25,14 @@ public class ClassicSearchEngine : MetaMorpheusEngine
private readonly Ms2ScanWithSpecificMass[] ArrayOfSortedMS2Scans;
private readonly double[] MyScanPrecursorMasses;
private readonly bool WriteSpectralLibrary;
private readonly bool WriteDigestionCounts;
private readonly object[] Locks;
public readonly ConcurrentDictionary<string, int> DigestionCountDictionary; // Used to track the amount of digestion products from each protein when the option is enabled.

public ClassicSearchEngine(SpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[] arrayOfSortedMS2Scans,
List<Modification> variableModifications, List<Modification> fixedModifications, List<SilacLabel> silacLabels, SilacLabel startLabel, SilacLabel endLabel,
List<Protein> proteinList, MassDiffAcceptor searchMode, CommonParameters commonParameters, List<(string FileName, CommonParameters Parameters)> fileSpecificParameters,
SpectralLibrary spectralLibrary, List<string> nestedIds, bool writeSpectralLibrary)
SpectralLibrary spectralLibrary, List<string> nestedIds, bool writeSpectralLibrary, bool writeDigestionCounts = false)
: base(commonParameters, fileSpecificParameters, nestedIds)
{
PeptideSpectralMatches = globalPsms;
Expand All @@ -48,6 +49,8 @@ public ClassicSearchEngine(SpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[]
SearchMode = searchMode;
SpectralLibrary = spectralLibrary;
WriteSpectralLibrary = writeSpectralLibrary;
WriteDigestionCounts = writeDigestionCounts;
DigestionCountDictionary = new();

// Create one lock for each PSM to ensure thread safety
Locks = new object[PeptideSpectralMatches.Length];
Expand Down Expand Up @@ -108,6 +111,9 @@ protected override MetaMorpheusEngineResults RunSpecific()
// digest each protein into peptides and search for each peptide in all spectra within precursor mass tolerance
foreach (PeptideWithSetModifications peptide in Proteins[i].Digest(CommonParameters.DigestionParams, FixedModifications, VariableModifications, SilacLabels, TurnoverLabels))
{
if (WriteDigestionCounts)
DigestionCountDictionary.Increment(peptide.Parent.Accession);

PeptideWithSetModifications reversedOnTheFlyDecoy = null;

if (SpectralLibrary != null)
Expand Down
119 changes: 119 additions & 0 deletions MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
using Nett;
using Newtonsoft.Json.Linq;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Numerics;

namespace EngineLayer;

public static class DictionaryExtensions
{
    // Single gate that serializes every list mutation made by AddOrCreateThreadSafe.
    private static readonly object AddOrCreateLock = new object();

    /// <summary>
    /// Adds a value to the list associated with the specified key in the dictionary.
    /// If the key does not exist, a new list is created with the value and added to the dictionary.
    /// </summary>
    /// <typeparam name="TKey">The type of the keys in the dictionary.</typeparam>
    /// <typeparam name="TValues">The type of the values in the lists.</typeparam>
    /// <param name="dictionary">The dictionary to operate on.</param>
    /// <param name="key">The key whose value list to add to or create.</param>
    /// <param name="value">The value to add to the list associated with the specified key.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
    /// <remarks>
    /// This is not thread-safe!
    /// </remarks>
    public static void AddOrCreate<TKey, TValues>(this IDictionary<TKey, IList<TValues>> dictionary, TKey key, TValues value)
    {
        System.ArgumentNullException.ThrowIfNull(dictionary);

        if (dictionary.TryGetValue(key, out IList<TValues> existing))
        {
            existing.Add(value);
        }
        else
        {
            dictionary.Add(key, new List<TValues> { value });
        }
    }

    /// <summary>
    /// Adds a value to the list associated with the specified key in the dictionary.
    /// If the key does not exist, a new list is created with the value and added to the dictionary.
    /// </summary>
    /// <typeparam name="TKey">The type of the keys in the dictionary.</typeparam>
    /// <typeparam name="TValues">The type of the values in the lists.</typeparam>
    /// <param name="dictionary">The dictionary to operate on.</param>
    /// <param name="key">The key whose value list to add to or create.</param>
    /// <param name="value">The value to add to the list associated with the specified key.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
    /// <remarks>
    /// Concurrent calls to this method are safe with respect to each other for any dictionary type:
    /// every list mutation (and, for non-concurrent dictionaries, every lookup/insert) happens under one shared lock.
    /// Code that reads or modifies the dictionary or its lists through other means is not covered by that lock.
    /// </remarks>
    public static void AddOrCreateThreadSafe<TKey, TValues>(this IDictionary<TKey, IList<TValues>> dictionary, TKey key, TValues value)
    {
        System.ArgumentNullException.ThrowIfNull(dictionary);

        if (dictionary is ConcurrentDictionary<TKey, IList<TValues>> concurrentDictionary)
        {
            // The GetOrAdd factory only allocates, so it is harmless if the dictionary invokes it more
            // than once under contention. The side effect (Add) is kept out of the delegate so the
            // value is appended exactly once.
            IList<TValues> target = concurrentDictionary.GetOrAdd(key, _ => new List<TValues>());
            lock (AddOrCreateLock)
            {
                target.Add(value);
            }
        }
        else
        {
            // A plain dictionary is not safe for concurrent lookup + insert, so the whole operation is locked.
            lock (AddOrCreateLock)
            {
                if (dictionary.TryGetValue(key, out IList<TValues> existing))
                {
                    existing.Add(value);
                }
                else
                {
                    dictionary.Add(key, new List<TValues> { value });
                }
            }
        }
    }

    /// <summary>
    /// Increments the value associated with the specified key in the dictionary.
    /// If the key does not exist, a new entry is created with the value set to one.
    /// </summary>
    /// <typeparam name="TKey">The type of the keys in the dictionary.</typeparam>
    /// <typeparam name="TValue">The type of the values in the dictionary, which must implement <see cref="INumber{TValue}"/>.</typeparam>
    /// <param name="dictionary">The dictionary to operate on.</param>
    /// <param name="key">The key whose value to increment or create.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
    /// <remarks>
    /// The increment is atomic only when the dictionary is a <c>ConcurrentDictionary</c>;
    /// for any other dictionary type the read-modify-write is not synchronized.
    /// </remarks>
    public static void Increment<TKey, TValue>(this IDictionary<TKey, TValue> dictionary, TKey key)
        where TValue : INumber<TValue>
    {
        System.ArgumentNullException.ThrowIfNull(dictionary);

        if (dictionary is ConcurrentDictionary<TKey, TValue> concurrentDictionary)
        {
            // The update delegate is a pure function of the current value, so retries are safe.
            concurrentDictionary.AddOrUpdate(key, TValue.One, (_, current) => current + TValue.One);
            return;
        }

        if (dictionary.TryGetValue(key, out TValue currentValue))
        {
            dictionary[key] = currentValue + TValue.One;
        }
        else
        {
            dictionary.Add(key, TValue.One);
        }
    }

    /// <summary>
    /// Determines whether the dictionary is null or has no elements.
    /// </summary>
    /// <typeparam name="TKey">The type of the keys in the dictionary.</typeparam>
    /// <typeparam name="TValue">The type of the values in the dictionary.</typeparam>
    /// <param name="dictionary">The dictionary to check.</param>
    /// <returns>
    /// <c>true</c> if the dictionary is null or has no elements; otherwise, <c>false</c>.
    /// </returns>
    public static bool IsNullOrEmpty<TKey, TValue>(this IDictionary<TKey, TValue> dictionary)
    {
        return dictionary is null || dictionary.Count == 0;
    }
}
12 changes: 12 additions & 0 deletions MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,18 @@
</TextBlock>
</ToolTipService.ToolTip>
</CheckBox>
<CheckBox x:Name="WriteDigestCountCheckBox" Margin="20 0 0 0"
Content="Write digestion product count histogram" IsEnabled="{Binding IsChecked, ElementName=ClassicSearchRadioButton}">
<ToolTipService.ToolTip>
<TextBlock>
Checking this box will create an additional output file with a histogram of the number of digestion products per protein.
<LineBreak/>
The number of digestion products is limited by the MaxModsPerPeptide parameter and limited to the MaxModifiedIsoforms parameter for each primary sequence generated by each database entry
<LineBreak/>
Multiple base sequences can be generated per protein due to variable methionine and splice variants if annotated in a database
</TextBlock>
</ToolTipService.ToolTip>
</CheckBox>
</StackPanel>
</Expander>
</GroupBox>
Expand Down
2 changes: 2 additions & 0 deletions MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ private void UpdateFieldsFromTask(SearchTask task)

OutputFileNameTextBox.Text = task.CommonParameters.TaskDescriptor;
CkbMzId.IsChecked = task.SearchParameters.WriteMzId;
WriteDigestCountCheckBox.IsChecked = task.SearchParameters.WriteDigestionProductCountFile;
WriteHighQPsmsCheckBox.IsChecked = task.SearchParameters.WriteHighQValuePsms;
WriteDecoyCheckBox.IsChecked = task.SearchParameters.WriteDecoys;
WriteContaminantCheckBox.IsChecked = task.SearchParameters.WriteContaminants;
Expand Down Expand Up @@ -650,6 +651,7 @@ private void SaveButton_Click(object sender, RoutedEventArgs e)
TheTask.SearchParameters.UpdateSpectralLibrary = UpdateSpectralLibraryCheckBox.IsChecked.Value;
TheTask.SearchParameters.CompressIndividualFiles = CompressIndividualResultsCheckBox.IsChecked.Value;
TheTask.SearchParameters.IncludeModMotifInMzid = IncludeMotifInModNamesCheckBox.IsChecked.Value;
TheTask.SearchParameters.WriteDigestionProductCountFile = WriteDigestCountCheckBox.IsChecked.Value;

if (RemoveContaminantRadioBox.IsChecked.Value)
{
Expand Down
64 changes: 63 additions & 1 deletion MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@
/// Used for storage of results for writing to Results.tsv. It is explained in the method ConstructResultsDictionary()
/// </summary>
private Dictionary<(string,string),string> ResultsDictionary { get; set; }

/// <summary>
/// Used for storage of results for writing digestion product counts to a .tsv.
/// May be null or empty, in which case no digestion count files are written.
/// </summary>
// No '?' annotation here: this file is not in a '#nullable' annotations context, so the annotation
// only produced compiler warning CS8632 on every build.
internal IDictionary<string, int> DigestionCountDictionary { get; set; }

Check warning on line 41 in MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs

View workflow job for this annotation

GitHub Actions / ubuntu-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 41 in MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs

View workflow job for this annotation

GitHub Actions / ubuntu-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 41 in MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs

View workflow job for this annotation

GitHub Actions / windows-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 41 in MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs

View workflow job for this annotation

GitHub Actions / windows-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 41 in MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs

View workflow job for this annotation

GitHub Actions / macos-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 41 in MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs

View workflow job for this annotation

GitHub Actions / macos-latest

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.
public PostSearchAnalysisTask()
: base(MyTask.Search)
{
Expand Down Expand Up @@ -110,6 +113,12 @@
UpdateSpectralLibrary();
}

if (DigestionCountDictionary != null && DigestionCountDictionary.Any()) // Will be null or empty if no digestion count output file is desired.
{
WriteDigestionCountByProtein();
WriteDigestionCountHistogram();
}

WriteFlashLFQResults();

if (Parameters.ProteinList.Any((p => p.AppliedSequenceVariations.Count > 0)))
Expand Down Expand Up @@ -1939,5 +1948,58 @@

FinishedWritingFile(peaksPath, nestedIds);
}

/// <summary>
/// Writes one line per dictionary entry (key, then digestion product count) to a tab-separated file
/// in the output folder. Does nothing when the digestion count dictionary is null or empty.
/// </summary>
private void WriteDigestionCountByProtein()
{
    if (DigestionCountDictionary.IsNullOrEmpty())
    {
        return;
    }

    string fileName = $"DigestionCountsBy{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s.tsv";
    string outputPath = Path.Combine(Parameters.OutputFolder, fileName);

    // The writer is scoped so the file is flushed and closed before it is reported as finished.
    using (StreamWriter output = new StreamWriter(outputPath))
    {
        output.WriteLine("Protein Accession\tDigestion Products");
        foreach (var (accession, productCount) in DigestionCountDictionary)
        {
            output.WriteLine($"{accession}\t{productCount}");
        }
    }

    FinishedWritingFile(outputPath, new List<string> { Parameters.SearchTaskId });
}

/// <summary>
/// Writes a histogram of digestion product counts to a .tsv file: one line per distinct
/// digestion product count, in ascending order, with the number of dictionary entries that have that count.
/// Does nothing when the digestion count dictionary is null or empty.
/// </summary>
private void WriteDigestionCountHistogram()
{
    if (DigestionCountDictionary.IsNullOrEmpty())
    {
        return;
    }

    var nestedIds = new List<string> { Parameters.SearchTaskId };
    var countHistogramPath = Path.Combine(Parameters.OutputFolder, "DigestionCountHistogram.tsv");

    // Bin the entries by their digestion product count. The number of bins can never exceed the number
    // of entries, so no capacity hint is taken from search parameters (a very large or negative
    // parameter value would otherwise over-allocate or throw while constructing the dictionary).
    var histogram = DigestionCountDictionary.Values
        .GroupBy(productCount => productCount)
        .OrderBy(bin => bin.Key);

    // The writer is scoped so the file is flushed and closed before it is reported as finished.
    using (StreamWriter writer = new(countHistogramPath))
    {
        writer.WriteLine($"Digestion Products\tCount of {GlobalVariables.AnalyteType.GetBioPolymerLabel()}s");
        foreach (var bin in histogram)
        {
            writer.WriteLine($"{bin.Key}\t{bin.Count()}");
        }
    }

    FinishedWritingFile(countHistogramPath, nestedIds);
}
}
}
3 changes: 2 additions & 1 deletion MetaMorpheus/TaskLayer/SearchTask/SearchParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
using UsefulProteomicsDatabases;
using EngineLayer;
using Omics.Modifications;
using Proteomics;

namespace TaskLayer
{
Expand Down Expand Up @@ -32,6 +31,7 @@ public SearchParameters()
WriteMzId = true;
WritePepXml = false;
IncludeModMotifInMzid = false;
WriteDigestionProductCountFile = false;

ModsToWriteSelection = new Dictionary<string, int>
{
Expand Down Expand Up @@ -103,5 +103,6 @@ public SearchParameters()
public SilacLabel EndTurnoverLabel { get; set; } //used for SILAC turnover experiments
public TargetContaminantAmbiguity TCAmbiguity { get; set; }
public bool IncludeModMotifInMzid { get; set; }
public bool WriteDigestionProductCountFile { get; set; }
}
}
15 changes: 12 additions & 3 deletions MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
Status("Searching files...", new List<string> { taskId, "Individual Spectra Files" });

Dictionary<string, int[]> numMs2SpectraPerFile = new Dictionary<string, int[]>();
IDictionary<string, int> digestionCountDictionary = null;
for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
{
if (GlobalVariables.StopLoops) { break; }
Expand Down Expand Up @@ -374,8 +375,15 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
{
Status("Starting search...", thisId);
var newClassicSearchEngine = new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, SearchParameters.SilacLabels,
SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, proteinList, massDiffAcceptor, combinedParams, this.FileSpecificParameters, spectralLibrary, thisId,SearchParameters.WriteSpectralLibrary);
newClassicSearchEngine.Run();
SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, proteinList, massDiffAcceptor, combinedParams, this.FileSpecificParameters, spectralLibrary, thisId,SearchParameters.WriteSpectralLibrary, SearchParameters.WriteDigestionProductCountFile);
var result = newClassicSearchEngine.Run();

// The same proteins (all of them) get digested with each classic search engine, therefore we only need to calculate this for the first file that runs
if (SearchParameters.WriteDigestionProductCountFile)
{
SearchParameters.WriteDigestionProductCountFile = false;
digestionCountDictionary = (result.MyEngine as ClassicSearchEngine).DigestionCountDictionary;
}

ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
}
Expand Down Expand Up @@ -447,7 +455,8 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
{
Parameters = parameters,
FileSpecificParameters = this.FileSpecificParameters,
CommonParameters = CommonParameters
CommonParameters = CommonParameters,
DigestionCountDictionary = digestionCountDictionary
};
return postProcessing.Run();
}
Expand Down
Loading