Skip to content

Commit

Permalink
src/lineage_processing.jl: Add min_distance column for every sequence…
Browse files Browse the repository at this point in the history
… within a cluster to its closest sequence.
  • Loading branch information
mashu committed Nov 17, 2024
1 parent 7ef9a54 commit 515526c
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "LineageCollapse"
uuid = "e38bdfdf-80f5-4f0c-93e0-53dd02ee37b8"
authors = ["Mateusz Kaduk <[email protected]> and contributors"]
version = "0.0.11"
version = "0.0.12"

[deps]
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
Expand Down
14 changes: 12 additions & 2 deletions src/lineage_processing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,24 @@ function process_lineages(df::DataFrame;
if nrow(unique_dna_data) > 1
dna_seqs = LongDNA{4}.(unique_dna_data.cdr3)
dist_matrix = compute_pairwise_distance(distance_metric, dna_seqs)

# Calculate minimum distances for each sequence, excluding zeros on diagonal
min_distances = zeros(Float32, size(dist_matrix, 1))
for i in 1:size(dist_matrix, 1)
non_zero_distances = filter(x -> x > 0, dist_matrix[i,:])
min_distances[i] = isempty(non_zero_distances) ? 0.0f0 : minimum(non_zero_distances)
end
unique_dna_data[!, :min_distance] = min_distances

unique_dna_data[!, :cluster] = perform_clustering(clustering_method, linkage, dist_matrix)
else
unique_dna_data[!, :cluster] .= 1
unique_dna_data[!, :min_distance] .= 0.0f0 # Single sequence has no distances to others
end

# Map clusters back to all sequences
# Map clusters and min distances back to all sequences
group_result = leftjoin(group,
select(unique_dna_data, :cdr3, :cluster),
select(unique_dna_data, :cdr3, :cluster, :min_distance),
on = :cdr3)

# Process statistics
Expand Down

2 comments on commit 515526c

@mashu
Copy link
Owner Author

@mashu mashu commented on 515526c Nov 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register

Release notes:

Adds nearest neighbour distance indicated by min_distance column.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/119645

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.0.12 -m "<description of version>" 515526c92444e9d0bdbb68fd9ff7b3c4fe1d3883
git push origin v0.0.12

Please sign in to comment.