Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added AMX acceleration #39

Merged
merged 5 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//

import Foundation
import Accelerate

/// A struct implementing the `DistanceMetricProtocol` using the dot product.
///
Expand All @@ -17,14 +18,26 @@ public struct DotProduct: DistanceMetricProtocol {

/// Scores every neighbor against the query and ranks the results.
///
/// Each neighbor is scored with `distance(between:and:)` in the order given,
/// and the scores are handed to `sortedScores(scores:topK:)`.
///
/// - Parameters:
///   - queryEmbedding: The embedding every neighbor is compared against.
///   - neighborEmbeddings: The candidate embeddings to score.
///   - resultsCount: Forwarded to `sortedScores` as `topK`.
/// - Returns: The `(Float, Int)` pairs produced by `sortedScores`
///   (presumably score and neighbor index; confirm against that helper).
public func findNearest(for queryEmbedding: [Float], in neighborEmbeddings: [[Float]], resultsCount: Int) -> [(Float, Int)] {
    var similarities: [Float] = []
    similarities.reserveCapacity(neighborEmbeddings.count)
    for candidate in neighborEmbeddings {
        similarities.append(distance(between: queryEmbedding, and: candidate))
    }
    return sortedScores(scores: similarities, topK: resultsCount)
}

/// Computes the dot product of two embeddings.
///
/// Elements are paired positionally with `zip`, so when the arrays differ in
/// length the trailing elements of the longer one are ignored.
///
/// - Parameters:
///   - firstEmbedding: The first vector.
///   - secondEmbedding: The second vector.
/// - Returns: The sum of the element-wise products (`0` for empty input).
public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
    var accumulator: Float = 0
    for (lhs, rhs) in zip(firstEmbedding, secondEmbedding) {
        accumulator += lhs * rhs
    }
    return accumulator
}
// public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
// let dotProduct = zip(firstEmbedding, secondEmbedding).map(*).reduce(0, +)
// return dotProduct
// }
johnbean393 marked this conversation as resolved.
Show resolved Hide resolved

/// Computes the dot product of two embeddings using Accelerate's `vDSP_dotpr`.
///
/// - Parameters:
///   - firstEmbedding: The first vector.
///   - secondEmbedding: The second vector.
/// - Precondition: Both embeddings contain the same number of elements.
/// - Returns: The sum of the element-wise products.
public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
    let elementCount = firstEmbedding.count
    precondition(elementCount == secondEmbedding.count, "Embeddings must have the same length")

    // Stride 1 on both inputs: every element of each array takes part in the sum.
    var result: Float = 0
    vDSP_dotpr(firstEmbedding, 1, secondEmbedding, 1, &result, vDSP_Length(elementCount))
    return result
}

}

/// A struct implementing the `DistanceMetricProtocol` using cosine similarity.
Expand All @@ -37,18 +50,39 @@ public struct CosineSimilarity: DistanceMetricProtocol {

/// Scores every neighbor against the query and ranks the results.
///
/// Each neighbor is scored with `distance(between:and:)`, and the scores are
/// passed to `sortedScores(scores:topK:)`.
///
/// - Parameters:
///   - queryEmbedding: The embedding every neighbor is compared against.
///   - neighborEmbeddings: The candidate embeddings to score.
///   - resultsCount: Forwarded to `sortedScores` as `topK`.
/// - Returns: The `(Float, Int)` pairs produced by `sortedScores`
///   (presumably score and neighbor index; confirm against that helper).
public func findNearest(for queryEmbedding: [Float], in neighborEmbeddings: [[Float]], resultsCount: Int) -> [(Float, Int)] {
    let similarities: [Float] = neighborEmbeddings.map { candidate in
        distance(between: queryEmbedding, and: candidate)
    }
    return sortedScores(scores: similarities, topK: resultsCount)
}

/// Computes the cosine similarity of two embeddings.
///
/// The result is the dot product divided by the product of the two Euclidean
/// norms. The dot product pairs elements with `zip` (extra trailing elements
/// of a longer array are ignored), while each norm uses its full array.
///
/// - Parameters:
///   - firstEmbedding: The first vector.
///   - secondEmbedding: The second vector.
/// - Returns: The cosine similarity, or `0` when the product of the norms is
///   zero (an empty or all-zero embedding), where the ratio is undefined.
public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
    let dotProduct = zip(firstEmbedding, secondEmbedding).reduce(Float(0)) { $0 + $1.0 * $1.1 }
    let firstMagnitude = sqrt(firstEmbedding.reduce(Float(0)) { $0 + $1 * $1 })
    let secondMagnitude = sqrt(secondEmbedding.reduce(Float(0)) { $0 + $1 * $1 })

    // Dividing by a zero norm would yield NaN, which cannot be ordered
    // meaningfully when scores are ranked. `!= 0` (rather than `> 0`) lets a
    // NaN coming from NaN inputs still propagate instead of being masked.
    let denominator = firstMagnitude * secondMagnitude
    guard denominator != 0 else { return 0 }

    return dotProduct / denominator
}
// public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
// // Calculate cosine similarity
// let dotProduct = zip(firstEmbedding, secondEmbedding).map(*).reduce(0, +)
// let firstMagnitude = sqrt(firstEmbedding.map { $0 * $0 }.reduce(0, +))
// let secondMagnitude = sqrt(secondEmbedding.map { $0 * $0 }.reduce(0, +))
//
// return dotProduct / (firstMagnitude * secondMagnitude)
// }
johnbean393 marked this conversation as resolved.
Show resolved Hide resolved

/// Computes the cosine similarity of two embeddings using Accelerate.
///
/// The dot product comes from `vDSP_dotpr` and each squared norm from
/// `vDSP_svesq`; the result is the dot product divided by the product of the
/// two norms.
///
/// - Parameters:
///   - firstEmbedding: The first vector.
///   - secondEmbedding: The second vector.
/// - Precondition: Both embeddings contain the same number of elements.
/// - Returns: The cosine similarity, or `0` when the product of the norms is
///   zero (an empty or all-zero embedding), where the ratio is undefined.
public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
    // Ensure the embeddings have the same length
    precondition(firstEmbedding.count == secondEmbedding.count, "Embeddings must have the same length")

    let count = vDSP_Length(firstEmbedding.count)
    var dotProduct: Float = 0
    var firstSquaredNorm: Float = 0
    var secondSquaredNorm: Float = 0

    // Calculate dot product and squared magnitudes using Accelerate
    vDSP_dotpr(firstEmbedding, 1, secondEmbedding, 1, &dotProduct, count)
    vDSP_svesq(firstEmbedding, 1, &firstSquaredNorm, count)
    vDSP_svesq(secondEmbedding, 1, &secondSquaredNorm, count)

    // Dividing by a zero norm would yield NaN, which cannot be ordered
    // meaningfully when scores are ranked. `!= 0` (rather than `> 0`) lets a
    // NaN coming from NaN inputs still propagate instead of being masked.
    let denominator = sqrt(firstSquaredNorm) * sqrt(secondSquaredNorm)
    guard denominator != 0 else { return 0 }

    // Return cosine similarity
    return dotProduct / denominator
}

}

/// A struct implementing the `DistanceMetricProtocol` using Euclidean distance.
Expand All @@ -57,18 +91,31 @@ public struct CosineSimilarity: DistanceMetricProtocol {
///
/// - Note: Use this metric when the magnitudes of the embeddings are significant in your use case, and the embeddings are distributed in a Euclidean space.
public struct EuclideanDistance: DistanceMetricProtocol {

/// Creates a Euclidean distance metric. The initializer takes no arguments and performs no work.
public init() {}

/// Measures every neighbor against the query and ranks the results.
///
/// Each neighbor is measured with `distance(between:and:)`, and the distances
/// are passed to `sortedDistances(distances:topK:)`.
///
/// - Parameters:
///   - queryEmbedding: The embedding every neighbor is compared against.
///   - neighborEmbeddings: The candidate embeddings to measure.
///   - resultsCount: Forwarded to `sortedDistances` as `topK`.
/// - Returns: The `(Float, Int)` pairs produced by `sortedDistances`
///   (presumably distance and neighbor index; confirm against that helper).
public func findNearest(for queryEmbedding: [Float], in neighborEmbeddings: [[Float]], resultsCount: Int) -> [(Float, Int)] {
    return sortedDistances(
        distances: neighborEmbeddings.map { neighbor in
            distance(between: queryEmbedding, and: neighbor)
        },
        topK: resultsCount
    )
}

/// Computes the Euclidean (L2) distance between two embeddings.
///
/// Elements are paired positionally with `zip`, so when the arrays differ in
/// length the trailing elements of the longer one are ignored.
///
/// - Parameters:
///   - firstEmbedding: The first vector.
///   - secondEmbedding: The second vector.
/// - Returns: The square root of the summed squared element-wise differences.
public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
    var sumOfSquares: Float = 0
    for (lhs, rhs) in zip(firstEmbedding, secondEmbedding) {
        let delta = lhs - rhs
        sumOfSquares += delta * delta
    }
    return sqrt(sumOfSquares)
}
// public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
// let squaredDifferences = zip(firstEmbedding, secondEmbedding).map { ($0 - $1) * ($0 - $1) }
// return sqrt(squaredDifferences.reduce(0, +))
// }
johnbean393 marked this conversation as resolved.
Show resolved Hide resolved

/// Computes the Euclidean (L2) distance between two embeddings using Accelerate.
///
/// `vDSP_distancesq` yields the sum of squared element-wise differences; the
/// square root of that sum is returned.
///
/// - Parameters:
///   - firstEmbedding: The first vector.
///   - secondEmbedding: The second vector.
/// - Precondition: Both embeddings contain the same number of elements.
/// - Returns: The Euclidean distance between the two vectors.
public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
    let elementCount = firstEmbedding.count
    precondition(elementCount == secondEmbedding.count, "Embeddings must have the same length")

    // Stride 1 on both inputs: every element contributes to the squared distance.
    var squaredDistance: Float = 0
    vDSP_distancesq(firstEmbedding, 1, secondEmbedding, 1, &squaredDistance, vDSP_Length(elementCount))

    return sqrt(squaredDistance)
}
}

// MARK: - Helpers
Expand Down
18 changes: 15 additions & 3 deletions Sources/SimilaritySearchKit/Core/Index/SimilarityIndex.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,22 @@ public typealias TextSplitterType = SimilarityIndex.TextSplitterType
public typealias VectorStoreType = SimilarityIndex.VectorStoreType

@available(macOS 11.0, iOS 15.0, *)
public class SimilarityIndex {
public class SimilarityIndex: Identifiable, Hashable {

/// Two indexes compare equal exactly when their `id` values match; no other
/// state is compared.
public static func == (lhs: SimilarityIndex, rhs: SimilarityIndex) -> Bool {
    let sameIdentifier = (lhs.id == rhs.id)
    return sameIdentifier
}


/// Feeds only `id` into the hasher, matching `==`, which also compares `id` alone.
///
/// - Note: `id` is declared as a mutable `var`; changing it while the index is
///   stored in a `Set` or used as a `Dictionary` key would change its hash.
public func hash(into hasher: inout Hasher) {
    id.hash(into: &hasher)
}

johnbean393 marked this conversation as resolved.
Show resolved Hide resolved
// MARK: - Properties

/// A unique identifier for this index; defaults to a freshly generated `UUID`.
/// Both `==` and `hash(into:)` are derived solely from this value.
public var id: UUID = UUID()

/// The items stored in the index.
public var indexItems: [IndexItem] = []

Expand Down Expand Up @@ -306,6 +319,7 @@ extension SimilarityIndex {

@available(macOS 13.0, iOS 16.0, *)
extension SimilarityIndex {

public func saveIndex(toDirectory path: URL? = nil, name: String? = nil) throws -> URL {
let indexName = name ?? self.indexName
let basePath: URL
Expand All @@ -319,15 +333,13 @@ extension SimilarityIndex {

let savedVectorStore = try vectorStore.saveIndex(items: indexItems, to: basePath, as: indexName)

print("Saved \(indexItems.count) index items to \(savedVectorStore.absoluteString)")
johnbean393 marked this conversation as resolved.
Show resolved Hide resolved

return savedVectorStore
}

public func loadIndex(fromDirectory path: URL? = nil, name: String? = nil) throws -> [IndexItem]? {
if let indexPath = try getIndexPath(fromDirectory: path, name: name) {
indexItems = try vectorStore.loadIndex(from: indexPath)
print("Loaded \(indexItems.count) index items from \(indexPath.absoluteString)")
return indexItems
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import Foundation

public class JsonStore: VectorStoreProtocol {

public func saveIndex(items: [IndexItem], to url: URL, as name: String) throws -> URL {
let encoder = JSONEncoder()
let data = try encoder.encode(items)
Expand Down
Loading