Skip to content

Commit

Permalink
Added AMX acceleration (#39)
Browse files Browse the repository at this point in the history
* Added conformance of SimilarityIndex to Identifiable

* Added conformance to Hashable

* Removed print statements.

* Added AMX acceleration.

* Removed deprecated methods & added logger for save / load index operations.
  • Loading branch information
johnbean393 authored Jun 4, 2024
1 parent d01772c commit 6f02580
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//

import Foundation
import Accelerate

/// A struct implementing the `DistanceMetricProtocol` using the dot product.
///
Expand All @@ -17,14 +18,22 @@ public struct DotProduct: DistanceMetricProtocol {

/// Scores every neighbor against the query with `distance(between:and:)` and passes the scores to `sortedScores`.
///
/// - Parameters:
///   - queryEmbedding: The embedding every neighbor is compared against.
///   - neighborEmbeddings: The candidate embeddings to score.
///   - resultsCount: Forwarded to `sortedScores` as `topK`.
/// - Returns: The result of `sortedScores(scores:topK:)` for the computed scores.
public func findNearest(for queryEmbedding: [Float], in neighborEmbeddings: [[Float]], resultsCount: Int) -> [(Float, Int)] {
    var dotProducts: [Float] = []
    dotProducts.reserveCapacity(neighborEmbeddings.count)
    for neighbor in neighborEmbeddings {
        dotProducts.append(distance(between: queryEmbedding, and: neighbor))
    }
    return sortedScores(scores: dotProducts, topK: resultsCount)
}

/// Computes the dot product of two embeddings.
///
/// Elements are paired with `zip`, so any trailing elements of the longer embedding are ignored.
///
/// - Parameters:
///   - firstEmbedding: The first embedding.
///   - secondEmbedding: The second embedding.
/// - Returns: The sum of the element-wise products.
public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
    var accumulated: Float = 0
    // Accumulate left to right, in the same order as the pairs are produced.
    for (lhs, rhs) in zip(firstEmbedding, secondEmbedding) {
        accumulated += lhs * rhs
    }
    return accumulated
}

/// Computes the dot product of two embeddings using Accelerate's `vDSP_dotpr`.
///
/// - Parameters:
///   - firstEmbedding: The first embedding.
///   - secondEmbedding: The second embedding; must have the same number of elements as `firstEmbedding`.
/// - Returns: The dot product of the two embeddings.
/// - Precondition: Both embeddings have the same length.
public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
    // Mismatched lengths are a programmer error; trap rather than read past the shorter buffer.
    precondition(firstEmbedding.count == secondEmbedding.count, "Embeddings must have the same length")

    let elementCount = vDSP_Length(firstEmbedding.count)
    var result: Float = 0

    // Stride 1 on both inputs: the arrays are read contiguously.
    vDSP_dotpr(firstEmbedding, 1, secondEmbedding, 1, &result, elementCount)

    return result
}

}

/// A struct implementing the `DistanceMetricProtocol` using cosine similarity.
Expand All @@ -37,18 +46,31 @@ public struct CosineSimilarity: DistanceMetricProtocol {

/// Scores every neighbor against the query with `distance(between:and:)` and passes the scores to `sortedScores`.
///
/// - Parameters:
///   - queryEmbedding: The embedding every neighbor is compared against.
///   - neighborEmbeddings: The candidate embeddings to score.
///   - resultsCount: Forwarded to `sortedScores` as `topK`.
/// - Returns: The result of `sortedScores(scores:topK:)` for the computed scores.
public func findNearest(for queryEmbedding: [Float], in neighborEmbeddings: [[Float]], resultsCount: Int) -> [(Float, Int)] {
    let similarities = neighborEmbeddings.map { neighbor in
        distance(between: queryEmbedding, and: neighbor)
    }
    return sortedScores(scores: similarities, topK: resultsCount)
}

/// Computes the cosine similarity of two embeddings.
///
/// Elements are paired with `zip` for the dot product, so trailing elements of the longer
/// embedding do not contribute to it.
///
/// - Parameters:
///   - firstEmbedding: The first embedding.
///   - secondEmbedding: The second embedding.
/// - Returns: The dot product divided by the product of the two magnitudes, or `0` when that
///   product is zero (for example when either embedding is empty or all zeros).
public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
    let dotProduct = zip(firstEmbedding, secondEmbedding).map(*).reduce(0, +)
    let firstMagnitude = sqrt(firstEmbedding.map { $0 * $0 }.reduce(0, +))
    let secondMagnitude = sqrt(secondEmbedding.map { $0 * $0 }.reduce(0, +))

    // A zero denominator would produce NaN (0 / 0), which then breaks any ordering
    // applied to the scores; report "no similarity" instead.
    let denominator = firstMagnitude * secondMagnitude
    guard denominator != 0 else { return 0 }

    return dotProduct / denominator
}

/// Computes the cosine similarity of two embeddings using Accelerate.
///
/// - Parameters:
///   - firstEmbedding: The first embedding.
///   - secondEmbedding: The second embedding; must have the same number of elements as `firstEmbedding`.
/// - Returns: The dot product divided by the product of the two magnitudes, or `0` when that
///   product is zero (for example when either embedding is all zeros).
/// - Precondition: Both embeddings have the same length.
public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
    // Ensure the embeddings have the same length
    precondition(firstEmbedding.count == secondEmbedding.count, "Embeddings must have the same length")

    let elementCount = vDSP_Length(firstEmbedding.count)
    var dotProduct: Float = 0
    var firstSquaredSum: Float = 0
    var secondSquaredSum: Float = 0

    // Dot product and sums of squares, all read with stride 1.
    vDSP_dotpr(firstEmbedding, 1, secondEmbedding, 1, &dotProduct, elementCount)
    vDSP_svesq(firstEmbedding, 1, &firstSquaredSum, elementCount)
    vDSP_svesq(secondEmbedding, 1, &secondSquaredSum, elementCount)

    // Magnitudes are the square roots of the sums of squares.
    let denominator = sqrt(firstSquaredSum) * sqrt(secondSquaredSum)

    // A zero denominator would produce NaN (0 / 0), which then breaks any ordering
    // applied to the scores; report "no similarity" instead.
    guard denominator != 0 else { return 0 }

    return dotProduct / denominator
}

}

/// A struct implementing the `DistanceMetricProtocol` using Euclidean distance.
Expand All @@ -57,18 +79,26 @@ public struct CosineSimilarity: DistanceMetricProtocol {
///
/// - Note: Use this metric when the magnitudes of the embeddings are significant in your use case, and the embeddings are distributed in a Euclidean space.
public struct EuclideanDistance: DistanceMetricProtocol {

public init() {}

/// Measures every neighbor against the query with `distance(between:and:)` and passes the distances to `sortedDistances`.
///
/// - Parameters:
///   - queryEmbedding: The embedding every neighbor is compared against.
///   - neighborEmbeddings: The candidate embeddings to measure.
///   - resultsCount: Forwarded to `sortedDistances` as `topK`.
/// - Returns: The result of `sortedDistances(distances:topK:)` for the computed distances.
public func findNearest(for queryEmbedding: [Float], in neighborEmbeddings: [[Float]], resultsCount: Int) -> [(Float, Int)] {
    var measured: [Float] = []
    measured.reserveCapacity(neighborEmbeddings.count)
    for neighbor in neighborEmbeddings {
        measured.append(distance(between: queryEmbedding, and: neighbor))
    }
    return sortedDistances(distances: measured, topK: resultsCount)
}

/// Computes the Euclidean distance between two embeddings.
///
/// Elements are paired with `zip`, so any trailing elements of the longer embedding are ignored.
///
/// - Parameters:
///   - firstEmbedding: The first embedding.
///   - secondEmbedding: The second embedding.
/// - Returns: The square root of the sum of squared element-wise differences.
public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
    var sumOfSquares: Float = 0
    // Accumulate left to right, in the same order as the pairs are produced.
    for (lhs, rhs) in zip(firstEmbedding, secondEmbedding) {
        sumOfSquares += (lhs - rhs) * (lhs - rhs)
    }
    return sqrt(sumOfSquares)
}

/// Computes the Euclidean distance between two embeddings using Accelerate's `vDSP_distancesq`.
///
/// - Parameters:
///   - firstEmbedding: The first embedding.
///   - secondEmbedding: The second embedding; must have the same number of elements as `firstEmbedding`.
/// - Returns: The square root of the squared distance reported by `vDSP_distancesq`.
/// - Precondition: Both embeddings have the same length.
public func distance(between firstEmbedding: [Float], and secondEmbedding: [Float]) -> Float {
    // Mismatched lengths are a programmer error; trap rather than read past the shorter buffer.
    precondition(firstEmbedding.count == secondEmbedding.count, "Embeddings must have the same length")

    let elementCount = vDSP_Length(firstEmbedding.count)
    var squaredDistance: Float = 0

    // Sum of squared element-wise differences, both inputs read with stride 1.
    vDSP_distancesq(firstEmbedding, 1, secondEmbedding, 1, &squaredDistance, elementCount)

    return sqrt(squaredDistance)
}
}

// MARK: - Helpers
Expand Down
20 changes: 17 additions & 3 deletions Sources/SimilaritySearchKit/Core/Index/SimilarityIndex.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//

import Foundation
import OSLog

// MARK: - Type Aliases

Expand All @@ -17,9 +18,21 @@ public typealias TextSplitterType = SimilarityIndex.TextSplitterType
public typealias VectorStoreType = SimilarityIndex.VectorStoreType

@available(macOS 11.0, iOS 15.0, *)
public class SimilarityIndex {
public class SimilarityIndex: Identifiable, Hashable {

/// Two indexes compare equal when their `id` values are equal.
public static func == (lhs: SimilarityIndex, rhs: SimilarityIndex) -> Bool {
    lhs.id == rhs.id
}

/// Feeds only `id` into the hasher, matching the `id`-based `==`.
public func hash(into hasher: inout Hasher) {
    id.hash(into: &hasher)
}

// MARK: - Properties

/// A unique identifier for this index; equality and hashing are based on it.
public var id: UUID = UUID()

/// The items stored in the index.
public var indexItems: [IndexItem] = []

Expand Down Expand Up @@ -306,6 +319,7 @@ extension SimilarityIndex {

@available(macOS 13.0, iOS 16.0, *)
extension SimilarityIndex {

public func saveIndex(toDirectory path: URL? = nil, name: String? = nil) throws -> URL {
let indexName = name ?? self.indexName
let basePath: URL
Expand All @@ -319,15 +333,15 @@ extension SimilarityIndex {

let savedVectorStore = try vectorStore.saveIndex(items: indexItems, to: basePath, as: indexName)

print("Saved \(indexItems.count) index items to \(savedVectorStore.absoluteString)")
let bundleId: String = Bundle.main.bundleIdentifier ?? "com.similarity-search-kit.logger"
let logger: Logger = Logger(subsystem: bundleId, category: "similarityIndexSave")

return savedVectorStore
}

public func loadIndex(fromDirectory path: URL? = nil, name: String? = nil) throws -> [IndexItem]? {
if let indexPath = try getIndexPath(fromDirectory: path, name: name) {
indexItems = try vectorStore.loadIndex(from: indexPath)
print("Loaded \(indexItems.count) index items from \(indexPath.absoluteString)")
return indexItems
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import Foundation

public class JsonStore: VectorStoreProtocol {

public func saveIndex(items: [IndexItem], to url: URL, as name: String) throws -> URL {
let encoder = JSONEncoder()
let data = try encoder.encode(items)
Expand Down

0 comments on commit 6f02580

Please sign in to comment.