2 changes: 2 additions & 0 deletions Package.swift
@@ -9,6 +9,8 @@ let package = Package(
platforms: [
.iOS(.v16),
.macOS(.v13),
+.watchOS(.v10),
+.visionOS(.v1)
],
products: [
.library(
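With watchOS 10 and visionOS 1 declared as platform minimums in the manifest, SwiftPM treats those versions as the module-wide deployment target, which is what makes the per-declaration `@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)` attributes removed in the rest of this diff redundant. A rough sketch of the resulting platforms block follows; the package name and the product/target layout are placeholders, not copied from this hunk:

```swift
// swift-tools-version: 5.9
import PackageDescription

// Sketch only: the platforms array mirrors this diff, everything else is illustrative.
let package = Package(
    name: "whisperkit",            // assumed name, not shown in this hunk
    platforms: [
        .iOS(.v16),
        .macOS(.v13),
        .watchOS(.v10),            // added in this PR
        .visionOS(.v1)             // added in this PR
    ],
    products: [
        .library(name: "WhisperKit", targets: ["WhisperKit"])
    ],
    targets: [
        .target(name: "WhisperKit")
    ]
)
```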
3 changes: 0 additions & 3 deletions Sources/WhisperKit/Core/Audio/AudioChunker.swift
@@ -6,12 +6,10 @@ import AVFoundation
import Foundation

/// Responsible for chunking audio into smaller pieces
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public protocol AudioChunking {
func chunkAll(audioArray: [Float], maxChunkLength: Int, decodeOptions: DecodingOptions?) async throws -> [AudioChunk]
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public extension AudioChunking {
func updateSeekOffsetsForResults(
chunkedResults: [Result<[TranscriptionResult], Swift.Error>],
@@ -42,7 +40,6 @@ public extension AudioChunking {
}

/// A audio chunker that splits audio into smaller pieces based on voice activity detection
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
open class VADAudioChunker: AudioChunking {
/// prevent hallucinations at the end of the clip by stopping up to 1.0s early
private let windowPadding: Int
3 changes: 0 additions & 3 deletions Sources/WhisperKit/Core/Audio/AudioProcessor.swift
@@ -123,7 +123,6 @@ public extension AudioProcessing {
/// Loads and converts audio data from a specified file paths.
/// - Parameter audioPaths: The file paths of the audio files.
/// - Returns: `AVAudioPCMBuffer` containing the audio data.
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
static func loadAudioAsync(fromPath audioFilePath: String) async throws -> AVAudioPCMBuffer {
return try await Task {
try AudioProcessor.loadAudio(fromPath: audioFilePath)
@@ -191,7 +190,6 @@ public extension AudioProcessing {
}
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
open class AudioProcessor: NSObject, AudioProcessing {
private var lastInputDevice: DeviceID?
public var audioEngine: AVAudioEngine?
@@ -886,7 +884,6 @@ open class AudioProcessor: NSObject, AudioProcessing {

// MARK: - Streaming

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public extension AudioProcessor {
/// We have a new buffer, process and store it.
/// NOTE: Assumes audio is 16khz mono
3 changes: 0 additions & 3 deletions Sources/WhisperKit/Core/Audio/AudioStreamTranscriber.swift
@@ -3,7 +3,6 @@

import Foundation

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public extension AudioStreamTranscriber {
struct State {
public var isRecording: Bool = false
@@ -18,11 +17,9 @@ public extension AudioStreamTranscriber {
}
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public typealias AudioStreamTranscriberCallback = (AudioStreamTranscriber.State, AudioStreamTranscriber.State) -> Void

/// Responsible for streaming audio from the microphone, processing it, and transcribing it in real-time.
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public actor AudioStreamTranscriber {
private var state: AudioStreamTranscriber.State = .init() {
didSet {
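A side note on the API visible in this hunk: `AudioStreamTranscriberCallback` is handed the previous and the new transcriber state on every change. A minimal sketch of a matching closure, using only the `isRecording` field shown above:

```swift
// Closure matching AudioStreamTranscriberCallback, i.e.
// (AudioStreamTranscriber.State, AudioStreamTranscriber.State) -> Void.
let onStateChange: AudioStreamTranscriberCallback = { oldState, newState in
    // Only react when the recording flag actually flips.
    if oldState.isRecording != newState.isRecording {
        print("isRecording changed to \(newState.isRecording)")
    }
}
```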
1 change: 0 additions & 1 deletion Sources/WhisperKit/Core/Audio/EnergyVAD.swift
@@ -4,7 +4,6 @@
import Foundation

/// Voice activity detection based on energy threshold
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public final class EnergyVAD: VoiceActivityDetector {
public let energyThreshold: Float

1 change: 0 additions & 1 deletion Sources/WhisperKit/Core/Audio/VoiceActivityDetector.swift
@@ -5,7 +5,6 @@ import Foundation

/// A base class for Voice Activity Detection (VAD), used to identify and separate segments of audio that contain human speech from those that do not.
/// Subclasses must implement the `voiceActivity(in:)` method to provide specific voice activity detection functionality.
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
open class VoiceActivityDetector {
/// The sample rate of the audio signal, in samples per second.
public let sampleRate: Int
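The doc comment above notes that subclasses must implement `voiceActivity(in:)`. A hypothetical subclass sketch is below; the `[Float]` input and per-frame `[Bool]` return type are assumptions inferred from that comment and from `EnergyVAD`, not confirmed by this hunk:

```swift
// Hypothetical VAD that marks every frame as speech; useful only as a test stub.
// The override signature here is an assumption (see note above).
final class AlwaysOnVAD: VoiceActivityDetector {
    // Assumed analysis frame of 0.1 s at 16 kHz; the real base class exposes its
    // own frame configuration, which this sketch does not rely on.
    private let assumedFrameLength = 1600

    override func voiceActivity(in waveform: [Float]) -> [Bool] {
        let frameCount = max(1, waveform.count / assumedFrameLength)
        return Array(repeating: true, count: frameCount)
    }
}
```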
2 changes: 0 additions & 2 deletions Sources/WhisperKit/Core/AudioEncoder.swift
@@ -7,7 +7,6 @@ public protocol AudioEncoderOutputType {}
extension MLMultiArray: AudioEncoderOutputType {}

/// AudioEncoding protocol defines the requirements for an audio encoding implementation.
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public protocol AudioEncoding {
/// The size of the embedding produced by the encoder.
var embedSize: Int? { get }
@@ -19,7 +18,6 @@ public protocol AudioEncoding {
}

/// Backwards-compatible AudioEncoder implementation
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class AudioEncoder: AudioEncoding, WhisperMLModel {
public var model: MLModel?

2 changes: 0 additions & 2 deletions Sources/WhisperKit/Core/Configurations.swift
@@ -4,7 +4,6 @@
import Foundation

/// Configuration to initialize WhisperKit
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
open class WhisperKitConfig {
/// Name for whisper model to use
public var model: String?
@@ -126,7 +125,6 @@ open class WhisperKitConfig {
/// - firstTokenLogProbThreshold: If the log probability over the first sampled token is below this value, treat as failed.
/// - noSpeechThreshold: If the no speech probability is higher than this value AND the average log
/// probability over sampled tokens is below `logProbThreshold`, consider the segment as silent.
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public struct DecodingOptions: Codable, Sendable {
public var verbose: Bool
public var task: DecodingTask
1 change: 0 additions & 1 deletion Sources/WhisperKit/Core/FeatureExtractor.swift
@@ -16,7 +16,6 @@ public protocol FeatureExtracting {
func logMelSpectrogram(fromAudio inputAudio: any AudioProcessorOutputType) async throws -> (any FeatureExtractorOutputType)?
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
open class FeatureExtractor: FeatureExtracting, WhisperMLModel {
public var model: MLModel?

11 changes: 0 additions & 11 deletions Sources/WhisperKit/Core/Models.swift
@@ -134,7 +134,6 @@ public enum ModelState: CustomStringConvertible {
}
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public struct ModelComputeOptions: Sendable {
public var melCompute: MLComputeUnits
public var audioEncoderCompute: MLComputeUnits
@@ -252,7 +251,6 @@ public struct ModelSupportConfig: Codable, Sendable {
computeDisabledModels()
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public func modelSupport(for deviceIdentifier: String = WhisperKit.deviceName()) -> ModelSupport {
// Find the support with the longest matching identifier prefix
// i.e. `iPad13,16` should match exact `iPad13,16` instead of first prefix like `iPad13,1`
@@ -409,7 +407,6 @@ public enum ChunkingStrategy: String, Codable, CaseIterable {
case vad
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public struct DecodingFallback: Sendable {
public var needsFallback: Bool
public var fallbackReason: String
@@ -420,7 +417,6 @@ public struct DecodingFallback: Sendable {
}
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public extension DecodingFallback {
init?(
options: DecodingOptions,
@@ -447,7 +443,6 @@ public extension DecodingFallback {
}
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public struct DecodingResult {
public var language: String
public var languageProbs: [String: Float]
@@ -900,7 +895,6 @@ public class MelSpectrogramInput: MLFeatureProvider {
}

/// Model Prediction Output Type
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class MelSpectrogramOutput: MLFeatureProvider {
/// Source provided by CoreML
private let provider: MLFeatureProvider
@@ -937,7 +931,6 @@ public class MelSpectrogramOutput: MLFeatureProvider {
// MARK: AudioEncoder

/// Model Prediction Input Type
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class AudioEncoderInput: MLFeatureProvider {
/// melspectrogram_features as 1 × {80,128} × 1 × 3000 4-dimensional array of floats
public var melspectrogram_features: MLMultiArray
@@ -963,7 +956,6 @@ public class AudioEncoderInput: MLFeatureProvider {
}

/// Model Prediction Output Type
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class AudioEncoderOutput: MLFeatureProvider {
/// Source provided by CoreML
private let provider: MLFeatureProvider
@@ -1000,7 +992,6 @@ public class AudioEncoderOutput: MLFeatureProvider {
// MARK: TextDecoder

/// Model Prediction Input Type
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class TextDecoderInput: MLFeatureProvider {
/// input_ids as 1 element vector of 32-bit integers
public var input_ids: MLMultiArray
@@ -1068,7 +1059,6 @@ public class TextDecoderInput: MLFeatureProvider {
}

/// Model Prediction Output Type
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class TextDecoderOutput: MLFeatureProvider {
/// Source provided by CoreML
private let provider: MLFeatureProvider
@@ -1175,7 +1165,6 @@ public class TextDecoderCachePrefillInput: MLFeatureProvider {
}

/// Model Prediction Output Type
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public class TextDecoderCachePrefillOutput: MLFeatureProvider {
/// Source provided by CoreML
private let provider: MLFeatureProvider
4 changes: 0 additions & 4 deletions Sources/WhisperKit/Core/Text/LogitsFilter.swift
@@ -10,7 +10,6 @@ public protocol LogitsFiltering {
func filterLogits(_ logits: MLMultiArray, withTokens tokens: [Int]) -> MLMultiArray
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
open class SuppressTokensFilter: LogitsFiltering {
let suppressTokens: [Int]
private let suppressTokenIndexes: [[NSNumber]]
@@ -26,7 +25,6 @@ open class SuppressTokensFilter: LogitsFiltering {
}
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
open class SuppressBlankFilter: LogitsFiltering {
let specialTokens: SpecialTokens
let sampleBegin: Int
@@ -54,7 +52,6 @@ open class SuppressBlankFilter: LogitsFiltering {
}

/// Implementation based on https://github.com/openai/whisper/blob/master/whisper/decoding.py#L441
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
open class TimestampRulesFilter: LogitsFiltering {
let specialTokens: SpecialTokens
let sampleBegin: Int
@@ -246,7 +243,6 @@ open class TimestampRulesFilter: LogitsFiltering {
}
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
open class LanguageLogitsFilter: LogitsFiltering {
let allLanguageTokens: Set<Int>
let logitsDim: Int
2 changes: 0 additions & 2 deletions Sources/WhisperKit/Core/Text/SegmentSeeker.swift
@@ -6,7 +6,6 @@ import CoreML
import Foundation
import Tokenizers

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public protocol SegmentSeeking {
func findSeekPointAndSegments(
decodingResult: DecodingResult,
@@ -34,7 +33,6 @@ public protocol SegmentSeeking {
) throws -> [TranscriptionSegment]?
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
open class SegmentSeeker: SegmentSeeking {
public init() {}

1 change: 0 additions & 1 deletion Sources/WhisperKit/Core/Text/TokenSampler.swift
@@ -26,7 +26,6 @@ public struct SamplingResult: Sendable {
}
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
open class GreedyTokenSampler: TokenSampling {
public var temperature: FloatType
public var eotToken: Int
3 changes: 0 additions & 3 deletions Sources/WhisperKit/Core/TextDecoder.swift
@@ -57,7 +57,6 @@ public protocol DecodingInputsType {
func reset(prefilledCacheSize: Int, maxTokenContext: Int)
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public protocol TextDecoding {
var tokenizer: WhisperTokenizer? { get set }
var prefillData: WhisperMLModel? { get set }
@@ -131,7 +130,6 @@ public protocol TextDecoding {
)
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public extension TextDecoding {
@available(*, deprecated, message: "Subject to removal in a future version. Use `decodeText(from:using:sampler:options:callback:) async throws -> DecodingResult` instead.")
func decodeText(
@@ -484,7 +482,6 @@ public class TextDecoderContextPrefill: WhisperMLModel {
public var model: MLModel?
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
open class TextDecoder: TextDecoding, WhisperMLModel {
public var model: MLModel?
public var tokenizer: WhisperTokenizer?
1 change: 0 additions & 1 deletion Sources/WhisperKit/Core/TranscribeTask.swift
@@ -5,7 +5,6 @@ import CoreML
import Foundation

/// Responsible for transcribing audio chunk to text using the provided models and configurations.
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
final class TranscribeTask {
private var timings: TranscriptionTimings
private let progress: Progress
1 change: 0 additions & 1 deletion Sources/WhisperKit/Core/WhisperKit.swift
@@ -9,7 +9,6 @@ import Hub
import TensorUtils
import Tokenizers

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
open class WhisperKit {
/// Models
public private(set) var modelVariant: ModelVariant = .tiny
1 change: 0 additions & 1 deletion Sources/WhisperKit/Utilities/Concurrency.swift
@@ -4,7 +4,6 @@
import Foundation

/// An actor that provides thread-safe early stopping functionality using UUIDs as keys
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public actor EarlyStopActor {
private var shouldStop = [UUID: Bool]()

1 change: 0 additions & 1 deletion Sources/WhisperKit/Utilities/Extensions+Internal.swift
@@ -131,7 +131,6 @@ extension AudioProcessing {
}
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
extension DecodingOptions {
func prepareSeekClips(contentFrames: Int) -> [(start: Int, end: Int)] {
var seekPoints: [Int] = clipTimestamps.map { Int(round($0 * Float(WhisperKit.sampleRate))) }
3 changes: 0 additions & 3 deletions Sources/WhisperKit/Utilities/Extensions+Public.swift
@@ -10,7 +10,6 @@ public extension Array where Element == TranscriptionSegment {
}
}

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public extension WhisperKit {
static var isRunningOnSimulator: Bool {
#if targetEnvironment(simulator)
@@ -56,7 +55,6 @@ public extension String {
// MARK: CoreML

public extension MLMultiArray {
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
convenience init(shape: [NSNumber], dataType: MLMultiArrayDataType, initialValue: Any) throws {
switch dataType {
case .float16:
@@ -300,7 +298,6 @@ public func resolveAbsolutePath(_ inputPath: String) -> String {


@available(*, deprecated, message: "Subject to removal in a future version. Use `ModelUtilities.formatModelFiles(_:)` instead.")
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public extension WhisperKit {
static func formatModelFiles(_ modelFiles: [String]) -> [String] {
return ModelUtilities.formatModelFiles(modelFiles)
3 changes: 0 additions & 3 deletions Sources/WhisperKit/Utilities/ModelUtilities.swift
@@ -11,7 +11,6 @@ public struct ModelUtilities {

// MARK: Public

-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public static func modelSupport(for deviceName: String, from config: ModelSupportConfig? = nil) -> ModelSupport {
let config = config ?? Constants.fallbackModelSupportConfig
let modelSupport = config.modelSupport(for: deviceName)
@@ -274,14 +273,12 @@ public func loadTokenizer(
}

@available(*, deprecated, message: "Subject to removal in a future version. Use ModelUtilities.modelSupport(for:from:) -> ModelSupport instead.")
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public func modelSupport(for deviceName: String, from config: ModelSupportConfig? = nil) -> ModelSupport {
return ModelUtilities.modelSupport(for: deviceName, from: config)
}

@available(*, deprecated, message: "Subject to removal in a future version. Use ModelUtilities.modelSupport(for:from:) -> ModelSupport instead.")
@_disfavoredOverload
-@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
public func modelSupport(for deviceName: String, from config: ModelSupportConfig? = nil) -> (default: String, disabled: [String]) {
let modelSupport = ModelUtilities.modelSupport(for: deviceName, from: config)
return (modelSupport.default, modelSupport.disabled)
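For downstream consumers, the practical effect is that a watchOS or visionOS target can now depend on WhisperKit without wrapping call sites in availability checks. A hypothetical consumer manifest (names, branch requirement, and target layout are illustrative, not from this PR):

```swift
// swift-tools-version: 5.9
import PackageDescription

// Hypothetical downstream package depending on WhisperKit from a watchOS target.
let package = Package(
    name: "MyWatchTranscriber",
    platforms: [.watchOS(.v10)],
    dependencies: [
        .package(url: "https://github.com/argmaxinc/WhisperKit.git", branch: "main")
    ],
    targets: [
        .target(
            name: "MyWatchTranscriber",
            dependencies: [.product(name: "WhisperKit", package: "WhisperKit")]
        )
    ]
)
```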