Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// MLMultiArray+Utils.swift
// CoreML+Extensions.swift
// CoreMLBert
//
// Created by Julien Chaumond on 27/06/2019.
Expand All @@ -10,7 +10,7 @@
import CoreML
import Foundation

public extension MLMultiArray {
extension MLMultiArray {
/// All values will be stored in the last dimension of the MLMultiArray (default is dims=1)
static func from(_ arr: [Int], dims: Int = 1) -> MLMultiArray {
var shape = Array(repeating: 1, count: dims)
Expand Down Expand Up @@ -88,7 +88,7 @@ public extension MLMultiArray {
}
}

public extension MLMultiArray {
extension MLMultiArray {
/// Provides a way to index n-dimensionals arrays a la numpy.
enum Indexing: Equatable {
case select(Int)
Expand Down Expand Up @@ -197,4 +197,48 @@ extension MLMultiArray {
return s + "]"
}
}

extension MLShapedArray<Float> {
    /// The array's values as a flat `[Float]`.
    ///
    /// A one-dimensional array with unit stride is copied through `MLMultiArray.floats`;
    /// every other layout, and a `nil` result from that copy, yields `scalars`.
    var floats: [Float] {
        let isFlatUnitStride = strides.count == 1 && strides.first == 1
        if isFlatUnitStride, let copied = MLMultiArray(self).floats {
            // Fast path: memcpy
            return copied
        }

        // For some reason this path is slow.
        // If strides is not 1, we can write a Metal kernel to copy the values properly.
        return scalars
    }
}

extension MLShapedArraySlice<Float> {
    /// The slice's values as a flat `[Float]`.
    ///
    /// Only a one-dimensional slice with unit stride goes through `MLMultiArray.floats`;
    /// anything else, or a `nil` result from that copy, returns `scalars`.
    var floats: [Float] {
        if strides.count != 1 || strides.first != 1 {
            // For some reason this path is slow.
            // If strides is not 1, we can write a Metal kernel to copy the values properly.
            return scalars
        }

        // Fast path: memcpy
        guard let copied = MLMultiArray(self).floats else {
            return scalars
        }
        return copied
    }
}

extension MLMultiArray {
    /// A copy of the array's storage as `[Float]`, or `nil` when no copy can be made.
    ///
    /// Returns `nil` when `dataType` is not `.float32`, when the underlying buffer has no base
    /// address, or when the buffer holds fewer bytes than `count` floats require.
    ///
    /// NOTE(review): the first `count` floats of the raw storage are copied verbatim, so this
    /// presumably expects a contiguous layout — confirm for arrays with padded strides.
    var floats: [Float]? {
        guard dataType == .float32 else { return nil }

        let elementCount = count
        let byteCount = elementCount * MemoryLayout<Float>.stride
        return withUnsafeBytes { source -> [Float]? in
            // Never read past the end of the backing storage.
            guard source.baseAddress != nil, source.count >= byteCount else { return nil }

            var result = [Float](repeating: 0, count: elementCount)
            result.withUnsafeMutableBytes { destination in
                // Bounds-aware bulk copy; no force-unwrapped base address needed.
                destination.copyMemory(from: UnsafeRawBufferPointer(rebasing: source.prefix(byteCount)))
            }
            return result
        }
    }
}
#endif // canImport(CoreML)
54 changes: 0 additions & 54 deletions Sources/Generation/MLShapedArray+Utils.swift

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
//
// BOMDoubling.swift
// JSONSerialization+BOM.swift
// swift-transformers
//
// Created by Pedro Cuenca on 20250912
//

import Foundation

extension Data {
extension JSONSerialization {
    /// Parses `data` as JSON after passing it through `duplicatingBOMsAfterQuotes`.
    ///
    /// - Parameters:
    ///   - data: The raw JSON bytes.
    ///   - options: Reading options forwarded to `JSONSerialization.jsonObject(with:options:)`.
    /// - Returns: The object produced by `JSONSerialization.jsonObject(with:options:)`.
    /// - Throws: Any error thrown by `JSONSerialization.jsonObject(with:options:)`.
    class func bomPreservingJsonObject(with data: Data, options: JSONSerialization.ReadingOptions = []) throws -> Any {
        let bomSafeData = data.duplicatingBOMsAfterQuotes
        let parsed = try JSONSerialization.jsonObject(with: bomSafeData, options: options)
        return parsed
    }
}

private extension Data {
/// Workaround for https://github.com/huggingface/swift-transformers/issues/116
/// Duplicate a BOM sequence that follows a quote. The first BOM is swallowed by JSONSerialization.jsonObject
/// because it thinks it marks the encoding.
Expand Down Expand Up @@ -40,9 +46,3 @@ extension Data {
}
}
}

extension JSONSerialization {
    /// Parses `data` as JSON after passing it through `duplicatingBOMsAfterQuotes`.
    ///
    /// - Parameters:
    ///   - data: The raw JSON bytes.
    ///   - options: Reading options forwarded to `JSONSerialization.jsonObject(with:options:)`.
    /// - Returns: The object produced by `JSONSerialization.jsonObject(with:options:)`.
    /// - Throws: Any error thrown by `JSONSerialization.jsonObject(with:options:)`.
    class func bomPreservingJsonObject(with data: Data, options: JSONSerialization.ReadingOptions = []) throws -> Any {
        let bomSafeData = data.duplicatingBOMsAfterQuotes
        let parsed = try JSONSerialization.jsonObject(with: bomSafeData, options: options)
        return parsed
    }
}
4 changes: 2 additions & 2 deletions Sources/Hub/HubApi.swift
Original file line number Diff line number Diff line change
Expand Up @@ -891,13 +891,13 @@ public extension Hub {
}
}

public extension [String] {
private extension [String] {
    /// Returns the elements for which `fnmatch` (called with flags `0`) reports a match
    /// against `glob`, in their original order.
    func matching(glob: String) -> [String] {
        let isMatch: (String) -> Bool = { candidate in
            fnmatch(glob, candidate, 0) == 0
        }
        return filter(isMatch)
    }
}

public extension FileManager {
private extension FileManager {
func getFileUrls(at directoryUrl: URL) throws -> [URL] {
var fileUrls = [URL]()

Expand Down
2 changes: 1 addition & 1 deletion Sources/Tokenizers/BertTokenizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ class BasicTokenizer {
}
}

extension Character {
private extension Character {
/// https://github.com/huggingface/transformers/blob/8c1b5d37827a6691fef4b2d926f2d04fb6f5a9e3/src/transformers/tokenization_utils.py#L367
var isExtendedPunctuation: Bool {
if isPunctuation { return true }
Expand Down
2 changes: 1 addition & 1 deletion Sources/Tokenizers/Decoder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ class MetaspaceDecoder: Decoder {
}

/// We could use firstIndex(where:), lastIndex(where:) for possibly better efficiency (and do both ends at once)
public extension String {
private extension String {
func trimmingFromStart(character: Character = " ", upto: Int) -> String {
var result = self
var trimmed = 0
Expand Down
161 changes: 0 additions & 161 deletions Sources/Tokenizers/PreTokenizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -238,164 +238,3 @@ class SplitPreTokenizer: PreTokenizer {
return pattern.split(text, invert: invert)
}
}

/// A pattern used to split text: either regular-expression source or a literal string.
enum StringSplitPattern {
    /// The pattern text is handed to the regular-expression based `split(by:includeSeparators:)`.
    case regexp(regexp: String)
    /// The pattern text is matched with empty compare options (`options: []`).
    case string(pattern: String)
}

extension StringSplitPattern {
    /// Splits `text` using this pattern.
    ///
    /// - Parameters:
    ///   - text: The string to split.
    ///   - invert: Only consulted for `.string` patterns, where separators are kept when it is
    ///     `false`. `.regexp` patterns always keep their separators.
    /// - Returns: The pieces produced by the matching `String.split(by:…)` overload.
    func split(_ text: String, invert: Bool = true) -> [String] {
        switch self {
        case .string(let literal):
            let keepSeparators = !invert
            return text.split(by: literal, options: [], includeSeparators: keepSeparators)
        case .regexp(let expression):
            return text.split(by: expression, includeSeparators: true)
        }
    }
}

extension StringSplitPattern {
    /// Builds a pattern from `config.pattern`.
    ///
    /// `pattern.String` is checked first and yields a `.string` pattern; otherwise
    /// `pattern.Regex` yields a `.regexp` pattern.
    ///
    /// - Returns: The pattern, or `nil` when neither entry provides a string.
    static func from(config: Config) -> StringSplitPattern? {
        if let literal = config.pattern.String.string() {
            return StringSplitPattern.string(pattern: literal)
        }
        guard let expression = config.pattern.Regex.string() else {
            return nil
        }
        return StringSplitPattern.regexp(regexp: expression)
    }
}

public extension String {
    /// Returns every range at which `string` is found, searching left to right.
    ///
    /// - Parameters:
    ///   - string: The text to look for; treated as a regular expression under the default `options`.
    ///   - options: Compare options forwarded to `range(of:options:range:)`.
    /// - Returns: The matched ranges in order of appearance; empty when nothing matches.
    func ranges(of string: String, options: CompareOptions = .regularExpression) -> [Range<Index>] {
        var result: [Range<Index>] = []
        var start = startIndex
        while let range = range(of: string, options: options, range: start..<endIndex) {
            result.append(range)
            if range.isEmpty {
                // A zero-length match consumes nothing: step one character forward so the search
                // advances, and stop at the end instead of re-finding the same match forever.
                guard range.lowerBound < endIndex else { break }
                start = index(after: range.lowerBound)
            } else {
                start = range.upperBound
            }
        }
        return result
    }

    /// Splits the string at every non-empty match of `string`.
    ///
    /// - Parameters:
    ///   - string: The separator; treated as a regular expression under the default `options`.
    ///   - options: Compare options forwarded to `range(of:options:range:)`.
    ///   - includeSeparators: When `true`, each matched separator is emitted as its own element.
    ///   - omittingEmptySubsequences: When `true`, empty pieces between separators (and at either
    ///     end) are dropped; when `false`, they are kept as empty strings.
    /// - Returns: The pieces, in order of appearance.
    func split(by string: String, options: CompareOptions = .regularExpression, includeSeparators: Bool = false, omittingEmptySubsequences: Bool = true) -> [String] {
        var result: [String] = []
        // Start of the piece currently being accumulated.
        var segmentStart = startIndex
        // Where the next search begins; runs ahead of `segmentStart` only after zero-length matches.
        var searchStart = startIndex
        while let range = range(of: string, options: options, range: searchStart..<endIndex) {
            if range.isEmpty {
                // A zero-length match cannot act as a separator. Skip past it so the loop
                // terminates, without dropping the character it sits in front of.
                guard range.lowerBound < endIndex else { break }
                searchStart = index(after: range.lowerBound)
                continue
            }
            // Keep empty pieces unless the caller asked to omit them.
            if !omittingEmptySubsequences || segmentStart < range.lowerBound {
                result.append(String(self[segmentStart..<range.lowerBound]))
            }
            if includeSeparators {
                result.append(String(self[range]))
            }
            segmentStart = range.upperBound
            searchStart = range.upperBound
        }

        if !omittingEmptySubsequences || segmentStart < endIndex {
            result.append(String(self[segmentStart...]))
        }
        return result
    }

    /// Splits the string at every match of `captureRegex`, keeping the separators.
    ///
    /// This version supports capture groups, whereas the one above doesn't: for each match the
    /// highest-numbered group that yields a valid range is emitted as the separator (group 0,
    /// the whole match, is the last candidate).
    ///
    /// - Parameter captureRegex: The compiled expression to split on.
    /// - Returns: The pieces and separators in order; `[self]` when there is no match.
    func split(by captureRegex: NSRegularExpression) -> [String] {
        // Find the matching capture groups
        let selfRange = NSRange(startIndex..<endIndex, in: self)
        let matches = captureRegex.matches(in: self, options: [], range: selfRange)

        if matches.isEmpty { return [self] }

        var result: [String] = []
        var start = startIndex

        for match in matches {
            // IMPORTANT: convert from NSRange to Range<String.Index>
            // https://stackoverflow.com/questions/75543272/convert-a-given-utf8-nsrange-in-a-string-to-a-utf16-nsrange
            guard let matchRange = Range(match.range, in: self) else { continue }

            // Add text before the match
            if start < matchRange.lowerBound {
                result.append(String(self[start..<matchRange.lowerBound]))
            }

            // Move start to after the match
            start = matchRange.upperBound

            // Append separator, supporting capture groups
            for r in (0..<match.numberOfRanges).reversed() {
                let nsRange = match.range(at: r)
                if let sepRange = Range(nsRange, in: self) {
                    result.append(String(self[sepRange]))
                    break
                }
            }
        }

        // Append remaining suffix
        if start < endIndex {
            result.append(String(self[start...]))
        }

        return result
    }
}

/// How `String.split(by:options:behavior:)` treats the delimiters it finds.
public enum SplitDelimiterBehavior {
    /// Delimiters are left out of the result.
    case removed
    /// Each delimiter is returned as its own element.
    case isolated
    /// Each delimiter stays attached to the end of the piece that precedes it.
    case mergedWithPrevious
    /// Each delimiter stays attached to the start of the piece that follows it.
    case mergedWithNext
}

public extension String {
    /// Splits the string on `string`, handling the delimiters as `behavior` dictates.
    ///
    /// - Parameters:
    ///   - string: The delimiter; treated as a regular expression under the default `options`.
    ///   - options: Compare options forwarded to the underlying search.
    ///   - behavior: Whether delimiters are removed, isolated, or merged into a neighbouring piece.
    /// - Returns: The resulting pieces, in order of appearance.
    func split(by string: String, options: CompareOptions = .regularExpression, behavior: SplitDelimiterBehavior) -> [String] {
        /// Cuts the string at each boundary in turn and returns the pieces, followed by any
        /// non-empty remainder after the last boundary.
        func pieces(cutAt boundaries: [String.Index]) -> [String] {
            var spans: [Range<String.Index>] = []
            var cursor = startIndex
            for boundary in boundaries {
                spans.append(cursor..<boundary)
                cursor = boundary
            }
            if cursor < endIndex {
                spans.append(cursor..<endIndex)
            }
            return spans.map { String(self[$0]) }
        }

        switch behavior {
        case .removed:
            return split(by: string, options: options, includeSeparators: false)
        case .isolated:
            return split(by: string, options: options, includeSeparators: true)
        case .mergedWithNext:
            // Cut just before every delimiter, except one sitting at the very start.
            // "the-final--countdown" -> (3, 4), (9, 10), (10, 11) -> (start, 2), (3, 8), (9, 9), (10, end)
            let delimiterStarts = ranges(of: string, options: options)
                .map(\.lowerBound)
                .filter { $0 != startIndex }
            return pieces(cutAt: delimiterStarts)
        case .mergedWithPrevious:
            // Cut just after every delimiter.
            // "the-final--countdown" -> (3, 4), (9, 10), (10, 11) -> (start, 3), (4, 9), (10, 10), (11, end)
            let delimiterEnds = ranges(of: string, options: options).map(\.upperBound)
            return pieces(cutAt: delimiterEnds)
        }
    }
}
Loading