Skip to content

Commit 9434fdd

Browse files
added optional PCA and KMeans iteration count
1 parent dc0ca5c commit 9434fdd

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

tasnif/calculations.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,13 @@ def calculate_pca(embeddings, pca_dim):
5757
return pca_embeddings
5858

5959

60-
def calculate_kmeans(pca_embeddings, num_classes):
60+
def calculate_kmeans(pca_embeddings, num_classes, iter=10):
6161
"""
6262
The function `calculate_kmeans` performs KMeans clustering on PCA embeddings data to assign
6363
labels and centroids.
64+
:param pca_embeddings: The `pca_embeddings` parameter is a NumPy array containing the data points.
65+
:param num_classes: The `num_classes` parameter is an integer that specifies the number of clusters.
66+
:param iter: The `iter` parameter is an integer that specifies the number of iterations for the KMeans algorithm. Default is 10. Should be a positive integer.
6467
"""
6568

6669
if not isinstance(pca_embeddings, np.ndarray):
@@ -72,7 +75,7 @@ def calculate_kmeans(pca_embeddings, num_classes):
7275
)
7376

7477
try:
75-
centroid, labels = kmeans2(data=pca_embeddings, k=num_classes, minit="points")
78+
centroid, labels = kmeans2(data=pca_embeddings, k=num_classes, minit="points", iter=iter)
7679
counts = np.bincount(labels)
7780
logging.info("KMeans calculated.")
7881
return centroid, labels, counts

tasnif/tasnif.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,20 +52,26 @@ def read(self, folder_path):
5252
self.image_paths = read_images_from_directory(folder_path)
5353
self.images = read_with_pil(self.image_paths)
5454

55-
def calculate(self):
55+
def calculate(self, pca=True, iter=10):
5656
"""
5757
The function calculates embeddings, performs PCA, and applies K-means clustering to the
5858
embeddings. It will not perform these operations if no images have been read.
59+
60+
:param pca: The `pca` parameter is a boolean that specifies whether to perform PCA or not. Default is True
61+
:param iter: The `iter` parameter is an integer that specifies the number of iterations for the KMeans algorithm. Default is 10.
5962
"""
6063

6164
if not self.images:
6265
raise ValueError("The images list can not be empty. Please call the read method before calculating.")
6366

6467
self.embeddings = get_embeddings(use_gpu=self.use_gpu, images=self.images)
65-
self.pca_embeddings = calculate_pca(self.embeddings, self.pca_dim)
66-
self.centroid, self.labels, self.counts = calculate_kmeans(
67-
self.pca_embeddings, self.num_classes
68-
)
68+
if pca:
69+
self.pca_embeddings = calculate_pca(self.embeddings, self.pca_dim)
70+
self.centroid, self.labels, self.counts = calculate_kmeans(self.pca_embeddings, self.num_classes, iter = iter)
71+
else:
72+
self.centroid, self.labels, self.counts = calculate_kmeans(
73+
self.embeddings, self.num_classes, iter = iter
74+
)
6975

7076
def export(self, output_folder="./"):
7177
"""

0 commit comments

Comments
 (0)