Skip to content

Commit 1adc725

Browse files
authored
Merge pull request #4 from Stealeristaken/deneme
Added optional PCA and optional iteration count
2 parents 0d69d97 + 9434fdd commit 1adc725

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

tasnif/calculations.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,13 @@ def calculate_pca(embeddings, pca_dim):
4949
return pca_embeddings
5050

5151

52-
def calculate_kmeans(pca_embeddings, num_classes):
52+
def calculate_kmeans(pca_embeddings, num_classes, iter=10):
5353
"""
5454
The function `calculate_kmeans` performs KMeans clustering on PCA embeddings data to assign
5555
labels and centroids.
56+
:param pca_embeddings: The `pca_embeddings` parameter is a NumPy array containing the data points.
57+
:param num_classes: The `num_classes` parameter is an integer that specifies the number of clusters.
58+
:param iter: The `iter` parameter is an integer that specifies the number of iterations for the KMeans algorithm. Default is 10. Should be a positive integer.
5659
"""
5760

5861
if not isinstance(pca_embeddings, np.ndarray):
@@ -64,7 +67,7 @@ def calculate_kmeans(pca_embeddings, num_classes):
6467
)
6568

6669
try:
67-
centroid, labels = kmeans2(data=pca_embeddings, k=num_classes, minit="points")
70+
centroid, labels = kmeans2(data=pca_embeddings, k=num_classes, minit="points", iter=iter)
6871
counts = np.bincount(labels)
6972
info("KMeans calculated.")
7073
return centroid, labels, counts

tasnif/tasnif.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,20 +42,26 @@ def read(self, folder_path):
4242
self.image_paths = read_images_from_directory(folder_path)
4343
self.images = read_with_pil(self.image_paths)
4444

45-
def calculate(self):
45+
def calculate(self, pca=True, iter=10):
4646
"""
4747
The function calculates embeddings, performs PCA, and applies K-means clustering to the
4848
embeddings. It will not perform these operations if no images have been read.
49+
50+
:param pca: The `pca` parameter is a boolean that specifies whether to perform PCA or not. Default is True.
51+
:param iter: The `iter` parameter is an integer that specifies the number of iterations for the KMeans algorithm. Default is 10.
4952
"""
5053

5154
if not self.images:
5255
raise ValueError("The images list can not be empty. Please call the read method before calculating.")
5356

5457
self.embeddings = get_embeddings(use_gpu=self.use_gpu, images=self.images)
55-
self.pca_embeddings = calculate_pca(self.embeddings, self.pca_dim)
56-
self.centroid, self.labels, self.counts = calculate_kmeans(
57-
self.pca_embeddings, self.num_classes
58-
)
58+
if pca:
59+
self.pca_embeddings = calculate_pca(self.embeddings, self.pca_dim)
60+
self.centroid, self.labels, self.counts = calculate_kmeans(self.pca_embeddings, self.num_classes, iter = iter)
61+
else:
62+
self.centroid, self.labels, self.counts = calculate_kmeans(
63+
self.embeddings, self.num_classes, iter = iter
64+
)
5965

6066
def export(self, output_folder="./"):
6167
"""

0 commit comments

Comments
 (0)