def _compare_opt_group(maximize, value, compareWith=None):
    """
    Tells whether ``value`` is strictly better than ``compareWith``.

    Parameters
    ==========

    maximize: bool
        If True, "better" means strictly greater; otherwise it means
        strictly smaller.
    value
        The candidate value.
    compareWith
        The value to beat. When None, the worst possible value is used
        (-Infinity when maximizing, Infinity when minimizing), so the
        call answers "does ``value`` differ from the unset sentinel?".

    Returns
    =======

    bool
    """
    if compareWith is None:
        compareWith = float('-inf') if maximize else float('inf')
    if maximize:
        return value > compareWith
    return value < compareWith


def _initialize_arrays_opt_group(maximize, rows, columns):
    """
    Returns a 2-d list filled with the "unset" sentinel.

    Note that the result has ``rows + 1`` rows and ``columns + 1``
    columns, i.e., one more than requested in each direction.

    Parameters
    ==========

    maximize: bool
        If True the table is filled with -Infinity, otherwise with
        Infinity.
    rows: int
        Highest valid row index of the returned table.
    columns: int
        Highest valid column index of the returned table.

    Returns
    =======

    list
        A list of ``rows + 1`` independent lists of length
        ``columns + 1``.
    """
    fill = float('-inf') if maximize else float('inf')
    # A fresh inner list per row, so that rows never alias each other.
    return [[fill] * (columns + 1) for _ in range(rows + 1)]


def _optimal_grouping_rec(object_arr, cost_storage, solution_matrix, maximize_prob, min_compare_len, lookup_index, get_lookup_fn, cost_fn):
    """
    Memoized recursive helper for optimal_grouping.

    Computes the best cost for the range ``lookup_index`` and records it
    in ``cost_storage[start][end]``; the split index which achieved it is
    recorded in ``solution_matrix[start][end]``. Both tables are indexed
    by the absolute range bounds and are modified in place.

    Returns
    =======

    The best cost found for ``lookup_index``. This is still the sentinel
    (Infinity/-Infinity) when no candidate improved on it.
    """
    row, col = lookup_index[0], lookup_index[1]

    # A cell that no longer holds the sentinel has already been solved.
    best = cost_storage[row][col]
    if _compare_opt_group(maximize_prob, best):
        return best

    # Candidate split points are start_index, ..., end_index - 1.
    start_index = row
    end_index = col + 1 - (min_compare_len - 1)

    # Base case: no split point is available, so the range is costed as
    # a whole. Compared by value (>=); an identity check on integers is
    # only reliable for small cached values.
    if start_index >= end_index:
        cost = cost_fn(object_arr, lookup_index, start_index)
        if _compare_opt_group(maximize_prob, cost, best):
            cost_storage[row][col] = cost
            solution_matrix[row][col] = start_index
            best = cost

    for i in range(start_index, end_index):
        # cost of the part before the split
        left_rec_indices = get_lookup_fn('before', lookup_index, i)
        cost = _optimal_grouping_rec(object_arr, cost_storage, solution_matrix,
                                     maximize_prob, min_compare_len,
                                     left_rec_indices, get_lookup_fn, cost_fn)

        # plus the cost of the part after the split
        right_rec_indices = get_lookup_fn('after', lookup_index, i)
        cost = cost + _optimal_grouping_rec(object_arr, cost_storage, solution_matrix,
                                            maximize_prob, min_compare_len,
                                            right_rec_indices, get_lookup_fn, cost_fn)

        # plus the cost of splitting the present range at i
        cost = cost + cost_fn(object_arr, lookup_index, i)

        # Strict comparison: among equally good splits the first one wins.
        if _compare_opt_group(maximize_prob, cost, best):
            cost_storage[row][col] = cost
            solution_matrix[row][col] = i
            best = cost

    return best


def optimal_grouping(process_objects, maximize_prob, min_compare_len, lookup_index, get_lookup_fn, cost_fn):
    """
    Groups a range of objects optimally with respect to a user supplied
    cost function, using memoized recursion over sub-ranges.

    Parameters
    ==========

    process_objects
        The objects on which the algorithm runs. It is only ever handed
        to ``cost_fn``.
    maximize_prob: bool
        Pass True to maximize the total cost, False to minimize it.
    min_compare_len: int
        Controls how many split points a range has. For a range
        ``[start, end]`` the split index runs over
        ``range(start, end + 1 - (min_compare_len - 1))``; when that is
        empty the range is costed as a whole with
        ``cost_fn(process_objects, [start, end], start)``.
    lookup_index: list
        ``[start_index, end_index]``, the range to be solved, with
        ``start_index <= end_index``. Indices are used directly as table
        positions and are therefore assumed to be non-negative.
    get_lookup_fn: function
        ``get_lookup_fn(position, range_indices, current_index)`` must
        return the sub-range ``[start, end]`` lying 'before' or 'after'
        (the value of ``position``) the split at ``current_index``.
    cost_fn: function
        ``cost_fn(process_objects, range_indices, current_index)`` must
        return the cost of splitting ``range_indices`` at
        ``current_index``.

    Returns
    =======

    list
        A 2-d list ``solution`` in which ``solution[i][j]`` is the split
        index chosen for the range ``[i, j]``. Entries of ranges which
        were never solved hold Infinity (-Infinity when maximizing).

    Raises
    ======

    ValueError
        When ``lookup_index`` has fewer than two items or when its start
        is greater than its end.

    Examples
    ========

    Matrix chain multiplication order for matrices whose dimensions are
    given by consecutive entries of ``dims``:

    >>> def cost(dims, rng, mid):
    ...     if rng[0] == rng[1]:
    ...         return 0
    ...     return dims[rng[0] - 1]*dims[mid]*dims[rng[1]]
    >>> def lookup(position, rng, mid):
    ...     if position == 'before':
    ...         return [rng[0], mid]
    ...     return [mid + 1, rng[1]]
    >>> dims = [30, 35, 15, 5, 10, 20, 25]
    >>> splits = optimal_grouping(dims, False, 2, [1, 6], lookup, cost)
    >>> splits[1][6]
    3
    """
    if len(lookup_index) < 2 or lookup_index[0] > lookup_index[1]:
        raise ValueError(
            'lookup index should at least have 2 integer items, first specifying the start and second specifying the last indices')

    start, end = lookup_index[0], lookup_index[1]
    length = end - start + 1

    # The tables are indexed by absolute positions, not by offsets into
    # the range, so they must reach index `end` even when the range
    # starts far from zero. For ranges starting at 0, 1 or 2 this equals
    # the historical (length + 2) x (length + 2) shape.
    extent = max(length + 1, end)

    # best cost of every solved sub-range (memo table)
    cost_storage = _initialize_arrays_opt_group(maximize_prob, extent, extent)
    # split index achieving that cost
    solution_matrix = _initialize_arrays_opt_group(maximize_prob, extent, extent)

    _optimal_grouping_rec(process_objects, cost_storage, solution_matrix, maximize_prob,
                          min_compare_len, lookup_index, get_lookup_fn, cost_fn)
    return solution_matrix
def test_optimal_grouping():
    # Both cases use the matrix chain multiplication formulation: the
    # cost of splitting the chain [i, j] at k is the number of scalar
    # multiplications needed to combine the two halves.
    def cost(matrix, endIndex, middle):
        # a single matrix needs no multiplication
        if endIndex[0] == endIndex[1]:
            return 0
        return matrix[endIndex[0]-1]*matrix[middle]*matrix[endIndex[1]]

    def lookup(position, endIndex, middle):
        # Strings are compared by value; identity (`is`) only works when
        # the interpreter happens to intern both operands.
        if position == 'before':
            return [endIndex[0], middle]
        return [middle+1, endIndex[1]]

    inf = float('inf')

    # test case 1: six matrices, entry [i][j] is the best split of [i, j]
    expected_result = [
        [inf, inf, inf, inf, inf, inf, inf, inf],
        [inf, 1, 1, 1, 3, 3, 3, inf],
        [inf, inf, 2, 2, 3, 3, 3, inf],
        [inf, inf, inf, 3, 3, 3, 3, inf],
        [inf, inf, inf, inf, 4, 4, 5, inf],
        [inf, inf, inf, inf, inf, 5, 5, inf],
        [inf, inf, inf, inf, inf, inf, 6, inf],
        [inf, inf, inf, inf, inf, inf, inf, inf],
    ]
    assert expected_result == optimal_grouping(
        [30, 35, 15, 5, 10, 20, 25], False, 2, [1, 6], lookup, cost)

    # test case 2: degenerate single index range on an empty input
    expected_result = [[0, inf, inf], [inf, inf, inf], [inf, inf, inf]]
    assert expected_result == optimal_grouping([], False, 2, [0, 0], lookup, cost)