From 1bbd1c6a560ed990256194989b3f7f2c4a5e7f7a Mon Sep 17 00:00:00 2001 From: CHHAVI SHARMA Date: Sat, 14 Sep 2019 02:16:30 -0400 Subject: [PATCH 01/76] Subpart 1 completed --- Project2-Stream-Compaction/src/main.cpp | 3 + .../stream_compaction/CMakeLists.txt | 2 +- .../stream_compaction/common.cu | 17 +- .../stream_compaction/cpu.cu | 63 ++++++- .../stream_compaction/efficient.cu | 175 +++++++++++++++++- .../stream_compaction/naive.cu | 87 ++++++++- 6 files changed, 333 insertions(+), 14 deletions(-) diff --git a/Project2-Stream-Compaction/src/main.cpp b/Project2-Stream-Compaction/src/main.cpp index d016553..85225fb 100644 --- a/Project2-Stream-Compaction/src/main.cpp +++ b/Project2-Stream-Compaction/src/main.cpp @@ -117,6 +117,7 @@ int main(int argc, char* argv[]) { expectedCount = count; printArray(count, b, true); printCmpLenResult(count, expectedCount, b, b); + printf("Finished.\n\n"); zeroArray(SIZE, c); printDesc("cpu compact without scan, non-power-of-two"); @@ -125,6 +126,7 @@ int main(int argc, char* argv[]) { expectedNPOT = count; printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); + printf("Finished.\n\n"); zeroArray(SIZE, c); printDesc("cpu compact with scan"); @@ -132,6 +134,7 @@ int main(int argc, char* argv[]) { printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); printArray(count, c, true); printCmpLenResult(count, expectedCount, b, c); + printf("Finished.\n\n"); zeroArray(SIZE, c); printDesc("work-efficient compact, power-of-two"); diff --git a/Project2-Stream-Compaction/stream_compaction/CMakeLists.txt b/Project2-Stream-Compaction/stream_compaction/CMakeLists.txt index cdbef77..4bb0dc2 100644 --- a/Project2-Stream-Compaction/stream_compaction/CMakeLists.txt +++ b/Project2-Stream-Compaction/stream_compaction/CMakeLists.txt @@ -13,5 +13,5 @@ set(SOURCE_FILES cuda_add_library(stream_compaction ${SOURCE_FILES} - OPTIONS -arch=sm_20 + OPTIONS -arch=sm_61 ) diff --git a/Project2-Stream-Compaction/stream_compaction/common.cu b/Project2-Stream-Compaction/stream_compaction/common.cu index 2ed6d63..ded83d0 100644 --- a/Project2-Stream-Compaction/stream_compaction/common.cu +++ b/Project2-Stream-Compaction/stream_compaction/common.cu @@ -24,7 +24,16 @@ namespace StreamCompaction { */ __global__ void kernMapToBoolean(int n, int *bools, const int *idata) { // TODO - } + int tid = (blockIdx.x * blockDim.x) + threadIdx.x; + if (tid >= n) return; + + if(idata[tid]!=0){ + bools[tid] = 1; + } + else { + bools[tid] = 0; + } + } /** * Performs scatter on an array. That is, for each element in idata, @@ -33,6 +42,12 @@ namespace StreamCompaction { __global__ void kernScatter(int n, int *odata, const int *idata, const int *bools, const int *indices) { // TODO + int tid = (blockIdx.x * blockDim.x) + threadIdx.x; + if (tid >= n) return; + + if (bools[tid] == 1) { + odata[indices[tid]] = idata[tid]; + } } } diff --git a/Project2-Stream-Compaction/stream_compaction/cpu.cu b/Project2-Stream-Compaction/stream_compaction/cpu.cu index a2d3e6c..a5c30b1 100644 --- a/Project2-Stream-Compaction/stream_compaction/cpu.cu +++ b/Project2-Stream-Compaction/stream_compaction/cpu.cu @@ -18,9 +18,22 @@ namespace StreamCompaction { * (Optional) For better understanding before starting moving to GPU, you can simulate your GPU scan in this function first. 
*/ void scan(int n, int *odata, const int *idata) { - timer().startCpuTimer(); + bool tmp=true; + try { + timer().startCpuTimer(); + } + catch (const std::runtime_error& e) { + tmp = false; + } + // TODO - timer().endCpuTimer(); + if (n > 0) { + odata[0] = 0; + for (int i = 0; i < n-1; i++) { + odata[i+1] = idata[i] + odata[i]; + } + } + if(tmp ==true) timer().endCpuTimer(); } /** @@ -29,9 +42,19 @@ namespace StreamCompaction { * @returns the number of elements remaining after compaction. */ int compactWithoutScan(int n, int *odata, const int *idata) { - timer().startCpuTimer(); // TODO - timer().endCpuTimer(); + if (n > 0) { + timer().startCpuTimer(); + int counter = 0; + for (int i = 0; i < n; i++) { + if (idata[i] != 0) { + odata[counter] = idata[i]; + counter+=1; + } + } + timer().endCpuTimer(); + return counter; + } return -1; } @@ -41,9 +64,37 @@ namespace StreamCompaction { * @returns the number of elements remaining after compaction. */ int compactWithScan(int n, int *odata, const int *idata) { - timer().startCpuTimer(); // TODO - timer().endCpuTimer(); + if (n > 0) { + timer().startCpuTimer(); + + int * indicator = new int[n]; + int * scanIndex = new int[n]; + int tmp = 0; + + // Compute indicator array + for (int i = 0; i < n; i++) { + if (idata[i] != 0) { + indicator[i] = 1; + } + else { + indicator[i] = 0; + } + } + + // Compute scan + scan(n, scanIndex, indicator); + + //Scatter + for (int i = 0; i < n; i++) { + if (indicator[i] == 1) { + odata[scanIndex[i]] = idata[i]; + tmp = scanIndex[i]; + } + } + timer().endCpuTimer(); + return tmp+1; + } return -1; } } diff --git a/Project2-Stream-Compaction/stream_compaction/efficient.cu b/Project2-Stream-Compaction/stream_compaction/efficient.cu index 2db346e..ea340d4 100644 --- a/Project2-Stream-Compaction/stream_compaction/efficient.cu +++ b/Project2-Stream-Compaction/stream_compaction/efficient.cu @@ -3,6 +3,8 @@ #include "common.h" #include "efficient.h" +#define blockSize 128 + namespace StreamCompaction { namespace Efficient { using StreamCompaction::Common::PerformanceTimer; @@ -12,15 +14,128 @@ namespace StreamCompaction { return timer; } + int *dev_arrayA; + int *dev_arrayB; + + int *dev_bools; + int *dev_boolScans; + + int *dev_idata; + int *dev_odata; + + int * dev_indices; + + void printArray(int n, int *a, bool abridged = false) { + printf(" [ "); + for (int i = 0; i < n; i++) { + if (abridged && i + 2 == 15 && n > 16) { + i = n - 2; + printf("... "); + } + printf("%3d ", a[i]); + } + printf("]\n"); + } + + + __global__ void kernEffScanUpSweep(int N, int pow2d, int pow2d1, int* arrA) { + int k = (blockIdx.x * blockDim.x) + threadIdx.x; + if (k >= N) return; + + if ((k % pow2d1) == 0 && (k + pow2d1 - 1)= N) return; + + int tmp = 0; + + if ((k % pow2d1) == 0 && (k + pow2d1 - 1) < N && (k + pow2d - 1) < N) { + tmp = arrA[k + pow2d -1]; + arrA[k + pow2d - 1] = arrA[k + pow2d1 - 1]; + arrA[k + pow2d1 - 1] += tmp; + } + } + + __global__ void kernInitZero(int N, int* array) { + + int tid = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (tid < N) { + array[tid] = 0; + } + } + /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. 
*/ void scan(int n, int *odata, const int *idata) { - timer().startGpuTimer(); + bool tmp = true; + try { + timer().startCpuTimer(); + } + catch (const std::runtime_error& e) { + tmp = false; + } + // TODO - timer().endGpuTimer(); + int n_new = n; + + //check for non-2powerN + if (1 << ilog2ceil(n) != n) + n_new = (1 << ilog2ceil(n)); + + int fullBlocksPerGrid((n_new + blockSize - 1) / blockSize); + + cudaMalloc((void**)&dev_arrayA, n_new * sizeof(int)); + checkCUDAErrorFn("cudaMalloc dev_arrayA failed!"); + + //Initialize to Zero + kernInitZero <<>> (n_new, dev_arrayA); + checkCUDAErrorFn("kernInitZero failed!"); + + // Fill dev_arrayA with idata + cudaMemcpy(dev_arrayA, idata, n * sizeof(int), cudaMemcpyHostToDevice); + checkCUDAErrorFn("cudaMemcpyToSymbol from idata to dev_arrayA failed!"); + + // Upstream + int pow2d1 = 0; + int pow2d = 0; + for (int d = 0; d <= ilog2ceil(n_new)-1; d++) { + pow2d = 1 << (d); + pow2d1 = 1 << (d+1); + kernEffScanUpSweep << > > (n_new, pow2d, pow2d1, dev_arrayA); + checkCUDAErrorFn("kernEffScanUpSweep failed!"); + } + + // Downstream + int *zero = new int[1]; + zero[0] = 0; + cudaMemcpy(dev_arrayA + n_new-1, zero, 1*sizeof(int), cudaMemcpyHostToDevice); + + for (int d = ilog2ceil(n_new)-1; d >= 0; d--) { + pow2d = 1 << (d); + pow2d1 = 1 << (d + 1); + kernEffScanDownSweep << > > (n_new, pow2d, pow2d1, dev_arrayA); + checkCUDAErrorFn("kernGenerateRandomPosArray failed!"); + } + + // Copy back to cpu + cudaMemcpy(odata, dev_arrayA, n*sizeof(int), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + + //printf("BBT Scan Final Computed : \n"); + //printArray(n, odata, true); + + if (tmp == true) timer().endCpuTimer(); + cudaFree(dev_arrayA); + return; } + /** * Performs stream compaction on idata, storing the result into odata. * All zeroes are discarded. 
@@ -33,8 +148,62 @@ namespace StreamCompaction { int compact(int n, int *odata, const int *idata) { timer().startGpuTimer(); // TODO + + //Compute bools + int fullBlocksPerGrid((n + blockSize - 1) / blockSize); + + cudaMalloc((void**)&dev_bools, n * sizeof(int)); + checkCUDAErrorFn("cudaMalloc dev_bools failed!"); + + cudaMalloc((void**)&dev_idata, n*sizeof(int)); + checkCUDAErrorFn("cudaMalloc dev_arrayA failed!"); + + cudaMemcpy(dev_idata, idata, n*sizeof(int), cudaMemcpyHostToDevice); + checkCUDAErrorFn("cudaMemcpyToSymbol from idata to dev_arrayA failed!"); + + Common::kernMapToBoolean<<>>(n, dev_bools, dev_idata); + checkCUDAErrorFn("kernMapToBoolean failed!"); + + //compute scans + int * indices = new int[n]; + int * bools = new int[n]; + + cudaMemcpy(bools, dev_bools, n*sizeof(int), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyToSymbol from bools to dev_bools failed!"); + + scan(n, indices, bools); + + cudaMalloc((void**)&dev_indices, n*sizeof(int)); + checkCUDAErrorFn("cudaMalloc dev_indices failed!"); + + cudaMemcpy(dev_indices, indices, n*sizeof(int), cudaMemcpyHostToDevice); + checkCUDAErrorFn("cudaMemcpyToSymbol from indices to dev_indices failed!"); + + cudaMalloc((void**)&dev_odata, n*sizeof(int)); + checkCUDAErrorFn("cudaMalloc dev_indices failed!"); + + cudaMemcpy(dev_odata, odata, n*sizeof(int), cudaMemcpyHostToDevice); + checkCUDAErrorFn("cudaMemcpyToSymbol from indices to dev_indices failed!"); + + //scatter + Common::kernScatter<<>>(n, dev_odata, dev_idata, dev_bools, dev_indices); + checkCUDAErrorFn("kernScatter failed!"); + + // Copy back to cpu + cudaMemcpy(odata, dev_odata, n*sizeof(int), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_odata to odata failed!"); + + //printf("GPU Compaction : \n"); + //printArray(indices[n - 1], odata, true); + timer().endGpuTimer(); - return -1; + + cudaFree(dev_bools); + cudaFree(dev_idata); + cudaFree(dev_indices); + cudaFree(dev_odata); + + return indices[n-1]; } } } diff --git a/Project2-Stream-Compaction/stream_compaction/naive.cu b/Project2-Stream-Compaction/stream_compaction/naive.cu index 4308876..7697db7 100644 --- a/Project2-Stream-Compaction/stream_compaction/naive.cu +++ b/Project2-Stream-Compaction/stream_compaction/naive.cu @@ -3,6 +3,8 @@ #include "common.h" #include "naive.h" +#define blockSize 128 + namespace StreamCompaction { namespace Naive { using StreamCompaction::Common::PerformanceTimer; @@ -11,15 +13,94 @@ namespace StreamCompaction { static PerformanceTimer timer; return timer; } + + int *dev_arrayA; + int *dev_arrayB; + + void printArray(int n, int *a, bool abridged = false) { + printf(" [ "); + for (int i = 0; i < n; i++) { + if (abridged && i + 2 == 15 && n > 16) { + i = n - 2; + printf("... "); + } + printf("%3d ", a[i]); + } + printf("]\n"); + } + // TODO: __global__ + __global__ void kernPrefixSumScanArray(int N, int pow2d1, int* arrA, int*arrB) { + int k = (blockIdx.x * blockDim.x) + threadIdx.x; + if (k >= N) return; + + if (k >= pow2d1) { + arrB[k] = arrA[k - (pow2d1)] + arrA[k]; + } + } - /** + __global__ void kernExclusiveShiftArray(int N, int* arrA, int*arrB) { + int k = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (k >= N) return; + + if (k == 0) { + arrA[0] = 0; + } + else { + arrA[k] = arrB[k-1]; + } + } + + /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. 
*/ void scan(int n, int *odata, const int *idata) { timer().startGpuTimer(); - // TODO + + int fullBlocksPerGrid((n + blockSize - 1) / blockSize); + + cudaMalloc((void**)&dev_arrayA, n*sizeof(int)); + checkCUDAErrorFn("cudaMalloc dev_arrayA failed!"); + + cudaMalloc((void**)&dev_arrayB, n*sizeof(int)); + checkCUDAErrorFn("cudaMalloc dev_arrayB failed!"); + + // Fill dev_arrayA with idata + cudaMemcpy(dev_arrayA, idata, n*sizeof(int), cudaMemcpyHostToDevice); + checkCUDAErrorFn("cudaMemcpyToSymbol from idata to dev_arrayA failed!"); + + // Fill dev_arrayB with idata + cudaMemcpy(dev_arrayB, idata, n*sizeof(int), cudaMemcpyHostToDevice); + checkCUDAErrorFn("cudaMemcpyToSymbol from idata to dev_arrayB failed!"); + + // Call Scan Kernel + int pow2d1 = 0; + + for (int d = 1; d <= ilog2ceil(n); d++) { + pow2d1 = 1 << (d - 1); + kernPrefixSumScanArray<<>>(n, pow2d1, dev_arrayA, dev_arrayB); + checkCUDAErrorFn("kernGenerateRandomPosArray failed!"); + + //Copy + cudaMemcpy(dev_arrayA, dev_arrayB, n*sizeof(int), cudaMemcpyDeviceToDevice); + } + + kernExclusiveShiftArray <<>> (n, dev_arrayA, dev_arrayB); + checkCUDAErrorFn("kernExclusiveShiftArray failed!"); + + // Fill dev_arrayA with idata + cudaMemcpy(odata, dev_arrayA, n*sizeof(int), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + + //printf("Final Computed after shifting: \n"); + //printArray(n, odata, true); + //printf("Computed: \n"); + timer().endGpuTimer(); - } + + cudaFree(dev_arrayA); + cudaFree(dev_arrayB); + } } } From 9f7b26a6b36866da032db0772960b9cf6e0932cc Mon Sep 17 00:00:00 2001 From: CHHAVI SHARMA Date: Sat, 14 Sep 2019 02:32:29 -0400 Subject: [PATCH 02/76] Subpart 1 completed --- Project2-Stream-Compaction/src/main.cpp | 3 --- Project2-Stream-Compaction/stream_compaction/efficient.cu | 8 ++++++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Project2-Stream-Compaction/src/main.cpp b/Project2-Stream-Compaction/src/main.cpp index 85225fb..d016553 100644 --- a/Project2-Stream-Compaction/src/main.cpp +++ b/Project2-Stream-Compaction/src/main.cpp @@ -117,7 +117,6 @@ int main(int argc, char* argv[]) { expectedCount = count; printArray(count, b, true); printCmpLenResult(count, expectedCount, b, b); - printf("Finished.\n\n"); zeroArray(SIZE, c); printDesc("cpu compact without scan, non-power-of-two"); @@ -126,7 +125,6 @@ int main(int argc, char* argv[]) { expectedNPOT = count; printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); - printf("Finished.\n\n"); zeroArray(SIZE, c); printDesc("cpu compact with scan"); @@ -134,7 +132,6 @@ int main(int argc, char* argv[]) { printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); printArray(count, c, true); printCmpLenResult(count, expectedCount, b, c); - printf("Finished.\n\n"); zeroArray(SIZE, c); printDesc("work-efficient compact, power-of-two"); diff --git a/Project2-Stream-Compaction/stream_compaction/efficient.cu b/Project2-Stream-Compaction/stream_compaction/efficient.cu index ea340d4..cbd2930 100644 --- a/Project2-Stream-Compaction/stream_compaction/efficient.cu +++ b/Project2-Stream-Compaction/stream_compaction/efficient.cu @@ -202,8 +202,12 @@ namespace StreamCompaction { cudaFree(dev_idata); cudaFree(dev_indices); cudaFree(dev_odata); - - return indices[n-1]; + if (idata[n - 1] != 0) { + return indices[n - 1] + 1; + } + else { + return indices[n - 1]; + } } } } From cdea66309c66b317960d1b1f54746f1a23c9aa88 Mon Sep 17 00:00:00 2001 From: 
CHHAVI SHARMA Date: Sat, 14 Sep 2019 19:51:00 -0400 Subject: [PATCH 03/76] Finished stream_compaction --- Project2-Stream-Compaction/src/main.cpp | 2 +- .../stream_compaction/efficient.cu | 44 +++++++++++-------- .../stream_compaction/naive.cu | 9 ++-- .../stream_compaction/thrust.cu | 20 ++++++++- 4 files changed, 49 insertions(+), 26 deletions(-) diff --git a/Project2-Stream-Compaction/src/main.cpp b/Project2-Stream-Compaction/src/main.cpp index d016553..683a3b5 100644 --- a/Project2-Stream-Compaction/src/main.cpp +++ b/Project2-Stream-Compaction/src/main.cpp @@ -13,7 +13,7 @@ #include #include "testing_helpers.hpp" -const int SIZE = 1 << 8; // feel free to change the size of array +const int SIZE = 1 << 20; // feel free to change the size of array const int NPOT = SIZE - 3; // Non-Power-Of-Two int *a = new int[SIZE]; int *b = new int[SIZE]; diff --git a/Project2-Stream-Compaction/stream_compaction/efficient.cu b/Project2-Stream-Compaction/stream_compaction/efficient.cu index cbd2930..e8db5e6 100644 --- a/Project2-Stream-Compaction/stream_compaction/efficient.cu +++ b/Project2-Stream-Compaction/stream_compaction/efficient.cu @@ -73,13 +73,6 @@ namespace StreamCompaction { * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int *odata, const int *idata) { - bool tmp = true; - try { - timer().startCpuTimer(); - } - catch (const std::runtime_error& e) { - tmp = false; - } // TODO int n_new = n; @@ -101,6 +94,15 @@ namespace StreamCompaction { cudaMemcpy(dev_arrayA, idata, n * sizeof(int), cudaMemcpyHostToDevice); checkCUDAErrorFn("cudaMemcpyToSymbol from idata to dev_arrayA failed!"); + bool tmp = true; + try { + timer().startGpuTimer(); + //printf("IN WEScan timer started!\n"); + } + catch (const std::runtime_error& e) { + tmp = false; + } + // Upstream int pow2d1 = 0; int pow2d = 0; @@ -123,6 +125,11 @@ namespace StreamCompaction { checkCUDAErrorFn("kernGenerateRandomPosArray failed!"); } + if (tmp == true) { + timer().endGpuTimer(); + //printf("IN WEScan timer ended!\n"); + } + // Copy back to cpu cudaMemcpy(odata, dev_arrayA, n*sizeof(int), cudaMemcpyDeviceToHost); checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); @@ -130,7 +137,6 @@ namespace StreamCompaction { //printf("BBT Scan Final Computed : \n"); //printArray(n, odata, true); - if (tmp == true) timer().endCpuTimer(); cudaFree(dev_arrayA); return; } @@ -146,10 +152,10 @@ namespace StreamCompaction { * @returns The number of elements remaining after compaction. 
*/ int compact(int n, int *odata, const int *idata) { - timer().startGpuTimer(); + // TODO - - //Compute bools + int * indices = new int[n]; + int * bools = new int[n]; int fullBlocksPerGrid((n + blockSize - 1) / blockSize); cudaMalloc((void**)&dev_bools, n * sizeof(int)); @@ -160,17 +166,17 @@ namespace StreamCompaction { cudaMemcpy(dev_idata, idata, n*sizeof(int), cudaMemcpyHostToDevice); checkCUDAErrorFn("cudaMemcpyToSymbol from idata to dev_arrayA failed!"); - + + timer().startGpuTimer(); + + //Compute bools Common::kernMapToBoolean<<>>(n, dev_bools, dev_idata); checkCUDAErrorFn("kernMapToBoolean failed!"); - //compute scans - int * indices = new int[n]; - int * bools = new int[n]; - cudaMemcpy(bools, dev_bools, n*sizeof(int), cudaMemcpyDeviceToHost); checkCUDAErrorFn("cudaMemcpyToSymbol from bools to dev_bools failed!"); - + + //compute scans scan(n, indices, bools); cudaMalloc((void**)&dev_indices, n*sizeof(int)); @@ -188,6 +194,8 @@ namespace StreamCompaction { //scatter Common::kernScatter<<>>(n, dev_odata, dev_idata, dev_bools, dev_indices); checkCUDAErrorFn("kernScatter failed!"); + + timer().endGpuTimer(); // Copy back to cpu cudaMemcpy(odata, dev_odata, n*sizeof(int), cudaMemcpyDeviceToHost); @@ -196,8 +204,6 @@ namespace StreamCompaction { //printf("GPU Compaction : \n"); //printArray(indices[n - 1], odata, true); - timer().endGpuTimer(); - cudaFree(dev_bools); cudaFree(dev_idata); cudaFree(dev_indices); diff --git a/Project2-Stream-Compaction/stream_compaction/naive.cu b/Project2-Stream-Compaction/stream_compaction/naive.cu index 7697db7..7b989f2 100644 --- a/Project2-Stream-Compaction/stream_compaction/naive.cu +++ b/Project2-Stream-Compaction/stream_compaction/naive.cu @@ -56,8 +56,7 @@ namespace StreamCompaction { * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int *odata, const int *idata) { - timer().startGpuTimer(); - + int fullBlocksPerGrid((n + blockSize - 1) / blockSize); cudaMalloc((void**)&dev_arrayA, n*sizeof(int)); @@ -74,6 +73,8 @@ namespace StreamCompaction { cudaMemcpy(dev_arrayB, idata, n*sizeof(int), cudaMemcpyHostToDevice); checkCUDAErrorFn("cudaMemcpyToSymbol from idata to dev_arrayB failed!"); + timer().startGpuTimer(); + // Call Scan Kernel int pow2d1 = 0; @@ -89,6 +90,8 @@ namespace StreamCompaction { kernExclusiveShiftArray <<>> (n, dev_arrayA, dev_arrayB); checkCUDAErrorFn("kernExclusiveShiftArray failed!"); + timer().endGpuTimer(); + // Fill dev_arrayA with idata cudaMemcpy(odata, dev_arrayA, n*sizeof(int), cudaMemcpyDeviceToHost); checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); @@ -97,8 +100,6 @@ namespace StreamCompaction { //printArray(n, odata, true); //printf("Computed: \n"); - timer().endGpuTimer(); - cudaFree(dev_arrayA); cudaFree(dev_arrayB); } diff --git a/Project2-Stream-Compaction/stream_compaction/thrust.cu b/Project2-Stream-Compaction/stream_compaction/thrust.cu index 1def45e..e5310b3 100644 --- a/Project2-Stream-Compaction/stream_compaction/thrust.cu +++ b/Project2-Stream-Compaction/stream_compaction/thrust.cu @@ -17,12 +17,28 @@ namespace StreamCompaction { /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. 
*/ + void scan(int n, int *odata, const int *idata) { - timer().startGpuTimer(); + // TODO use `thrust::exclusive_scan` // example: for device_vectors dv_in and dv_out: // thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); - timer().endGpuTimer(); + + thrust::host_vectorhost_idata(idata, idata+n); + thrust::host_vectorhost_odata(odata, odata+n); + checkCUDAErrorFn("thrust::host_vector host_odata or host_idata failed!"); + printf("Created Thrust pointers \n"); + + thrust::device_vector device_idata = host_idata; + thrust::device_vector device_odata = host_odata; + checkCUDAErrorFn("thrust::device_vector device_idata or device_odata failed!"); + + timer().startGpuTimer(); + thrust::exclusive_scan(device_idata.begin(), device_idata.end(), device_odata.begin()); + timer().endGpuTimer(); + + // Copy back to cpu + thrust::copy(device_odata.begin(), device_odata.end(), odata); } } } From 9fe20f4b5d7848034f5776b5e60a8af1c4777155 Mon Sep 17 00:00:00 2001 From: CHHAVI SHARMA Date: Sun, 15 Sep 2019 01:07:16 -0400 Subject: [PATCH 04/76] init --- .../character_recognition/CMakeLists.txt | 2 +- .../character_recognition/common.cu | 10 +- .../character_recognition/mlp.cu | 76 +++++++++- .../character_recognition/mlp.h | 9 +- Project2-Character-Recognition/src/main.cpp | 139 ++---------------- 5 files changed, 105 insertions(+), 131 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/CMakeLists.txt b/Project2-Character-Recognition/character_recognition/CMakeLists.txt index 7446175..9e834c1 100644 --- a/Project2-Character-Recognition/character_recognition/CMakeLists.txt +++ b/Project2-Character-Recognition/character_recognition/CMakeLists.txt @@ -7,5 +7,5 @@ set(SOURCE_FILES cuda_add_library(character_recognition ${SOURCE_FILES} - OPTIONS -arch=sm_20 + OPTIONS -arch=sm_61 ) diff --git a/Project2-Character-Recognition/character_recognition/common.cu b/Project2-Character-Recognition/character_recognition/common.cu index 2a754d4..b15a87a 100644 --- a/Project2-Character-Recognition/character_recognition/common.cu +++ b/Project2-Character-Recognition/character_recognition/common.cu @@ -5,11 +5,13 @@ void checkCUDAErrorFn(const char *msg, const char *file, int line) { if (cudaSuccess == err) { return; } - - fprintf(stderr, "CUDA error"); - if (file) { + fprintf(stderr, "CUDA error"); + + if (file) { fprintf(stderr, " (%s:%d)", file, line); } - fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); + + fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); exit(EXIT_FAILURE); } + diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index 5a3ed7f..7b13acd 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -3,6 +3,9 @@ #include "common.h" #include "mlp.h" +#define blockSize 128 + + namespace CharacterRecognition { using Common::PerformanceTimer; PerformanceTimer& timer() @@ -10,7 +13,7 @@ namespace CharacterRecognition { static PerformanceTimer timer; return timer; } - + // TODO: __global__ /** @@ -24,4 +27,75 @@ namespace CharacterRecognition { */ // TODO: implement required elements for MLP sections 1 and 2 here + + double * iLayer; + double * hLayer; + double * oLayer; + + double * w_kj; + double * w_ji; + + void printArray(int n, int *a, bool abridged = false) { + printf(" [ "); + for (int i = 0; i < n; i++) { + if (abridged && i + 2 == 15 && n > 16) { + i = n - 2; + printf("... 
"); + } + printf("%3d ", a[i]); + } + printf("]\n"); + } + + // kernel to to matmul + // A mxn + // B nxk + // C mxk + __global__ matrixMultiplyKernel(const float *dev_A, const float *dev_B, const float *dev_C, int m, int n, int k) { + + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + int sum = 0; + if (col < k && row < m) + { + for (int i = 0; i < n; i++) + sum += dev_A[row * n + i] * dev_B[i * k + col]; + + dev_C[row * k + col] = sum; + } + } + } + + + void trainMLP(int n_input, int n_classes, int n_hidden, double *odata, const double *idata) { + timer().startGpuTimer(); + + // todo + //FORWARD PASS + // initlaise Wight layers randomly + // initialise input layer from i data + // Compute h1 = W1*x + // Compute H1 = Sig(h1) + // Compute h2 = W2*H1 + // Compute H2 = Sig(h2) + // Compute y=Softmax(H2) + + // Sample MatrixMultiplication + int n; + int m; + int k; + dim3 dimGrid((k + blockSize - 1) / blockSize, (m + blockSize - 1) / blockSize); + dim3 dimBlock(blockSize, blockSize); + + + timer().endGpuTimer(); + } + + // MLP section 2 Character Reader + //void initCharMLP(int N, int P, int iDim, int hDim, int oDim); + //void readData(int N, int P, int iDim, int hDim, int oDim); + //void trainCharMLP(int N, int P, int iDim, int hDim, int oDim); + //void testCharMLP(int N, int P, int iDim, int hDim, int oDim); + } diff --git a/Project2-Character-Recognition/character_recognition/mlp.h b/Project2-Character-Recognition/character_recognition/mlp.h index 2096228..ba53ffd 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.h +++ b/Project2-Character-Recognition/character_recognition/mlp.h @@ -4,6 +4,13 @@ namespace CharacterRecognition { Common::PerformanceTimer& timer(); + // TODO: implement required elements for MLP sections 1 and 2 here + + // MLP section 1 and 2 Character Reader + void initMLP(int N, int P, int iDim, int hDim, int oDim); + void readData(int N, int P, int iDim, int hDim, int oDim); + void trainMLP(int N, int P, int iDim, int hDim, int oDim); + void testMLP(int N, int P, int iDim, int hDim, int oDim); - // TODO: implement required elements for MLP sections 1 and 2 here } + diff --git a/Project2-Character-Recognition/src/main.cpp b/Project2-Character-Recognition/src/main.cpp index 11dd534..9f11dcc 100644 --- a/Project2-Character-Recognition/src/main.cpp +++ b/Project2-Character-Recognition/src/main.cpp @@ -11,8 +11,17 @@ #include #include "testing_helpers.hpp" -const int SIZE = 1 << 8; // feel free to change the size of array -const int NPOT = SIZE - 3; // Non-Power-Of-Two + +// CONFIG ITEMS + +const int N = 52; // Number of examples +const int P = 10201; // Feature length + +int inputLayerSize = 2; //NN input layer size +int hiddenLayerSize = 2; //NN hidden layer size +int outputLayerSize = 1; //NN output layer size +int nClasses = 2; //NN number of classes + int *a = new int[SIZE]; int *b = new int[SIZE]; int *c = new int[SIZE]; @@ -22,131 +31,13 @@ int main(int argc, char* argv[]) { printf("\n"); printf("****************\n"); - printf("** SCAN TESTS **\n"); + printf("***MLP TESTS***\n"); printf("****************\n"); - genArray(SIZE - 1, a, 50); // Leave a 0 at the end to test that edge case - a[SIZE - 1] = 0; - printArray(SIZE, a, true); - - // initialize b using StreamCompaction::CPU::scan you implement - // We use b for further comparison. Make sure your StreamCompaction::CPU::scan is correct. - // At first all cases passed because b && c are all zeroes. 
- zeroArray(SIZE, b); - printDesc("cpu scan, power-of-two"); - StreamCompaction::CPU::scan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(SIZE, b, true); - - zeroArray(SIZE, c); - printDesc("cpu scan, non-power-of-two"); - StreamCompaction::CPU::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(NPOT, b, true); - printCmpResult(NPOT, b, c); - - zeroArray(SIZE, c); - printDesc("naive scan, power-of-two"); - StreamCompaction::Naive::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - - /* For bug-finding only: Array of 1s to help find bugs in stream compaction or scan - onesArray(SIZE, c); - printDesc("1s array for finding bugs"); - StreamCompaction::Naive::scan(SIZE, c, a); - printArray(SIZE, c, true); */ - - zeroArray(SIZE, c); - printDesc("naive scan, non-power-of-two"); - StreamCompaction::Naive::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); - printCmpResult(NPOT, b, c); - - zeroArray(SIZE, c); - printDesc("work-efficient scan, power-of-two"); - StreamCompaction::Efficient::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - - zeroArray(SIZE, c); - printDesc("work-efficient scan, non-power-of-two"); - StreamCompaction::Efficient::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); - - zeroArray(SIZE, c); - printDesc("thrust scan, power-of-two"); - StreamCompaction::Thrust::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - - zeroArray(SIZE, c); - printDesc("thrust scan, non-power-of-two"); - StreamCompaction::Thrust::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); - - printf("\n"); - printf("*****************************\n"); - printf("** STREAM COMPACTION TESTS **\n"); - printf("*****************************\n"); - - // Compaction tests - - genArray(SIZE - 1, a, 4); // Leave a 0 at the end to test that edge case - a[SIZE - 1] = 0; - printArray(SIZE, a, true); - - int count, expectedCount, expectedNPOT; - - // initialize b using StreamCompaction::CPU::compactWithoutScan you implement - // We use b for further comparison. Make sure your StreamCompaction::CPU::compactWithoutScan is correct. 
- zeroArray(SIZE, b); - printDesc("cpu compact without scan, power-of-two"); - count = StreamCompaction::CPU::compactWithoutScan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedCount = count; - printArray(count, b, true); - printCmpLenResult(count, expectedCount, b, b); - - zeroArray(SIZE, c); - printDesc("cpu compact without scan, non-power-of-two"); - count = StreamCompaction::CPU::compactWithoutScan(NPOT, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedNPOT = count; - printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); - - zeroArray(SIZE, c); - printDesc("cpu compact with scan"); - count = StreamCompaction::CPU::compactWithScan(SIZE, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(count, c, true); - printCmpLenResult(count, expectedCount, b, c); - zeroArray(SIZE, c); - printDesc("work-efficient compact, power-of-two"); - count = StreamCompaction::Efficient::compact(SIZE, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(count, c, true); - printCmpLenResult(count, expectedCount, b, c); - zeroArray(SIZE, c); - printDesc("work-efficient compact, non-power-of-two"); - count = StreamCompaction::Efficient::compact(NPOT, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); + CharacterRecognition::trainMLP(SIZE, b, a); + CharacterRecognition::trainMLP(SIZE, b, a); - system("pause"); // stop Win32 console from closing on exit - delete[] a; - delete[] b; - delete[] c; + return 0; } From a539a8654c2afd0a240027f1134d7798973264a3 Mon Sep 17 00:00:00 2001 From: CHHAVI SHARMA Date: Mon, 16 Sep 2019 04:09:08 -0400 Subject: [PATCH 05/76] MLP Basic Implementation --- .../character_recognition/mlp.cu | 481 ++++++++++++++++-- .../character_recognition/mlp.h | 8 +- Project2-Character-Recognition/src/main.cpp | 43 +- 3 files changed, 465 insertions(+), 67 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index 7b13acd..fa2eb4d 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -2,8 +2,10 @@ #include #include "common.h" #include "mlp.h" +#include #define blockSize 128 +#define blockWidth 16 namespace CharacterRecognition { @@ -13,28 +15,33 @@ namespace CharacterRecognition { static PerformanceTimer timer; return timer; } - - // TODO: __global__ - - /** - * Example of use case (follow how you did it in stream compaction) - */ - /*void scan(int n, int *odata, const int *idata) { - timer().startGpuTimer(); - // TODO - timer().endGpuTimer(); - } - */ - - // TODO: implement required elements for MLP sections 1 and 2 here + + // Initlialiations - double * iLayer; - double * hLayer; - double * oLayer; + //layers + double *dev_iLayer; + double *dev_hLayer; + double *dev_oLayer; + double *dev_smaxDen; + double *dev_losses; + int *dev_gtruth; + + //weights + double *dev_w_kj; + double *dev_w_ji; + + //Derivatives + double *dev_dL_dw_ji; + double *dev_dL_dw_kj; + double *dev_dL_dscores; + double 
*dev_dL_dscores_2; + + double *dev_hLayer_T; + double *dev_iLayer_T; - double * w_kj; - double * w_ji; + + void printArray(int n, int *a, bool abridged = false) { printf(" [ "); for (int i = 0; i < n; i++) { @@ -46,56 +53,428 @@ namespace CharacterRecognition { } printf("]\n"); } + void printFloatArray(int n, double *a, bool abridged = false) { + printf(" [ "); + for (int i = 0; i < n; i++) { + if (abridged && i + 2 == 15 && n > 16) { + i = n - 2; + printf("... "); + } + printf("%3f ", a[i]); + } + printf("]\n"); + } + + + + // Kernel for Gradient update on Weights + __global__ void kernUpdateWeights(int N, double *dev_dw, double *dev_w, double LR) { + + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if (tid < N) { + dev_w[tid] += -LR * dev_dw[tid]; + } + } + + // Kernel for derivative of sigmoid + __global__ void kernGradSigmoid(int N, int C, double *dev_hLayer) { - // kernel to to matmul - // A mxn - // B nxk - // C mxk - __global__ matrixMultiplyKernel(const float *dev_A, const float *dev_B, const float *dev_C, int m, int n, int k) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N*C) { + dev_hLayer[tid] = dev_hLayer[tid] * (1 - dev_hLayer[tid]); + } + } + + // Matrix Transpose + __global__ void kernMatrixTranspose(int N, int C, double *matrix, double *matrix_T) { + int row = blockIdx.y * blockDim.y + threadIdx.y; int col = blockIdx.x * blockDim.x + threadIdx.x; + + if (col < C && row < N) { + matrix_T[C*row + col] = matrix[N*col + row]; + } + } + + // Divide by N + __global__ void kernDivNdscores(int N, int C, double *dev_dL_dscores) { + + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N*C) { + dev_dL_dscores[tid] /= N; + } + } + + // Compute dscores gradient + __global__ void kernSetdscores(int N, int C, double *dev_dL_dscores, int *dev_gtruth) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if (tid < N) { + dev_dL_dscores[tid*C + dev_gtruth[tid]] -= 1; + } + } + + // compute loss per example + __global__ void kernLossPerN(int N, int C, double* dev_oLayer, int* dev_gtruth, double* dev_losses) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if (tid < N) { + dev_losses[tid] = -log(dev_oLayer[tid*C + dev_gtruth[tid]]); + } + } + + // kernel to compute exp softmax + __global__ void kernSoftmax(int N, int C, double* scores, double *sums) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N) { + for (int i = 0; i < C; i++) { + scores[tid*C + i] = exp(scores[tid*C + i]) / sums[tid]; + } + } + } + + // kernel to exp sum across classes + __global__ void kernSumRow(int N, int C, double* scores, double *sums) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N) { + for (int i = 0; i < C; i++) { + sums[tid] += exp(scores[tid*C + i]); + } + } + } + + + // kernel to init weights + __global__ void kernInitWeights(int N, double* weights) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if (tid < N) { + weights[tid] = 0.5; + } + + } + + // kern for sigmoid // f(x) = 1/(1 + e^-x). 
+ __global__ void kernSigmoid(int N, double *idata) { + + int tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid < N) { + idata[tid] = 1.0 / (1.0 + std::exp(-idata[tid])); + } + } - int sum = 0; + // kern for element wise product + __global__ void kernElementProduct(int N, double *matrixA, double* matrixB, double* matrixC) { + + int tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid < N) { + matrixC[tid] = matrixA[tid] * matrixB[tid]; + } + } + + + // kernel to to matmul // A mxn // B nxk // C mxk + __global__ void kernMatrixMultiply(const double *dev_A, const double *dev_B, double *dev_C, int m, int n, int k) { + + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + double sum = 0; if (col < k && row < m) { for (int i = 0; i < n; i++) sum += dev_A[row * n + i] * dev_B[i * k + col]; - dev_C[row * k + col] = sum; } } - } + + // Dumb reduction + __global__ void kernReduction(int N, double *dev_losses) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + double sum = 0.0; + if (tid == 0) { + for (int i = 0; i < N; i++) { + sum += dev_losses[tid]; + } + dev_losses[N-1]=sum; + } - void trainMLP(int n_input, int n_classes, int n_hidden, double *odata, const double *idata) { + } + + void trainMLP(int N, int D, int C, double *idata, int *preds, int *gtruth, int epochs, double *losses, const double LR) { + timer().startGpuTimer(); - // todo - //FORWARD PASS - // initlaise Wight layers randomly - // initialise input layer from i data - // Compute h1 = W1*x - // Compute H1 = Sig(h1) - // Compute h2 = W2*H1 - // Compute H2 = Sig(h2) - // Compute y=Softmax(H2) - - // Sample MatrixMultiplication - int n; - int m; - int k; - dim3 dimGrid((k + blockSize - 1) / blockSize, (m + blockSize - 1) / blockSize); - dim3 dimBlock(blockSize, blockSize); + // N = number of examples + // D = dim of each example + // C = number of classes + // NETWORK DEFITION_____________ + // Compute f1 = W1*X1 + // Compute X2 = Sig(f1) + // Compute Scroes S = W2*X2 + // Compute Probab P = Softmax(S) + // Compute Loss L = CEntropy(P) - timer().endGpuTimer(); - } + //================================================================ + //======================INITIALIZATIONS=========================== + //================================================================ + + // Allocate input layer + cudaMalloc((void**)&dev_iLayer, N*D*sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_iLayer failed!"); + + cudaMemcpy(dev_iLayer, idata, N*D*sizeof(double), cudaMemcpyHostToDevice); + checkCUDAErrorFn("cudaMemcpyToSymbol from idata to dev_iLayer failed!"); + + + // Allocate hidden layer + cudaMalloc((void**)&dev_hLayer, N*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_hLayer failed!"); + + + // Allocate output layer + cudaMalloc((void**)&dev_oLayer, N*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_oLayer failed!"); + + + // Allocate softmax Den holder + cudaMalloc((void**)&dev_smaxDen, N* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_smaxDen failed!"); + + + // Allocate losses holder + cudaMalloc((void**)&dev_losses, N*sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_losses failed!"); + + + // Allocate gtruth holder + cudaMalloc((void**)&dev_gtruth , N * sizeof(int)); + checkCUDAErrorFn("cudaMalloc dev_gtruth failed!"); + + cudaMemcpy(dev_gtruth, gtruth, N*sizeof(int), cudaMemcpyHostToDevice); + checkCUDAErrorFn("cudaMemcpyToSymbol from gtruth to dev_gtruth failed!"); + + + // Allocate Weights + cudaMalloc((void**)&dev_w_kj, D*C*sizeof(double)); + 
checkCUDAErrorFn("cudaMalloc dev_w_kj failed!"); + + cudaMalloc((void**)&dev_w_ji, C*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_w_ji failed!"); + + + // Allocate Derivatives + cudaMalloc((void**)&dev_dL_dw_kj, D*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_w_kj failed!"); + + cudaMalloc((void**)&dev_dL_dw_ji, C*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_w_ji failed!"); + + cudaMalloc((void**)&dev_dL_dscores, N*C*sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_dL_dscores failed!"); + + cudaMalloc((void**)&dev_dL_dscores_2, N*C * sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_dL_dscores_2 failed!"); + + cudaMalloc((void**)&dev_hLayer_T, N*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); + + cudaMalloc((void**)&dev_iLayer_T, N*D* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); + + + // Init weights kj + kernInitWeights <<<((D*C + blockSize - 1) / blockSize), blockSize >> > (D*C, dev_w_kj); + checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); + + // Init weights ji + kernInitWeights <<<((C*C + blockSize - 1) / blockSize), blockSize >> > (C*C, dev_w_ji); + checkCUDAErrorFn("kernInitWeights dev_w_ji failed!"); + + + //================================================================ + //======================TRAINING LOOP============================= + //================================================================ + + for (int i = 0; i < epochs; i++) { + + //================================================================ + //========================= FORWARD ============================== + + // STEP 1 + // f1 = W1*X1 (Matrix Mul) + //================================= + // dev_hLayer = dev_iLayer*dev_w_kj + // NxC = NxD DxC + + dim3 dimBlock(blockWidth, blockWidth); + dim3 dimGrid; + dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; + kernMatrixMultiply <<>> (dev_iLayer, dev_w_kj, dev_hLayer, N, D, C); + + // Copy back to cpu + double *tmp = new double[N*C]; + //cudaMemcpy(tmp, dev_hLayer, N*C* sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + //printf("Post matmul\n"); + //printFloatArray(N*C, tmp, true); + + // STEP 2 + // X2 = Sigmoid(f1) + //================================ + // dev_hLayer = sigmoid(dev_hLayer) + // NxC = NxC + kernSigmoid <<<((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_hLayer); + + + // Copy back to cpu + //cudaMemcpy(tmp, dev_hLayer, N*C* sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + //printf("Post sigmoid\n"); + //printFloatArray(N*C, tmp, true); + + // STEP 3 + // Scores S = W2*X2 (Matrix Mul) + //================================ + // dev_oLayer = dev_hLayer*dev_w_ji + // NxC = NxC CxC + kernMatrixMultiply <<>> (dev_hLayer, dev_w_ji, dev_oLayer, N, C, C); + checkCUDAErrorFn("kernMatrixMultiply failed!"); + + // Copy back to cpu + //cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + //printf("Post S=W2*x2\n"); + //printFloatArray(N*C, tmp, true); + + // STEP 4 + // P = Softmax(S) + //=============== + // dev_smaxDen = Sum_Over_classses(dev_olayer) + // dev_olayer = dev_olayer/Sum_Over_classses + // NxC = NxC 1 + kernSumRow<<<((N + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_oLayer, dev_smaxDen); + kernSoftmax << <((N + blockSize - 1) / blockSize), 
blockSize >> > (N, C, dev_oLayer, dev_smaxDen); + checkCUDAErrorFn("kernSumRow or kernSoftmax failed!"); + + // Copy back to cpu + //cudaMemcpy(tmp, dev_smaxDen, N*sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!"); + //printf("Post dev_smaxDen\n"); + //printFloatArray(N, tmp, true); + + // Copy back to cpu + cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!"); + printf("Post Softmax\n"); + printFloatArray(N*C, tmp, true); + + // Compute Loss | Cross Entropy Loss + //================================== + // Compute Loss L = CEntropy(P) + kernLossPerN<<<((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_gtruth, dev_losses); + checkCUDAErrorFn("kernLossPerN failed!"); + + // Copy back to cpu + cudaMemcpy(tmp, dev_losses, N*sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to tmp failed!"); + printf("Post dev_losses\n"); + printFloatArray(N, tmp, true); + + // Dumb Reduction + kernReduction<< <((N + blockSize - 1) / blockSize), blockSize >> > (N, dev_losses); + // Copy back to cpu + cudaMemcpy(tmp, dev_losses+N-1, sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to tmp failed!"); + printf("Epoch: %3d | dev_loss %3f \n", i, tmp[0]); + + // Track loss here + losses[i] = tmp[0]; + + //================================================================= + //========================= BACKPROP ============================== + + // STEP 1 : Gradient wrt w_ji + // dW_ji = Probs_k - [1](gth == k) dev_dL_dscores; + cudaMemcpy(dev_dL_dscores, dev_oLayer, N*C*sizeof(double), cudaMemcpyDeviceToDevice); + checkCUDAErrorFn("cudaMemcpyFromSymbol from probabs to dev_dL_dscores failed!"); + + kernSetdscores << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_dL_dscores, dev_gtruth); + checkCUDAErrorFn("kernSetdscores failed!"); + + kernDivNdscores <<<((N*C + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_dL_dscores); + checkCUDAErrorFn("kernDivNdscores failed!"); - // MLP section 2 Character Reader - //void initCharMLP(int N, int P, int iDim, int hDim, int oDim); - //void readData(int N, int P, int iDim, int hDim, int oDim); - //void trainCharMLP(int N, int P, int iDim, int hDim, int oDim); - //void testCharMLP(int N, int P, int iDim, int hDim, int oDim); + dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; + kernMatrixTranspose <<> > (N, C, dev_hLayer, dev_hLayer_T); + dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; + kernMatrixMultiply << > > (dev_hLayer_T, dev_dL_dscores, dev_dL_dw_ji, C, N, C); + checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); + + // STEP 2 : Gradient wrt w_kj + + // Mul dscores * dev_w_kj == dev_dL_dscores_2 + dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; + kernMatrixMultiply << > > (dev_dL_dscores, dev_w_kj, dev_dL_dscores_2, N, C, C); + checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); + + // compute sig gradient on dev_hlayer + kernGradSigmoid <<<((N*C + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_hLayer); + checkCUDAErrorFn("kernGradSigmoid failed!"); + + //Element wise mul dev_dL_dscores_2 = dev_dL_dscores_2 . 
dev_hlayer[sig gradient] + kernElementProduct <<<((N*C + blockSize - 1) / blockSize), blockSize >>> (N*C, dev_dL_dscores_2, dev_hLayer, dev_dL_dscores_2); + checkCUDAErrorFn("kernElementProduct failed!"); + + // Transpose X1 + dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (D + dimBlock.y - 1) / dimBlock.y; + kernMatrixTranspose <<>> (N, D, dev_iLayer, dev_iLayer_T); + + // matrix Mul + dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; + kernMatrixMultiply << > > (dev_iLayer_T, dev_dL_dscores_2, dev_dL_dw_kj, D, N, C); + checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); + + + //================================================================= + //========================= Update Weights========================= + + // Update weights kj + kernUpdateWeights << <((D*C + blockSize - 1) / blockSize), blockSize >> > (D*C, dev_dL_dw_kj, dev_w_kj, LR); + checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); + + // InitUpdate weights ji + kernUpdateWeights << <((C*C + blockSize - 1) / blockSize), blockSize >> > (C*C, dev_dL_dw_ji, dev_w_ji, LR); + checkCUDAErrorFn("kernInitWeights dev_w_ji failed!"); + + // COntinue to next epoch + double *tmp2 = new double[D*D]; + cudaMemcpy(tmp2, dev_dL_dw_kj, D*C*sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("dev_dL_dw_kj memcopy failed!"); + printFloatArray(D*C, tmp2, true); + cudaMemcpy(tmp2, dev_dL_dw_ji, C*C * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("dev_dL_dw_ji memcopy failed!"); + printFloatArray(C*C, tmp2, true); + + printf("\n"); + } + + + printf("Finished training.\n"); + + timer().endGpuTimer(); + } } diff --git a/Project2-Character-Recognition/character_recognition/mlp.h b/Project2-Character-Recognition/character_recognition/mlp.h index ba53ffd..cede1eb 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.h +++ b/Project2-Character-Recognition/character_recognition/mlp.h @@ -7,10 +7,10 @@ namespace CharacterRecognition { // TODO: implement required elements for MLP sections 1 and 2 here // MLP section 1 and 2 Character Reader - void initMLP(int N, int P, int iDim, int hDim, int oDim); - void readData(int N, int P, int iDim, int hDim, int oDim); - void trainMLP(int N, int P, int iDim, int hDim, int oDim); - void testMLP(int N, int P, int iDim, int hDim, int oDim); + //void initMLP(int N, int P, int iDim, int hDim, int oDim); + //void readData(int N, int P, int iDim, int hDim, int oDim); + //void testMLP(int N, int P, int iDim, int hDim, int oDim); + void trainMLP(int N, int D, int C, double *idata, int *preds, int *gtruth, int epochs, double *losses, const double LR); } diff --git a/Project2-Character-Recognition/src/main.cpp b/Project2-Character-Recognition/src/main.cpp index 9f11dcc..b4addfe 100644 --- a/Project2-Character-Recognition/src/main.cpp +++ b/Project2-Character-Recognition/src/main.cpp @@ -11,20 +11,20 @@ #include #include "testing_helpers.hpp" +//____CONFIG Neural Network_____________________ +const int N = 4; // 52; // Number of examples +const int D = 2; //; // Feature length per example +const int C = 2; // NN number of classes -// CONFIG ITEMS +const double LR = 0.1; -const int N = 52; // Number of examples -const int P = 10201; // Feature length +const int epochs = 1000; +double *losses = new double[epochs]; -int inputLayerSize = 2; //NN input layer size -int hiddenLayerSize = 2; //NN hidden layer size -int outputLayerSize = 1; //NN output layer size -int nClasses = 2; //NN number of classes +double *idata = 
new double[N*D]; +int * preds = new int[N]; +int * gtruth = new int[N]; -int *a = new int[SIZE]; -int *b = new int[SIZE]; -int *c = new int[SIZE]; int main(int argc, char* argv[]) { // Scan tests @@ -34,10 +34,29 @@ int main(int argc, char* argv[]) { printf("***MLP TESTS***\n"); printf("****************\n"); + printf("Launch XOR Training\n"); + - CharacterRecognition::trainMLP(SIZE, b, a); - CharacterRecognition::trainMLP(SIZE, b, a); + // XOR input dtat set 2 * 4 + idata[0] = 0; + idata[1] = 0; + idata[2] = 0; + idata[3] = 1; + idata[4] = 1; + idata[5] = 0; + idata[6] = 1; + idata[7] = 1; + + // XOR input dtat set 2 * 4 + gtruth[0] = 0; + gtruth[1] = 1; + gtruth[2] = 1; + gtruth[3] = 0; + + CharacterRecognition::trainMLP(N, D, C, idata, preds, gtruth, epochs, losses, LR); + + printf("\nCompleted XOR Training\n"); return 0; } From 0b2d9ffda14a07beeeba2daaef3a3afe6031a730 Mon Sep 17 00:00:00 2001 From: CHHAVI SHARMA Date: Mon, 16 Sep 2019 18:01:33 -0400 Subject: [PATCH 06/76] Updates on BP --- .../character_recognition/mlp.cu | 370 ++++++++---- .../character_recognition/mlp.cu_back | 542 ++++++++++++++++++ .../character_recognition/mlp.h | 2 +- Project2-Character-Recognition/src/main.cpp | 7 +- 4 files changed, 808 insertions(+), 113 deletions(-) create mode 100644 Project2-Character-Recognition/character_recognition/mlp.cu_back diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index fa2eb4d..e2f74a0 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -2,29 +2,36 @@ #include #include "common.h" #include "mlp.h" + #include +#include #define blockSize 128 #define blockWidth 16 namespace CharacterRecognition { - using Common::PerformanceTimer; - PerformanceTimer& timer() - { - static PerformanceTimer timer; - return timer; - } - + using Common::PerformanceTimer; + PerformanceTimer& timer() + { + static PerformanceTimer timer; + return timer; + } + // Initlialiations - + //layers double *dev_iLayer; double *dev_hLayer; double *dev_oLayer; double *dev_smaxDen; + double *dev_losses; + double *dev_LossAvg; + + // gtruth and preds int *dev_gtruth; + int *dev_preds; //weights double *dev_w_kj; @@ -40,8 +47,30 @@ namespace CharacterRecognition { double *dev_iLayer_T; + //============================================= + // Rnadom Number Generation using cuRand on GPU + //============================================= + curandState *devState; + + __global__ void kernInitCurand(curandState *state, int N, unsigned long seed) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N) { + curand_init(seed, tid, 0, &state[tid]); + } + } + + __global__ void KernGenRand(curandState *state, int N, double *w) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N) { + w[tid] = 2.0*curand_uniform(&state[tid]) - 1.0; // Between -1 and 1 + } + } + + //=================================================================== + //=====KERNEL DEFNITIONS FOR Forward and Backward==================== + //=================================================================== + - void printArray(int n, int *a, bool abridged = false) { printf(" [ "); for (int i = 0; i < n; i++) { @@ -51,7 +80,7 @@ namespace CharacterRecognition { } printf("%3d ", a[i]); } - printf("]\n"); + printf("]\n\n"); } void printFloatArray(int n, double *a, bool abridged = false) { printf(" [ "); @@ -62,7 +91,7 @@ namespace CharacterRecognition { } printf("%3f ", 
a[i]); } - printf("]\n"); + printf("]\n\n"); } @@ -81,7 +110,7 @@ namespace CharacterRecognition { __global__ void kernGradSigmoid(int N, int C, double *dev_hLayer) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - + if (tid < N*C) { dev_hLayer[tid] = dev_hLayer[tid] * (1 - dev_hLayer[tid]); } @@ -100,7 +129,7 @@ namespace CharacterRecognition { // Divide by N __global__ void kernDivNdscores(int N, int C, double *dev_dL_dscores) { - + int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < N*C) { dev_dL_dscores[tid] /= N; @@ -108,7 +137,7 @@ namespace CharacterRecognition { } // Compute dscores gradient - __global__ void kernSetdscores(int N, int C, double *dev_dL_dscores, int *dev_gtruth) { + __global__ void kernSetdscores(int N, int C, double *dev_dL_dscores, int *dev_gtruth) { int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < N) { @@ -116,6 +145,15 @@ namespace CharacterRecognition { } } + // compute predictions + __global__ void kernPredsN(int N, int C, double* dev_oLayer, int* dev_gtruth, int* dev_preds) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if (tid < N) { + dev_preds[tid] = dev_oLayer[tid*C + dev_gtruth[tid]] > 0.5 ? dev_gtruth[tid] : (dev_gtruth[tid]==0 ? 1:0) ; + } + } + // compute loss per example __global__ void kernLossPerN(int N, int C, double* dev_oLayer, int* dev_gtruth, double* dev_losses) { int tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -149,7 +187,7 @@ namespace CharacterRecognition { // kernel to init weights __global__ void kernInitWeights(int N, double* weights) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - + if (tid < N) { weights[tid] = 0.5; } @@ -165,9 +203,9 @@ namespace CharacterRecognition { idata[tid] = 1.0 / (1.0 + std::exp(-idata[tid])); } } - + // kern for element wise product - __global__ void kernElementProduct(int N, double *matrixA, double* matrixB, double* matrixC) { + __global__ void kernElementProduct(int N, double *matrixA, double* matrixB, double* matrixC) { int tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -191,23 +229,23 @@ namespace CharacterRecognition { dev_C[row * k + col] = sum; } } - + // Dumb reduction - __global__ void kernReduction(int N, double *dev_losses) { + __global__ void kernReduction(int N, double *dev_losses, double *dev_LossAvg) { int tid = blockIdx.x * blockDim.x + threadIdx.x; double sum = 0.0; if (tid == 0) { for (int i = 0; i < N; i++) { - sum += dev_losses[tid]; + sum += dev_losses[i]; } - dev_losses[N-1]=sum; + dev_LossAvg[0] = sum/N; } } - void trainMLP(int N, int D, int C, double *idata, int *preds, int *gtruth, int epochs, double *losses, const double LR) { - + void trainMLP(int N, int D, int H, int C, double *idata, int *preds, int *gtruth, int epochs, double *lossAvgPerEpoch, const double LR) { + timer().startGpuTimer(); // N = number of examples @@ -224,135 +262,166 @@ namespace CharacterRecognition { //================================================================ //======================INITIALIZATIONS=========================== //================================================================ - + // Allocate input layer - cudaMalloc((void**)&dev_iLayer, N*D*sizeof(double)); + cudaMalloc((void**)&dev_iLayer, N*D * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_iLayer failed!"); - cudaMemcpy(dev_iLayer, idata, N*D*sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(dev_iLayer, idata, N*D * sizeof(double), cudaMemcpyHostToDevice); checkCUDAErrorFn("cudaMemcpyToSymbol from idata to dev_iLayer failed!"); // Allocate hidden layer - cudaMalloc((void**)&dev_hLayer, 
N*C* sizeof(double)); + cudaMalloc((void**)&dev_hLayer, N*H* sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer failed!"); // Allocate output layer - cudaMalloc((void**)&dev_oLayer, N*C* sizeof(double)); + cudaMalloc((void**)&dev_oLayer, N*C * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_oLayer failed!"); // Allocate softmax Den holder - cudaMalloc((void**)&dev_smaxDen, N* sizeof(double)); + cudaMalloc((void**)&dev_smaxDen, N * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_smaxDen failed!"); // Allocate losses holder - cudaMalloc((void**)&dev_losses, N*sizeof(double)); + cudaMalloc((void**)&dev_losses, N * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_losses failed!"); + + cudaMalloc((void**)&dev_LossAvg, 1* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_LossAvg failed!"); - // Allocate gtruth holder - cudaMalloc((void**)&dev_gtruth , N * sizeof(int)); + // Allocate gtruth and preds + cudaMalloc((void**)&dev_gtruth, N * sizeof(int)); checkCUDAErrorFn("cudaMalloc dev_gtruth failed!"); - cudaMemcpy(dev_gtruth, gtruth, N*sizeof(int), cudaMemcpyHostToDevice); + cudaMemcpy(dev_gtruth, gtruth, N * sizeof(int), cudaMemcpyHostToDevice); checkCUDAErrorFn("cudaMemcpyToSymbol from gtruth to dev_gtruth failed!"); + cudaMalloc((void**)&dev_preds, N * sizeof(int)); + checkCUDAErrorFn("cudaMalloc dev_preds failed!"); // Allocate Weights - cudaMalloc((void**)&dev_w_kj, D*C*sizeof(double)); + cudaMalloc((void**)&dev_w_kj, D*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_w_kj failed!"); - cudaMalloc((void**)&dev_w_ji, C*C* sizeof(double)); + cudaMalloc((void**)&dev_w_ji, C*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_w_ji failed!"); - + // Allocate Derivatives - cudaMalloc((void**)&dev_dL_dw_kj, D*C* sizeof(double)); + cudaMalloc((void**)&dev_dL_dw_kj, D*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_w_kj failed!"); - cudaMalloc((void**)&dev_dL_dw_ji, C*C* sizeof(double)); + cudaMalloc((void**)&dev_dL_dw_ji, C*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_w_ji failed!"); - cudaMalloc((void**)&dev_dL_dscores, N*C*sizeof(double)); + cudaMalloc((void**)&dev_dL_dscores, N*C * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_dL_dscores failed!"); cudaMalloc((void**)&dev_dL_dscores_2, N*C * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_dL_dscores_2 failed!"); - cudaMalloc((void**)&dev_hLayer_T, N*C* sizeof(double)); + cudaMalloc((void**)&dev_hLayer_T, N*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); - - cudaMalloc((void**)&dev_iLayer_T, N*D* sizeof(double)); + + cudaMalloc((void**)&dev_iLayer_T, N*D * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); + //==================== + // Initialise Weights + //==================== + cudaMalloc((void**)&devState, H*D* sizeof(curandState)); + + kernInitCurand <<<((D*H + blockSize - 1) / blockSize), blockSize >>> (devState, D*H, 0); + checkCUDAErrorFn("KernInitCurand failed!"); - // Init weights kj - kernInitWeights <<<((D*C + blockSize - 1) / blockSize), blockSize >> > (D*C, dev_w_kj); + KernGenRand <<<((D*H + blockSize - 1) / blockSize), blockSize >>> (devState, D*H, dev_w_kj); checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); - // Init weights ji - kernInitWeights <<<((C*C + blockSize - 1) / blockSize), blockSize >> > (C*C, dev_w_ji); - checkCUDAErrorFn("kernInitWeights dev_w_ji failed!"); + kernInitCurand << <((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, 0); + checkCUDAErrorFn("KernInitCurand failed!"); + + KernGenRand << <((H*C + 
blockSize - 1) / blockSize), blockSize >> > (devState, H*C, dev_w_ji); + checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); + + /*double *rand = new double[D*C]; + cudaMemcpy(rand, dev_w_kj, D*C* sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_w_kj to rand failed!"); + printf("Post random inits dev_w_kj - \n"); + printFloatArray(D*C, rand, true);*/ + + /*double *rand2 = new double[C*C]; + cudaMemcpy(rand2, dev_w_ji, C*C * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_w_kj to rand failed!"); + printf("Post random inits dev_w_ji - \n"); + printFloatArray(C*C, rand2, true);*/ - //================================================================ //======================TRAINING LOOP============================= //================================================================ - + double *tmp = new double[N*H]; + double *tmp2 = new double[D*D]; + double *lossesN = new double[N]; + + + printf("Input DATA\n"); + printFloatArray(N*D, idata, true); + for (int i = 0; i < epochs; i++) { - + //================================================================ //========================= FORWARD ============================== - + // STEP 1 // f1 = W1*X1 (Matrix Mul) //================================= // dev_hLayer = dev_iLayer*dev_w_kj - // NxC = NxD DxC + // NxH = NxD DxH dim3 dimBlock(blockWidth, blockWidth); dim3 dimGrid; dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; - kernMatrixMultiply <<>> (dev_iLayer, dev_w_kj, dev_hLayer, N, D, C); + kernMatrixMultiply << > > (dev_iLayer, dev_w_kj, dev_hLayer, N, D, H); // Copy back to cpu - double *tmp = new double[N*C]; - //cudaMemcpy(tmp, dev_hLayer, N*C* sizeof(double), cudaMemcpyDeviceToHost); - //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); - //printf("Post matmul\n"); - //printFloatArray(N*C, tmp, true); + //double *tmp = new double[N*H]; + cudaMemcpy(tmp, dev_hLayer, N*H* sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + printf("Post matmul [f1 = dev_iLayer*dev_w_kj]\n"); + printFloatArray(N*H, tmp, true); // STEP 2 // X2 = Sigmoid(f1) //================================ // dev_hLayer = sigmoid(dev_hLayer) - // NxC = NxC - kernSigmoid <<<((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_hLayer); + // NxH = NxH + kernSigmoid << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_hLayer); // Copy back to cpu - //cudaMemcpy(tmp, dev_hLayer, N*C* sizeof(double), cudaMemcpyDeviceToHost); - //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); - //printf("Post sigmoid\n"); - //printFloatArray(N*C, tmp, true); + cudaMemcpy(tmp, dev_hLayer, N*H * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + printf("Post sigmoid [X2 = Sigmoid(f1) ]\n"); + printFloatArray(N*H, tmp, true); // STEP 3 // Scores S = W2*X2 (Matrix Mul) //================================ // dev_oLayer = dev_hLayer*dev_w_ji - // NxC = NxC CxC - kernMatrixMultiply <<>> (dev_hLayer, dev_w_ji, dev_oLayer, N, C, C); + // NxC = NxH HxC + kernMatrixMultiply << > > (dev_hLayer, dev_w_ji, dev_oLayer, N, H, C); checkCUDAErrorFn("kernMatrixMultiply failed!"); // Copy back to cpu - //cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToHost); - //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); - //printf("Post 
S=W2*x2\n"); - //printFloatArray(N*C, tmp, true); + cudaMemcpy(tmp, dev_oLayer, N*C* sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + printf("Post S=W2*x2\n"); + printFloatArray(N*C, tmp, true); // STEP 4 // P = Softmax(S) @@ -360,87 +429,129 @@ namespace CharacterRecognition { // dev_smaxDen = Sum_Over_classses(dev_olayer) // dev_olayer = dev_olayer/Sum_Over_classses // NxC = NxC 1 - kernSumRow<<<((N + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_oLayer, dev_smaxDen); - kernSoftmax << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_smaxDen); + kernSumRow <<<((N + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_oLayer, dev_smaxDen); + kernSoftmax <<<((N + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_oLayer, dev_smaxDen); checkCUDAErrorFn("kernSumRow or kernSoftmax failed!"); // Copy back to cpu - //cudaMemcpy(tmp, dev_smaxDen, N*sizeof(double), cudaMemcpyDeviceToHost); - //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!"); - //printf("Post dev_smaxDen\n"); - //printFloatArray(N, tmp, true); + cudaMemcpy(tmp, dev_smaxDen, N*sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!"); + printf("Post dev_smaxDen [dev_smaxDen = Sum_Over_classses(dev_olayer)]\n"); + printFloatArray(N, tmp, true); // Copy back to cpu cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToHost); checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!"); - printf("Post Softmax\n"); + printf("Post Softmax [dev_olayer = exp(dev_olayer)/Sum_Over_classses]\n"); printFloatArray(N*C, tmp, true); - // Compute Loss | Cross Entropy Loss + // STEP 5 + // Compute Losses | Cross Entropy Loss //================================== // Compute Loss L = CEntropy(P) - kernLossPerN<<<((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_gtruth, dev_losses); + kernLossPerN << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_gtruth, dev_losses); checkCUDAErrorFn("kernLossPerN failed!"); // Copy back to cpu - cudaMemcpy(tmp, dev_losses, N*sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to tmp failed!"); - printf("Post dev_losses\n"); - printFloatArray(N, tmp, true); + cudaMemcpy(lossesN, dev_losses, N * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to lossesN failed!"); + printf("Post dev_losses [Loss = CEntropy(P)]\n"); + printFloatArray(N, lossesN, true); + // STEP 5.2 + // Compute Avg of Losses + //================================== // Dumb Reduction - kernReduction<< <((N + blockSize - 1) / blockSize), blockSize >> > (N, dev_losses); - // Copy back to cpu - cudaMemcpy(tmp, dev_losses+N-1, sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to tmp failed!"); - printf("Epoch: %3d | dev_loss %3f \n", i, tmp[0]); - // Track loss here - losses[i] = tmp[0]; + kernReduction << <((N + blockSize - 1) / blockSize), blockSize >> > (N, dev_losses, dev_LossAvg); + // Copy back to cpu + cudaMemcpy(lossAvgPerEpoch + i, dev_LossAvg, sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_LossAvg to tmp failed!"); + //printf("Epoch: %d | LossAvg %3f \n", i, lossAvgPerEpoch[i]); + + + // Predictions + kernPredsN << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_gtruth, dev_preds); 
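// (A sketch, not part of this patch: kernPredsN above thresholds the
// ground-truth class probability at 0.5 and flips the label otherwise, which
// is only meaningful for C == 2 and requires dev_gtruth at prediction time.
// A class-agnostic alternative is a row-wise argmax over the softmax output.
// kernArgmaxPreds is a hypothetical kernel, declared at namespace scope like
// the others in this file:)
__global__ void kernArgmaxPreds(int N, int C, const double *probs, int *preds) {
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= N) return;
    int best = 0;
    double bestP = probs[tid * C];
    for (int c = 1; c < C; c++) {          // scan this example's C probabilities
        if (probs[tid * C + c] > bestP) {
            bestP = probs[tid * C + c];
            best = c;
        }
    }
    preds[tid] = best;                      // index of the most probable class
}
// It would be launched like kernPredsN, minus the ground-truth argument:
//   kernArgmaxPreds <<<((N + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_oLayer, dev_preds);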
+ cudaMemcpy(preds, dev_preds, N * sizeof(int), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyDeviceToHost from dev_preds to preds failed!"); + printf("Predictions\n"); + printArray(N, preds, true); //================================================================= //========================= BACKPROP ============================== - + // STEP 1 : Gradient wrt w_ji // dW_ji = Probs_k - [1](gth == k) dev_dL_dscores; - cudaMemcpy(dev_dL_dscores, dev_oLayer, N*C*sizeof(double), cudaMemcpyDeviceToDevice); + cudaMemcpy(dev_dL_dscores, dev_oLayer, N*C* sizeof(double), cudaMemcpyDeviceToDevice); checkCUDAErrorFn("cudaMemcpyFromSymbol from probabs to dev_dL_dscores failed!"); - + kernSetdscores << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_dL_dscores, dev_gtruth); checkCUDAErrorFn("kernSetdscores failed!"); - kernDivNdscores <<<((N*C + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_dL_dscores); + // Copy back to cpu + cudaMemcpy(tmp, dev_dL_dscores, N*C * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol [kernSetdscores] from dev_dL_dscores to tmp failed!"); + printf("Post setting loss at positions dev_dL_dscores \n"); + printFloatArray(N*C, tmp, true); + + kernDivNdscores << <((N*C + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_dL_dscores); checkCUDAErrorFn("kernDivNdscores failed!"); - dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; - dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; - kernMatrixTranspose <<> > (N, C, dev_hLayer, dev_hLayer_T); + cudaMemcpy(tmp, dev_dL_dscores, N*C * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol [kernSetdscores] from dev_dL_dscores to tmp failed!"); + printf("Post div by N -> setting loss at positions-> dev_dL_dscores \n"); + printFloatArray(N*C, tmp, true); + + + dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; + kernMatrixTranspose << > > (N, H, dev_hLayer, dev_hLayer_T); + + cudaMemcpy(tmp, dev_hLayer, N*H * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol dev_hLayer to tmp failed!"); + printf("dev_hLayer \n"); + printFloatArray(N*H, tmp, true); + + cudaMemcpy(tmp, dev_hLayer_T, N*H* sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol dev_hLayer_T to tmp failed!"); + printf("dev_hLayer_T \n"); + printFloatArray(N*H, tmp, true); + dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; - kernMatrixMultiply << > > (dev_hLayer_T, dev_dL_dscores, dev_dL_dw_ji, C, N, C); + kernMatrixMultiply << > > (dev_hLayer_T, dev_dL_dscores, dev_dL_dw_ji, H, N, C); checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); + + //=========================== // STEP 2 : Gradient wrt w_kj - - // Mul dscores * dev_w_kj == dev_dL_dscores_2 + //=========================== + + // Transpose Wji + //TODO HERE + //dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x; + //dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; + //kernMatrixTranspose << > > (N, H, dev_hLayer, dev_hLayer_T); + break; + // Mul dev_dL_dscores * dev_w_kj == dev_dL_dscores_2 dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; kernMatrixMultiply << > > (dev_dL_dscores, dev_w_kj, dev_dL_dscores_2, N, C, C); checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); - + // compute sig gradient on dev_hlayer - kernGradSigmoid <<<((N*C + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_hLayer); + kernGradSigmoid << 
<((N*C + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_hLayer); checkCUDAErrorFn("kernGradSigmoid failed!"); //Element wise mul dev_dL_dscores_2 = dev_dL_dscores_2 . dev_hlayer[sig gradient] - kernElementProduct <<<((N*C + blockSize - 1) / blockSize), blockSize >>> (N*C, dev_dL_dscores_2, dev_hLayer, dev_dL_dscores_2); + kernElementProduct << <((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_dL_dscores_2, dev_hLayer, dev_dL_dscores_2); checkCUDAErrorFn("kernElementProduct failed!"); // Transpose X1 dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; dimGrid.y = (D + dimBlock.y - 1) / dimBlock.y; - kernMatrixTranspose <<>> (N, D, dev_iLayer, dev_iLayer_T); + kernMatrixTranspose << > > (N, D, dev_iLayer, dev_iLayer_T); // matrix Mul dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; @@ -459,22 +570,63 @@ namespace CharacterRecognition { // InitUpdate weights ji kernUpdateWeights << <((C*C + blockSize - 1) / blockSize), blockSize >> > (C*C, dev_dL_dw_ji, dev_w_ji, LR); checkCUDAErrorFn("kernInitWeights dev_w_ji failed!"); - + // COntinue to next epoch - double *tmp2 = new double[D*D]; - cudaMemcpy(tmp2, dev_dL_dw_kj, D*C*sizeof(double), cudaMemcpyDeviceToHost); + + cudaMemcpy(tmp2, dev_w_kj, D*C * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("dev_w_kj memcopy failed!"); + printf("w_kj \n"); + printFloatArray(D*C, tmp2, true); + cudaMemcpy(tmp2, dev_dL_dw_kj, D*C * sizeof(double), cudaMemcpyDeviceToHost); checkCUDAErrorFn("dev_dL_dw_kj memcopy failed!"); + printf("Dw_kj \n"); printFloatArray(D*C, tmp2, true); + + cudaMemcpy(tmp2, dev_w_ji, C*C * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("dev_w_ji memcopy failed!"); + printf("w_ji \n"); + printFloatArray(C*C, tmp2, true); cudaMemcpy(tmp2, dev_dL_dw_ji, C*C * sizeof(double), cudaMemcpyDeviceToHost); checkCUDAErrorFn("dev_dL_dw_ji memcopy failed!"); + printf("Dw_ji \n"); printFloatArray(C*C, tmp2, true); - printf("\n"); + + printf("Epoch: %d | LossAvg %3f \n", i, lossAvgPerEpoch[i]); + printf("\n-----------------------------------------------------\n\n"); } printf("Finished training.\n"); + + //==================== + // CleanUp + //==================== + cudaFree(dev_iLayer); + cudaFree(dev_hLayer); + cudaFree(dev_oLayer); + + cudaFree(dev_smaxDen); + cudaFree(dev_losses); + cudaFree(dev_gtruth); + cudaFree(dev_preds); + + cudaFree(dev_w_kj); + cudaFree(dev_w_ji); + + cudaFree(dev_dL_dw_ji); + cudaFree(dev_dL_dw_kj); + + cudaFree(dev_dL_dscores); + cudaFree(dev_dL_dscores_2); + + cudaFree(dev_hLayer_T); + cudaFree(dev_iLayer_T); + + delete(tmp); + delete(tmp2); + timer().endGpuTimer(); - } + } } diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu_back b/Project2-Character-Recognition/character_recognition/mlp.cu_back new file mode 100644 index 0000000..fce922c --- /dev/null +++ b/Project2-Character-Recognition/character_recognition/mlp.cu_back @@ -0,0 +1,542 @@ +#include +#include +#include "common.h" +#include "mlp.h" + +#include +#include + +#define blockSize 128 +#define blockWidth 16 + + +namespace CharacterRecognition { + using Common::PerformanceTimer; + PerformanceTimer& timer() + { + static PerformanceTimer timer; + return timer; + } + + // Initlialiations + + //layers + double *dev_iLayer; + double *dev_hLayer; + double *dev_oLayer; + double *dev_smaxDen; + double *dev_losses; + int *dev_gtruth; + + //weights + double *dev_w_kj; + double *dev_w_ji; + + //Derivatives + double *dev_dL_dw_ji; + double *dev_dL_dw_kj; + double *dev_dL_dscores; + double *dev_dL_dscores_2; + + 
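// (Shape reference, a sketch inferred from the allocations later in this
//  backup file; in this snapshot the hidden layer is C wide, i.e. H == C:
//    dev_iLayer : N x D   input matrix X1
//    dev_hLayer : N x C   hidden activations X2 = sigmoid(X1 * w_kj)
//    dev_oLayer : N x C   class scores, softmax probabilities after STEP 4
//    dev_w_kj   : D x C   input-to-hidden weights
//    dev_w_ji   : C x C   hidden-to-output weights
//    dev_dL_d*  : gradients, same shape as the buffer they differentiate)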
double *dev_hLayer_T; + double *dev_iLayer_T; + + //============================================= + // Rnadom Number Generation using cuRand on GPU + //============================================= + curandState *devState; + + __global__ void kernInitCurand(curandState *state, int N, unsigned long seed) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N) { + curand_init(seed, tid, 0, &state[tid]); + } + } + + __global__ void KernGenRand(curandState *state, int N, double *w) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N) { + w[tid] = 2.0*curand_uniform(&state[tid]) - 1; // Between -1 and 1 + } + } + + //=================================================================== + //=====KERNEL DEFNITIONS FOR Forward and Backward==================== + //=================================================================== + + void printArray(int n, int *a, bool abridged = false) { + printf(" [ "); + for (int i = 0; i < n; i++) { + if (abridged && i + 2 == 15 && n > 16) { + i = n - 2; + printf("... "); + } + printf("%3d ", a[i]); + } + printf("]\n"); + } + void printFloatArray(int n, double *a, bool abridged = false) { + printf(" [ "); + for (int i = 0; i < n; i++) { + if (abridged && i + 2 == 15 && n > 16) { + i = n - 2; + printf("... "); + } + printf("%3f ", a[i]); + } + printf("]\n"); + } + + + // Kernel for Gradient update on Weights + __global__ void kernUpdateWeights(int N, double *dev_dw, double *dev_w, double LR) { + + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if (tid < N) { + dev_w[tid] += -LR * dev_dw[tid]; + } + } + + // Kernel for derivative of sigmoid + __global__ void kernGradSigmoid(int N, int C, double *dev_hLayer) { + + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if (tid < N*C) { + dev_hLayer[tid] = dev_hLayer[tid] * (1 - dev_hLayer[tid]); + } + } + + // Matrix Transpose + __global__ void kernMatrixTranspose(int N, int C, double *matrix, double *matrix_T) { + + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + if (col < C && row < N) { + matrix_T[C*row + col] = matrix[N*col + row]; + } + } + + // Divide by N + __global__ void kernDivNdscores(int N, int C, double *dev_dL_dscores) { + + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N*C) { + dev_dL_dscores[tid] /= N; + } + } + + // Compute dscores gradient + __global__ void kernSetdscores(int N, int C, double *dev_dL_dscores, int *dev_gtruth) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if (tid < N) { + dev_dL_dscores[tid*C + dev_gtruth[tid]] -= 1; + } + } + + // compute loss per example + __global__ void kernLossPerN(int N, int C, double* dev_oLayer, int* dev_gtruth, double* dev_losses) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if (tid < N) { + dev_losses[tid] = -log(dev_oLayer[tid*C + dev_gtruth[tid]]); + } + } + + // kernel to compute exp softmax + __global__ void kernSoftmax(int N, int C, double* scores, double *sums) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N) { + for (int i = 0; i < C; i++) { + scores[tid*C + i] = exp(scores[tid*C + i]) / sums[tid]; + } + } + } + + // kernel to exp sum across classes + __global__ void kernSumRow(int N, int C, double* scores, double *sums) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N) { + for (int i = 0; i < C; i++) { + sums[tid] += exp(scores[tid*C + i]); + } + } + } + + + // kernel to init weights + __global__ void kernInitWeights(int N, double* weights) { + int tid = threadIdx.x + blockIdx.x * 
blockDim.x; + + if (tid < N) { + weights[tid] = 0.5; + } + + } + + // kern for sigmoid // f(x) = 1/(1 + e^-x). + __global__ void kernSigmoid(int N, double *idata) { + + int tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid < N) { + idata[tid] = 1.0 / (1.0 + std::exp(-idata[tid])); + } + } + + // kern for element wise product + __global__ void kernElementProduct(int N, double *matrixA, double* matrixB, double* matrixC) { + + int tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid < N) { + matrixC[tid] = matrixA[tid] * matrixB[tid]; + } + } + + + // kernel to to matmul // A mxn // B nxk // C mxk + __global__ void kernMatrixMultiply(const double *dev_A, const double *dev_B, double *dev_C, int m, int n, int k) { + + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + + double sum = 0; + if (col < k && row < m) + { + for (int i = 0; i < n; i++) + sum += dev_A[row * n + i] * dev_B[i * k + col]; + dev_C[row * k + col] = sum; + } + } + + // Dumb reduction + __global__ void kernReduction(int N, double *dev_losses) { + + int tid = blockIdx.x * blockDim.x + threadIdx.x; + double sum = 0.0; + if (tid == 0) { + for (int i = 0; i < N; i++) { + sum += dev_losses[tid]; + } + dev_losses[N-1]=sum; + } + + } + + void trainMLP(int N, int D, int C, double *idata, int *preds, int *gtruth, int epochs, double *losses, const double LR) { + + timer().startGpuTimer(); + + // N = number of examples + // D = dim of each example + // C = number of classes + + // NETWORK DEFITION_____________ + // Compute f1 = W1*X1 + // Compute X2 = Sig(f1) + // Compute Scroes S = W2*X2 + // Compute Probab P = Softmax(S) + // Compute Loss L = CEntropy(P) + + //================================================================ + //======================INITIALIZATIONS=========================== + //================================================================ + + // Allocate input layer + cudaMalloc((void**)&dev_iLayer, N*D*sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_iLayer failed!"); + + cudaMemcpy(dev_iLayer, idata, N*D*sizeof(double), cudaMemcpyHostToDevice); + checkCUDAErrorFn("cudaMemcpyToSymbol from idata to dev_iLayer failed!"); + + + // Allocate hidden layer + cudaMalloc((void**)&dev_hLayer, N*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_hLayer failed!"); + + + // Allocate output layer + cudaMalloc((void**)&dev_oLayer, N*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_oLayer failed!"); + + + // Allocate softmax Den holder + cudaMalloc((void**)&dev_smaxDen, N* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_smaxDen failed!"); + + + // Allocate losses holder + cudaMalloc((void**)&dev_losses, N*sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_losses failed!"); + + + // Allocate gtruth holder + cudaMalloc((void**)&dev_gtruth , N * sizeof(int)); + checkCUDAErrorFn("cudaMalloc dev_gtruth failed!"); + + cudaMemcpy(dev_gtruth, gtruth, N*sizeof(int), cudaMemcpyHostToDevice); + checkCUDAErrorFn("cudaMemcpyToSymbol from gtruth to dev_gtruth failed!"); + + + // Allocate Weights + cudaMalloc((void**)&dev_w_kj, D*C*sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_w_kj failed!"); + + cudaMalloc((void**)&dev_w_ji, C*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_w_ji failed!"); + + + // Allocate Derivatives + cudaMalloc((void**)&dev_dL_dw_kj, D*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_w_kj failed!"); + + cudaMalloc((void**)&dev_dL_dw_ji, C*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_w_ji failed!"); + + 
cudaMalloc((void**)&dev_dL_dscores, N*C*sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_dL_dscores failed!"); + + cudaMalloc((void**)&dev_dL_dscores_2, N*C * sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_dL_dscores_2 failed!"); + + cudaMalloc((void**)&dev_hLayer_T, N*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); + + cudaMalloc((void**)&dev_iLayer_T, N*D* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); + + + // Initialise Weights + + kernInitCurand << <((D*C + blockSize - 1) / blockSize), blockSize >> > (devState, D*C, 27); + checkCUDAErrorFn("KernInitCurand failed!"); + + KernGenRand << <((D*C + blockSize - 1) / blockSize), blockSize >> > (devState, D*C, dev_w_kj); + checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); + + kernInitCurand << <((C*C + blockSize - 1) / blockSize), blockSize >> > (devState, C*C, 18); + checkCUDAErrorFn("KernInitCurand failed!"); + + KernGenRand << <((C*C + blockSize - 1) / blockSize), blockSize >> > (devState, C*C, dev_w_ji); + checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); + + //kernInitWeights <<<((D*C + blockSize - 1) / blockSize), blockSize >> > (D*C, dev_w_kj); + //checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); + + //kernInitWeights <<<((C*C + blockSize - 1) / blockSize), blockSize >> > (C*C, dev_w_ji); + //checkCUDAErrorFn("kernInitWeights dev_w_ji failed!"); + double *tmp = new double[N*C]; + double *tmp2 = new double[D*D]; + //================================================================ + //======================TRAINING LOOP============================= + //================================================================ + + for (int i = 0; i < epochs; i++) { + + //================================================================ + //========================= FORWARD ============================== + + // STEP 1 + // f1 = W1*X1 (Matrix Mul) + //================================= + // dev_hLayer = dev_iLayer*dev_w_kj + // NxC = NxD DxC + + dim3 dimBlock(blockWidth, blockWidth); + dim3 dimGrid; + dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; + kernMatrixMultiply <<>> (dev_iLayer, dev_w_kj, dev_hLayer, N, D, C); + + // Copy back to cpu + + //cudaMemcpy(tmp, dev_hLayer, N*C* sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + //printf("Post matmul\n"); + //printFloatArray(N*C, tmp, true); + + // STEP 2 + // X2 = Sigmoid(f1) + //================================ + // dev_hLayer = sigmoid(dev_hLayer) + // NxC = NxC + kernSigmoid <<<((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_hLayer); + + + // Copy back to cpu + //cudaMemcpy(tmp, dev_hLayer, N*C* sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + //printf("Post sigmoid\n"); + //printFloatArray(N*C, tmp, true); + + // STEP 3 + // Scores S = W2*X2 (Matrix Mul) + //================================ + // dev_oLayer = dev_hLayer*dev_w_ji + // NxC = NxC CxC + kernMatrixMultiply <<>> (dev_hLayer, dev_w_ji, dev_oLayer, N, C, C); + checkCUDAErrorFn("kernMatrixMultiply failed!"); + + // Copy back to cpu + //cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + //printf("Post S=W2*x2\n"); + //printFloatArray(N*C, tmp, true); + + // STEP 4 + // P = Softmax(S) + //=============== + // dev_smaxDen = Sum_Over_classses(dev_olayer) + // 
dev_olayer = dev_olayer/Sum_Over_classses + // NxC = NxC 1 + kernSumRow<<<((N + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_oLayer, dev_smaxDen); + kernSoftmax << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_smaxDen); + checkCUDAErrorFn("kernSumRow or kernSoftmax failed!"); + + // Copy back to cpu + //cudaMemcpy(tmp, dev_smaxDen, N*sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!"); + //printf("Post dev_smaxDen\n"); + //printFloatArray(N, tmp, true); + + // Copy back to cpu + cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!"); + printf("Post Softmax\n"); + printFloatArray(N*C, tmp, true); + + // Compute Loss | Cross Entropy Loss + //================================== + // Compute Loss L = CEntropy(P) + kernLossPerN<<<((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_gtruth, dev_losses); + checkCUDAErrorFn("kernLossPerN failed!"); + + // Copy back to cpu + cudaMemcpy(tmp, dev_losses, N*sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to tmp failed!"); + printf("Post dev_losses\n"); + printFloatArray(N, tmp, true); + + // Dumb Reduction + kernReduction<< <((N + blockSize - 1) / blockSize), blockSize >> > (N, dev_losses); + // Copy back to cpu + cudaMemcpy(tmp, dev_losses+N-1, sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to tmp failed!"); + printf("Epoch: %3d | dev_loss %3f \n", i, tmp[0]); + + // Track loss here + losses[i] = tmp[0]; + + //================================================================= + //========================= BACKPROP ============================== + + // STEP 1 : Gradient wrt w_ji + // dW_ji = Probs_k - [1](gth == k) dev_dL_dscores; + cudaMemcpy(dev_dL_dscores, dev_oLayer, N*C*sizeof(double), cudaMemcpyDeviceToDevice); + checkCUDAErrorFn("cudaMemcpyFromSymbol from probabs to dev_dL_dscores failed!"); + + kernSetdscores << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_dL_dscores, dev_gtruth); + checkCUDAErrorFn("kernSetdscores failed!"); + + kernDivNdscores <<<((N*C + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_dL_dscores); + checkCUDAErrorFn("kernDivNdscores failed!"); + + dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; + kernMatrixTranspose <<> > (N, C, dev_hLayer, dev_hLayer_T); + + dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; + kernMatrixMultiply << > > (dev_hLayer_T, dev_dL_dscores, dev_dL_dw_ji, C, N, C); + checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); + + // STEP 2 : Gradient wrt w_kj + + // Mul dscores * dev_w_kj == dev_dL_dscores_2 + dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; + kernMatrixMultiply << > > (dev_dL_dscores, dev_w_kj, dev_dL_dscores_2, N, C, C); + checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); + + // compute sig gradient on dev_hlayer + kernGradSigmoid <<<((N*C + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_hLayer); + checkCUDAErrorFn("kernGradSigmoid failed!"); + + //Element wise mul dev_dL_dscores_2 = dev_dL_dscores_2 . 
dev_hlayer[sig gradient] + kernElementProduct <<<((N*C + blockSize - 1) / blockSize), blockSize >>> (N*C, dev_dL_dscores_2, dev_hLayer, dev_dL_dscores_2); + checkCUDAErrorFn("kernElementProduct failed!"); + + // Transpose X1 + dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (D + dimBlock.y - 1) / dimBlock.y; + kernMatrixTranspose <<>> (N, D, dev_iLayer, dev_iLayer_T); + + // matrix Mul + dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; + kernMatrixMultiply << > > (dev_iLayer_T, dev_dL_dscores_2, dev_dL_dw_kj, D, N, C); + checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); + + + //================================================================= + //========================= Update Weights========================= + + // Update weights kj + kernUpdateWeights << <((D*C + blockSize - 1) / blockSize), blockSize >> > (D*C, dev_dL_dw_kj, dev_w_kj, LR); + checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); + + // Update weights ji + kernUpdateWeights << <((C*C + blockSize - 1) / blockSize), blockSize >> > (C*C, dev_dL_dw_ji, dev_w_ji, LR); + checkCUDAErrorFn("kernInitWeights dev_w_ji failed!"); + + // Continue to next epoch + + cudaMemcpy(tmp2, dev_dL_dw_kj, D*C*sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("dev_dL_dw_kj memcopy failed!"); + printFloatArray(D*C, tmp2, true); + cudaMemcpy(tmp2, dev_dL_dw_ji, C*C * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("dev_dL_dw_ji memcopy failed!"); + printFloatArray(C*C, tmp2, true); + + printf("\n"); + + } + + printf("Finished training.\n"); + printf("Free Device Buffers.\n"); + + //==================== + // CleanUp + //==================== + cudaFree(dev_iLayer); + cudaFree(dev_hLayer); + cudaFree(dev_oLayer); + + cudaFree(dev_smaxDen); + cudaFree(dev_losses); + cudaFree(dev_gtruth); + + cudaFree(dev_w_kj); + cudaFree(dev_w_ji); + + cudaFree(dev_dL_dw_ji); + cudaFree(dev_dL_dw_kj); + + cudaFree(dev_dL_dscores); + cudaFree(dev_dL_dscores_2); + + cudaFree(dev_hLayer_T); + cudaFree(dev_iLayer_T); + + delete(tmp); + delete(tmp2); + + checkCUDAErrorFn("cudaFree failed!"); + + timer().endGpuTimer(); + } +} diff --git a/Project2-Character-Recognition/character_recognition/mlp.h b/Project2-Character-Recognition/character_recognition/mlp.h index cede1eb..7943f19 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.h +++ b/Project2-Character-Recognition/character_recognition/mlp.h @@ -11,6 +11,6 @@ namespace CharacterRecognition { //void readData(int N, int P, int iDim, int hDim, int oDim); //void testMLP(int N, int P, int iDim, int hDim, int oDim); - void trainMLP(int N, int D, int C, double *idata, int *preds, int *gtruth, int epochs, double *losses, const double LR); + void trainMLP(int N, int D, int H, int C, double *idata, int *preds, int *gtruth, int epochs, double *losses, const double LR); } diff --git a/Project2-Character-Recognition/src/main.cpp b/Project2-Character-Recognition/src/main.cpp index b4addfe..f93a64e 100644 --- a/Project2-Character-Recognition/src/main.cpp +++ b/Project2-Character-Recognition/src/main.cpp @@ -14,11 +14,12 @@ //____CONFIG Neural Network_____________________ const int N = 4; // 52; // Number of examples const int D = 2; //; // Feature length per example +const int H = 2; //; // Number of Hidden unit const int C = 2; // NN number of classes -const double LR = 0.1; +const double LR = 0.01; -const int epochs = 1000; +const int epochs = 100; double *losses = new double[epochs]; double *idata = new double[N*D]; 
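// (A sketch, not part of this patch: the hand-written XOR idata/gtruth
// initialization in the surrounding hunks could be generated in a loop, which
// keeps inputs and labels consistent by construction for the N = 4, D = 2 case.)
for (int n = 0; n < 4; n++) {
    int x0 = (n >> 1) & 1;        // first input bit:  0, 0, 1, 1
    int x1 = n & 1;               // second input bit: 0, 1, 0, 1
    idata[n * 2 + 0] = x0;
    idata[n * 2 + 1] = x1;
    gtruth[n] = x0 ^ x1;          // XOR labels: 0, 1, 1, 0
}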
@@ -54,7 +55,7 @@ int main(int argc, char* argv[]) { gtruth[2] = 1; gtruth[3] = 0; - CharacterRecognition::trainMLP(N, D, C, idata, preds, gtruth, epochs, losses, LR); + CharacterRecognition::trainMLP(N, D, H, C, idata, preds, gtruth, epochs, losses, LR); printf("\nCompleted XOR Training\n"); From 124c6837be9c27314655affe1fed5eacfe57fd38 Mon Sep 17 00:00:00 2001 From: CHHAVI SHARMA Date: Mon, 16 Sep 2019 23:19:51 -0400 Subject: [PATCH 07/76] Completed XOR --- .../character_recognition/mlp.cu | 341 ++++++++++-------- Project2-Character-Recognition/src/main.cpp | 32 +- 2 files changed, 219 insertions(+), 154 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index e2f74a0..944974c 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -24,7 +24,11 @@ namespace CharacterRecognition { double *dev_iLayer; double *dev_hLayer; double *dev_oLayer; - double *dev_smaxDen; + + double *dev_b1; + double *dev_b2; + double *dev_db1; + double *dev_db2; double *dev_losses; double *dev_LossAvg; @@ -32,6 +36,7 @@ namespace CharacterRecognition { // gtruth and preds int *dev_gtruth; int *dev_preds; + double * dev_preds_probab; //weights double *dev_w_kj; @@ -45,6 +50,7 @@ namespace CharacterRecognition { double *dev_hLayer_T; double *dev_iLayer_T; + double *dev_w_ji_T; //============================================= @@ -62,7 +68,7 @@ namespace CharacterRecognition { __global__ void KernGenRand(curandState *state, int N, double *w) { int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < N) { - w[tid] = 2.0*curand_uniform(&state[tid]) - 1.0; // Between -1 and 1 + w[tid] = (2.0*curand_uniform(&state[tid]) - 1.0); // Between -1 and 1 } } @@ -107,11 +113,11 @@ namespace CharacterRecognition { } // Kernel for derivative of sigmoid - __global__ void kernGradSigmoid(int N, int C, double *dev_hLayer) { + __global__ void kernGradSigmoid(int N, int H, double *dev_hLayer) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid < N*C) { + if (tid < N*H) { dev_hLayer[tid] = dev_hLayer[tid] * (1 - dev_hLayer[tid]); } } @@ -146,11 +152,12 @@ namespace CharacterRecognition { } // compute predictions - __global__ void kernPredsN(int N, int C, double* dev_oLayer, int* dev_gtruth, int* dev_preds) { + __global__ void kernPredsN(int N, int C, double* dev_oLayer, int* dev_gtruth, int* dev_preds, double * dev_preds_probab) { int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < N) { dev_preds[tid] = dev_oLayer[tid*C + dev_gtruth[tid]] > 0.5 ? dev_gtruth[tid] : (dev_gtruth[tid]==0 ? 
1:0) ; + dev_preds_probab[tid] = dev_oLayer[tid*C + dev_gtruth[tid]]; } } @@ -164,43 +171,29 @@ namespace CharacterRecognition { } // kernel to compute exp softmax - __global__ void kernSoftmax(int N, int C, double* scores, double *sums) { + __global__ void kernSoftmax(int N, int C, double* scores) { int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N) { + double sums = 0.0; + for (int i = 0; i < C; i++) { - scores[tid*C + i] = exp(scores[tid*C + i]) / sums[tid]; + sums += exp(scores[tid*C + i]); } - } - } - // kernel to exp sum across classes - __global__ void kernSumRow(int N, int C, double* scores, double *sums) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid < N) { for (int i = 0; i < C; i++) { - sums[tid] += exp(scores[tid*C + i]); + scores[tid*C + i] = exp(scores[tid*C + i]) / sums; } } } - - // kernel to init weights - __global__ void kernInitWeights(int N, double* weights) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - - if (tid < N) { - weights[tid] = 0.5; - } - - } - // kern for sigmoid // f(x) = 1/(1 + e^-x). __global__ void kernSigmoid(int N, double *idata) { int tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid < N) { - idata[tid] = 1.0 / (1.0 + std::exp(-idata[tid])); + idata[tid] = 1.0 / (1.0 + exp(-idata[tid])); } } @@ -244,6 +237,17 @@ namespace CharacterRecognition { } + // Ele wise addition A = A+B + __global__ void kernAddition(int N, double *dev_A, double *dev_B) { + + int tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid < N) { + dev_A[tid] += dev_B[tid]; + } + + } + void trainMLP(int N, int D, int H, int C, double *idata, int *preds, int *gtruth, int epochs, double *lossAvgPerEpoch, const double LR) { timer().startGpuTimer(); @@ -281,11 +285,6 @@ namespace CharacterRecognition { checkCUDAErrorFn("cudaMalloc dev_oLayer failed!"); - // Allocate softmax Den holder - cudaMalloc((void**)&dev_smaxDen, N * sizeof(double)); - checkCUDAErrorFn("cudaMalloc dev_smaxDen failed!"); - - // Allocate losses holder cudaMalloc((void**)&dev_losses, N * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_losses failed!"); @@ -304,6 +303,9 @@ namespace CharacterRecognition { cudaMalloc((void**)&dev_preds, N * sizeof(int)); checkCUDAErrorFn("cudaMalloc dev_preds failed!"); + cudaMalloc((void**)&dev_preds_probab, N * sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_preds_probab failed!"); + // Allocate Weights cudaMalloc((void**)&dev_w_kj, D*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_w_kj failed!"); @@ -325,28 +327,58 @@ namespace CharacterRecognition { cudaMalloc((void**)&dev_dL_dscores_2, N*C * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_dL_dscores_2 failed!"); + + // Allocate transposes cudaMalloc((void**)&dev_hLayer_T, N*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); cudaMalloc((void**)&dev_iLayer_T, N*D * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); - //==================== - // Initialise Weights - //==================== + cudaMalloc((void**)&dev_w_ji_T, C*H*sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_w_ji_T failed!"); + + //Allocate biases + cudaMalloc((void**)&dev_b1, N*H* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); + + cudaMalloc((void**)&dev_b2, N*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); + + cudaMalloc((void**)&dev_db1, N*H* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); + + cudaMalloc((void**)&dev_db2, N*C* sizeof(double)); + checkCUDAErrorFn("cudaMalloc dev_hLayer_T 
failed!"); + + //============================== + // Initialise Weights and Biases + //============================== cudaMalloc((void**)&devState, H*D* sizeof(curandState)); - kernInitCurand <<<((D*H + blockSize - 1) / blockSize), blockSize >>> (devState, D*H, 0); + kernInitCurand <<<((D*H + blockSize - 1) / blockSize), blockSize >>> (devState, D*H, 99); checkCUDAErrorFn("KernInitCurand failed!"); KernGenRand <<<((D*H + blockSize - 1) / blockSize), blockSize >>> (devState, D*H, dev_w_kj); - checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); + checkCUDAErrorFn("KernGenRand dev_w_kj failed!"); + + kernInitCurand <<<((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, 999); + checkCUDAErrorFn("KernInitCurand failed!"); + + KernGenRand <<<((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, dev_w_ji); + checkCUDAErrorFn("KernGenRand dev_w_kj failed!"); + + kernInitCurand << <((N*C + blockSize - 1) / blockSize), blockSize >> > (devState, N*C, 9); + checkCUDAErrorFn("KernInitCurand failed!"); - kernInitCurand << <((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, 0); + KernGenRand << <((N*C + blockSize - 1) / blockSize), blockSize >> > (devState, N*C, dev_b2); + checkCUDAErrorFn("KernGenRand dev_w_kj failed!"); + + kernInitCurand << <((N*H + blockSize - 1) / blockSize), blockSize >> > (devState, N*H, 9999); checkCUDAErrorFn("KernInitCurand failed!"); - KernGenRand << <((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, dev_w_ji); - checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); + KernGenRand << <((N*H + blockSize - 1) / blockSize), blockSize >> > (devState, N*H, dev_b1); + checkCUDAErrorFn("KernGenRand dev_w_kj failed!"); /*double *rand = new double[D*C]; cudaMemcpy(rand, dev_w_kj, D*C* sizeof(double), cudaMemcpyDeviceToHost); @@ -363,13 +395,15 @@ namespace CharacterRecognition { //================================================================ //======================TRAINING LOOP============================= //================================================================ - double *tmp = new double[N*H]; - double *tmp2 = new double[D*D]; + double *tmp = new double[N*N]; + double *tmp2 = new double[N*N]; double *lossesN = new double[N]; printf("Input DATA\n"); printFloatArray(N*D, idata, true); + dim3 dimBlock(blockWidth, blockWidth); + dim3 dimGrid; for (int i = 0; i < epochs; i++) { @@ -382,18 +416,18 @@ namespace CharacterRecognition { // dev_hLayer = dev_iLayer*dev_w_kj // NxH = NxD DxH - dim3 dimBlock(blockWidth, blockWidth); - dim3 dimGrid; - dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; - dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; - kernMatrixMultiply << > > (dev_iLayer, dev_w_kj, dev_hLayer, N, D, H); + + dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; + kernMatrixMultiply <<> > (dev_iLayer, dev_w_kj, dev_hLayer, N, D, H); + kernAddition <<< ((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_hLayer,dev_b1); // Copy back to cpu //double *tmp = new double[N*H]; - cudaMemcpy(tmp, dev_hLayer, N*H* sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); - printf("Post matmul [f1 = dev_iLayer*dev_w_kj]\n"); - printFloatArray(N*H, tmp, true); + //cudaMemcpy(tmp, dev_hLayer, N*H* sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + //printf("Post matmul [f1 = dev_iLayer*dev_w_kj]\n"); + //printFloatArray(N*H, tmp, true); // 
STEP 2 // X2 = Sigmoid(f1) @@ -404,24 +438,27 @@ namespace CharacterRecognition { // Copy back to cpu - cudaMemcpy(tmp, dev_hLayer, N*H * sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); - printf("Post sigmoid [X2 = Sigmoid(f1) ]\n"); - printFloatArray(N*H, tmp, true); + //cudaMemcpy(tmp, dev_hLayer, N*H*sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + //printf("Post sigmoid [X2 = Sigmoid(f1) ]\n"); + //printFloatArray(N*H, tmp, true); // STEP 3 // Scores S = W2*X2 (Matrix Mul) //================================ // dev_oLayer = dev_hLayer*dev_w_ji // NxC = NxH HxC + dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; kernMatrixMultiply << > > (dev_hLayer, dev_w_ji, dev_oLayer, N, H, C); + kernAddition << < ((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_oLayer, dev_b2); checkCUDAErrorFn("kernMatrixMultiply failed!"); // Copy back to cpu - cudaMemcpy(tmp, dev_oLayer, N*C* sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); - printf("Post S=W2*x2\n"); - printFloatArray(N*C, tmp, true); + //cudaMemcpy(tmp, dev_oLayer, N*C*sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); + //printf("Post S=W2*x2\n"); + //printFloatArray(N*C, tmp, true); // STEP 4 // P = Softmax(S) @@ -429,21 +466,14 @@ namespace CharacterRecognition { // dev_smaxDen = Sum_Over_classses(dev_olayer) // dev_olayer = dev_olayer/Sum_Over_classses // NxC = NxC 1 - kernSumRow <<<((N + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_oLayer, dev_smaxDen); - kernSoftmax <<<((N + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_oLayer, dev_smaxDen); - checkCUDAErrorFn("kernSumRow or kernSoftmax failed!"); - - // Copy back to cpu - cudaMemcpy(tmp, dev_smaxDen, N*sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!"); - printf("Post dev_smaxDen [dev_smaxDen = Sum_Over_classses(dev_olayer)]\n"); - printFloatArray(N, tmp, true); + kernSoftmax <<<((N + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_oLayer); + checkCUDAErrorFn("kernSoftmax failed!"); // Copy back to cpu - cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!"); - printf("Post Softmax [dev_olayer = exp(dev_olayer)/Sum_Over_classses]\n"); - printFloatArray(N*C, tmp, true); + //cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!"); + //printf("Post Softmax [dev_olayer = exp(dev_olayer)/Sum_Over_classses]\n"); + //printFloatArray(N*C, tmp, true); // STEP 5 // Compute Losses | Cross Entropy Loss @@ -453,10 +483,23 @@ namespace CharacterRecognition { checkCUDAErrorFn("kernLossPerN failed!"); // Copy back to cpu - cudaMemcpy(lossesN, dev_losses, N * sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to lossesN failed!"); - printf("Post dev_losses [Loss = CEntropy(P)]\n"); - printFloatArray(N, lossesN, true); + //cudaMemcpy(lossesN, dev_losses, N * sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to lossesN failed!"); + //printf("Post dev_losses [Loss = CEntropy(P)]\n"); + //printFloatArray(N, 
lossesN, true); + + + // Predictions + kernPredsN << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_gtruth, dev_preds, dev_preds_probab); + cudaMemcpy(preds, dev_preds, N*sizeof(int), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyDeviceToHost from dev_preds to preds failed!"); + cudaMemcpy(tmp2, dev_preds_probab, N*sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyDeviceToHost from dev_preds_probab to tmp failed!"); + + printf("Predictions\n"); + printArray(N, preds, true); + printFloatArray(N, tmp2, true); + // STEP 5.2 // Compute Avg of Losses @@ -467,15 +510,9 @@ namespace CharacterRecognition { // Copy back to cpu cudaMemcpy(lossAvgPerEpoch + i, dev_LossAvg, sizeof(double), cudaMemcpyDeviceToHost); checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_LossAvg to tmp failed!"); - //printf("Epoch: %d | LossAvg %3f \n", i, lossAvgPerEpoch[i]); - + + printf("Epoch: %d | LossAvg %3f \n", i, lossAvgPerEpoch[i]); - // Predictions - kernPredsN << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_gtruth, dev_preds); - cudaMemcpy(preds, dev_preds, N * sizeof(int), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyDeviceToHost from dev_preds to preds failed!"); - printf("Predictions\n"); - printArray(N, preds, true); //================================================================= //========================= BACKPROP ============================== @@ -489,33 +526,33 @@ namespace CharacterRecognition { checkCUDAErrorFn("kernSetdscores failed!"); // Copy back to cpu - cudaMemcpy(tmp, dev_dL_dscores, N*C * sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol [kernSetdscores] from dev_dL_dscores to tmp failed!"); - printf("Post setting loss at positions dev_dL_dscores \n"); - printFloatArray(N*C, tmp, true); + //cudaMemcpy(tmp, dev_dL_dscores, N*C * sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol [kernSetdscores] from dev_dL_dscores to tmp failed!"); + //printf("Post setting loss at positions dev_dL_dscores \n"); + //printFloatArray(N*C, tmp, true); kernDivNdscores << <((N*C + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_dL_dscores); checkCUDAErrorFn("kernDivNdscores failed!"); - cudaMemcpy(tmp, dev_dL_dscores, N*C * sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol [kernSetdscores] from dev_dL_dscores to tmp failed!"); - printf("Post div by N -> setting loss at positions-> dev_dL_dscores \n"); - printFloatArray(N*C, tmp, true); + //cudaMemcpy(tmp, dev_dL_dscores, N*C * sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol [kernSetdscores] from dev_dL_dscores to tmp failed!"); + //printf("Post div by N -> setting loss at positions-> dev_dL_dscores \n"); + //printFloatArray(N*C, tmp, true); dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x; dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; kernMatrixTranspose << > > (N, H, dev_hLayer, dev_hLayer_T); - cudaMemcpy(tmp, dev_hLayer, N*H * sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol dev_hLayer to tmp failed!"); - printf("dev_hLayer \n"); - printFloatArray(N*H, tmp, true); + //cudaMemcpy(tmp, dev_hLayer, N*H * sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol dev_hLayer to tmp failed!"); + //printf("dev_hLayer \n"); + //printFloatArray(N*H, tmp, true); - cudaMemcpy(tmp, dev_hLayer_T, N*H* sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol 
dev_hLayer_T to tmp failed!"); - printf("dev_hLayer_T \n"); - printFloatArray(N*H, tmp, true); + //cudaMemcpy(tmp, dev_hLayer_T, N*H* sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol dev_hLayer_T to tmp failed!"); + //printf("dev_hLayer_T \n"); + //printFloatArray(N*H, tmp, true); dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; @@ -528,76 +565,84 @@ namespace CharacterRecognition { // STEP 2 : Gradient wrt w_kj //=========================== - // Transpose Wji - //TODO HERE - //dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x; - //dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; - //kernMatrixTranspose << > > (N, H, dev_hLayer, dev_hLayer_T); - break; - // Mul dev_dL_dscores * dev_w_kj == dev_dL_dscores_2 + // Transpose Wji (W2) dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; - dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; - kernMatrixMultiply << > > (dev_dL_dscores, dev_w_kj, dev_dL_dscores_2, N, C, C); - checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); + dimGrid.y = (H + dimBlock.y - 1) / dimBlock.y; + kernMatrixTranspose << > > (H, C, dev_w_ji, dev_w_ji_T); + + // Transpose Input Data + dimGrid.x = (D + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; + kernMatrixTranspose << > > (N, D, dev_iLayer, dev_iLayer_T); + + // Mul dev_dL_dscores * dev_w_kj_T == dev_dL_dscores_2 + // NxC CxH NxH + dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; + kernMatrixMultiply << > > (dev_dL_dscores, dev_w_ji_T, dev_dL_dscores_2, N, C, H); + checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dscores_2 failed!"); - // compute sig gradient on dev_hlayer - kernGradSigmoid << <((N*C + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_hLayer); + // compute sig gradient on dev_hlayer N*H [IN PLACE] + kernGradSigmoid << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N, H, dev_hLayer); checkCUDAErrorFn("kernGradSigmoid failed!"); - //Element wise mul dev_dL_dscores_2 = dev_dL_dscores_2 . dev_hlayer[sig gradient] - kernElementProduct << <((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_dL_dscores_2, dev_hLayer, dev_dL_dscores_2); + //Element wise mul dev_dL_dscores_2 [INPLACE] = dev_dL_dscores_2 . 
dev_hlayer[sig gradient] + kernElementProduct << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_dL_dscores_2, dev_hLayer, dev_dL_dscores_2); checkCUDAErrorFn("kernElementProduct failed!"); - // Transpose X1 - dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; + // matrix Mul final with Xi_T + dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x; dimGrid.y = (D + dimBlock.y - 1) / dimBlock.y; - kernMatrixTranspose << > > (N, D, dev_iLayer, dev_iLayer_T); - - // matrix Mul - dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; - dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; - kernMatrixMultiply << > > (dev_iLayer_T, dev_dL_dscores_2, dev_dL_dw_kj, D, N, C); - checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); + kernMatrixMultiply << > > (dev_iLayer_T, dev_dL_dscores_2, dev_dL_dw_kj, D, N, H); + checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_kj failed!"); //================================================================= //========================= Update Weights========================= // Update weights kj - kernUpdateWeights << <((D*C + blockSize - 1) / blockSize), blockSize >> > (D*C, dev_dL_dw_kj, dev_w_kj, LR); - checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); + kernUpdateWeights << <((D*H + blockSize - 1) / blockSize), blockSize >> > (D*H, dev_dL_dw_kj, dev_w_kj, LR); + checkCUDAErrorFn("kernUpdateWeights dev_w_kj failed!"); // InitUpdate weights ji - kernUpdateWeights << <((C*C + blockSize - 1) / blockSize), blockSize >> > (C*C, dev_dL_dw_ji, dev_w_ji, LR); - checkCUDAErrorFn("kernInitWeights dev_w_ji failed!"); + kernUpdateWeights << <((H*C + blockSize - 1) / blockSize), blockSize >> > (H*C, dev_dL_dw_ji, dev_w_ji, LR); + checkCUDAErrorFn("kernUpdateWeights dev_w_ji failed!"); + + // Update biases1 + kernUpdateWeights << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_db1, dev_dL_dscores_2, LR); + checkCUDAErrorFn("kernUpdateWeights dev_w_kj failed!"); + + // InitUpdate biases2 + kernUpdateWeights << <((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_db2, dev_dL_dscores, LR); + checkCUDAErrorFn("kernUpdateWeights dev_w_ji failed!"); // COntinue to next epoch - - cudaMemcpy(tmp2, dev_w_kj, D*C * sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("dev_w_kj memcopy failed!"); - printf("w_kj \n"); - printFloatArray(D*C, tmp2, true); - cudaMemcpy(tmp2, dev_dL_dw_kj, D*C * sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("dev_dL_dw_kj memcopy failed!"); - printf("Dw_kj \n"); - printFloatArray(D*C, tmp2, true); + //cudaMemcpy(tmp2, dev_w_kj, D*H * sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("dev_w_kj memcopy failed!"); + //printf("w_kj \n"); + //printFloatArray(D*H, tmp2, true); + //cudaMemcpy(tmp2, dev_dL_dw_kj, D*H * sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("dev_dL_dw_kj memcopy failed!"); + //printf("Dw_kj \n"); + //printFloatArray(D*H, tmp2, true); - cudaMemcpy(tmp2, dev_w_ji, C*C * sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("dev_w_ji memcopy failed!"); - printf("w_ji \n"); - printFloatArray(C*C, tmp2, true); - cudaMemcpy(tmp2, dev_dL_dw_ji, C*C * sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("dev_dL_dw_ji memcopy failed!"); - printf("Dw_ji \n"); - printFloatArray(C*C, tmp2, true); + //cudaMemcpy(tmp2, dev_w_ji, H*C * sizeof(double), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("dev_w_ji memcopy failed!"); + //printf("w_ji \n"); + //printFloatArray(H*C, tmp2, true); + //cudaMemcpy(tmp2, dev_dL_dw_ji, H*C * sizeof(double), cudaMemcpyDeviceToHost); 
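// (A note on the two bias launches above: kernUpdateWeights(N, dev_dw, dev_w, LR)
// computes dev_w[tid] += -LR * dev_dw[tid], so passing (dev_db1, dev_dL_dscores_2)
// and (dev_db2, dev_dL_dscores) steps the gradient buffers by the never-written
// dev_db1/dev_db2 rather than updating the biases. A minimal corrected sketch,
// assuming the per-example bias layout that the N*H and N*C allocations for
// dev_b1 and dev_b2 imply:)
kernUpdateWeights <<<((N*H + blockSize - 1) / blockSize), blockSize >>> (N*H, dev_dL_dscores_2, dev_b1, LR);
checkCUDAErrorFn("kernUpdateWeights dev_b1 failed!");
kernUpdateWeights <<<((N*C + blockSize - 1) / blockSize), blockSize >>> (N*C, dev_dL_dscores, dev_b2, LR);
checkCUDAErrorFn("kernUpdateWeights dev_b2 failed!");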
+ //checkCUDAErrorFn("dev_dL_dw_ji memcopy failed!"); + //printf("Dw_ji \n"); + //printFloatArray(H*C, tmp2, true); - printf("Epoch: %d | LossAvg %3f \n", i, lossAvgPerEpoch[i]); printf("\n-----------------------------------------------------\n\n"); } printf("Finished training.\n"); + printf("losses:\n"); + printFloatArray(epochs, lossAvgPerEpoch, true); //==================== // CleanUp @@ -606,15 +651,21 @@ namespace CharacterRecognition { cudaFree(dev_hLayer); cudaFree(dev_oLayer); - cudaFree(dev_smaxDen); cudaFree(dev_losses); cudaFree(dev_gtruth); cudaFree(dev_preds); + cudaFree(dev_preds_probab); + cudaFree(dev_w_kj); cudaFree(dev_w_ji); + cudaFree(dev_b1); + cudaFree(dev_b2); + cudaFree(dev_db1); + cudaFree(dev_db2); + cudaFree(dev_dL_dw_ji); cudaFree(dev_dL_dw_kj); diff --git a/Project2-Character-Recognition/src/main.cpp b/Project2-Character-Recognition/src/main.cpp index f93a64e..2f45ed0 100644 --- a/Project2-Character-Recognition/src/main.cpp +++ b/Project2-Character-Recognition/src/main.cpp @@ -11,17 +11,28 @@ #include #include "testing_helpers.hpp" -//____CONFIG Neural Network_____________________ -const int N = 4; // 52; // Number of examples -const int D = 2; //; // Feature length per example -const int H = 2; //; // Number of Hidden unit +//====CONFIG Neural Network for XOR ================ +//================================================== + +/* +// XOR +const int N = 4; // Number of examples +const int D = 2; //Feature length per example +const int H = 2; // Number of Hidden unit const int C = 2; // NN number of classes +const double LR = 0.5; +const int epochs = 1000; +*/ -const double LR = 0.01; +// Char Recognition +const int N = 52; // Number of examples +const int D = 10201; // Feature length per example +const int H = 10; // Number of Hidden unit +const int C = 52; // NN number of classes +const double LR = 0.5; +const int epochs = 5000; -const int epochs = 100; double *losses = new double[epochs]; - double *idata = new double[N*D]; int * preds = new int[N]; int * gtruth = new int[N]; @@ -37,8 +48,8 @@ int main(int argc, char* argv[]) { printf("Launch XOR Training\n"); - + /* // XOR input dtat set 2 * 4 idata[0] = 0; idata[1] = 0; @@ -56,8 +67,11 @@ int main(int argc, char* argv[]) { gtruth[3] = 0; CharacterRecognition::trainMLP(N, D, H, C, idata, preds, gtruth, epochs, losses, LR); - printf("\nCompleted XOR Training\n"); + */ + + // Data loading + return 0; } From 0f465526e6ae1752a41bd9cc7f031e10737b7495 Mon Sep 17 00:00:00 2001 From: CHHAVI SHARMA Date: Tue, 17 Sep 2019 03:09:35 -0400 Subject: [PATCH 08/76] latest working state --- .../character_recognition/mlp.cu | 62 +- .../character_recognition/mlp.cu_back | 601 +++++++++++------- .../character_recognition/mlp.h | 2 +- Project2-Character-Recognition/src/main.cpp | 41 +- .../src/main.cpp_back | 158 +++++ 5 files changed, 579 insertions(+), 285 deletions(-) create mode 100644 Project2-Character-Recognition/src/main.cpp_back diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index 944974c..0d7a5ca 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -32,7 +32,7 @@ namespace CharacterRecognition { double *dev_losses; double *dev_LossAvg; - + // gtruth and preds int *dev_gtruth; int *dev_preds; @@ -156,7 +156,7 @@ namespace CharacterRecognition { int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < N) { - dev_preds[tid] = dev_oLayer[tid*C 
+ dev_gtruth[tid]] > 0.5 ? dev_gtruth[tid] : (dev_gtruth[tid]==0 ? 1:0) ; + dev_preds[tid] = dev_oLayer[tid*C + dev_gtruth[tid]] > 0.5 ? dev_gtruth[tid] : (dev_gtruth[tid] == 0 ? 1 : 0); dev_preds_probab[tid] = dev_oLayer[tid*C + dev_gtruth[tid]]; } } @@ -232,13 +232,13 @@ namespace CharacterRecognition { for (int i = 0; i < N; i++) { sum += dev_losses[i]; } - dev_LossAvg[0] = sum/N; + dev_LossAvg[0] = sum / N; } } // Ele wise addition A = A+B - __global__ void kernAddition(int N, double *dev_A, double *dev_B) { + __global__ void kernAddition(int N, double *dev_A, double *dev_B) { int tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -276,7 +276,7 @@ namespace CharacterRecognition { // Allocate hidden layer - cudaMalloc((void**)&dev_hLayer, N*H* sizeof(double)); + cudaMalloc((void**)&dev_hLayer, N*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer failed!"); @@ -288,8 +288,8 @@ namespace CharacterRecognition { // Allocate losses holder cudaMalloc((void**)&dev_losses, N * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_losses failed!"); - - cudaMalloc((void**)&dev_LossAvg, 1* sizeof(double)); + + cudaMalloc((void**)&dev_LossAvg, 1 * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_LossAvg failed!"); @@ -327,7 +327,7 @@ namespace CharacterRecognition { cudaMalloc((void**)&dev_dL_dscores_2, N*C * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_dL_dscores_2 failed!"); - + // Allocate transposes cudaMalloc((void**)&dev_hLayer_T, N*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); @@ -335,43 +335,43 @@ namespace CharacterRecognition { cudaMalloc((void**)&dev_iLayer_T, N*D * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); - cudaMalloc((void**)&dev_w_ji_T, C*H*sizeof(double)); + cudaMalloc((void**)&dev_w_ji_T, C*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_w_ji_T failed!"); //Allocate biases - cudaMalloc((void**)&dev_b1, N*H* sizeof(double)); + cudaMalloc((void**)&dev_b1, N*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); - cudaMalloc((void**)&dev_b2, N*C* sizeof(double)); + cudaMalloc((void**)&dev_b2, N*C * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); - cudaMalloc((void**)&dev_db1, N*H* sizeof(double)); + cudaMalloc((void**)&dev_db1, N*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); - cudaMalloc((void**)&dev_db2, N*C* sizeof(double)); + cudaMalloc((void**)&dev_db2, N*C * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); //============================== // Initialise Weights and Biases //============================== - cudaMalloc((void**)&devState, H*D* sizeof(curandState)); + cudaMalloc((void**)&devState, H*D * sizeof(curandState)); - kernInitCurand <<<((D*H + blockSize - 1) / blockSize), blockSize >>> (devState, D*H, 99); + kernInitCurand << <((D*H + blockSize - 1) / blockSize), blockSize >> > (devState, D*H, 99); checkCUDAErrorFn("KernInitCurand failed!"); - KernGenRand <<<((D*H + blockSize - 1) / blockSize), blockSize >>> (devState, D*H, dev_w_kj); + KernGenRand << <((D*H + blockSize - 1) / blockSize), blockSize >> > (devState, D*H, dev_w_kj); checkCUDAErrorFn("KernGenRand dev_w_kj failed!"); - kernInitCurand <<<((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, 999); + kernInitCurand << <((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, 999); checkCUDAErrorFn("KernInitCurand failed!"); - KernGenRand <<<((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, dev_w_ji); + KernGenRand << 
<((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, dev_w_ji); checkCUDAErrorFn("KernGenRand dev_w_kj failed!"); kernInitCurand << <((N*C + blockSize - 1) / blockSize), blockSize >> > (devState, N*C, 9); checkCUDAErrorFn("KernInitCurand failed!"); - KernGenRand << <((N*C + blockSize - 1) / blockSize), blockSize >> > (devState, N*C, dev_b2); + KernGenRand << <((N*C + blockSize - 1) / blockSize), blockSize >> > (devState, N*C, dev_b2); checkCUDAErrorFn("KernGenRand dev_w_kj failed!"); kernInitCurand << <((N*H + blockSize - 1) / blockSize), blockSize >> > (devState, N*H, 9999); @@ -419,8 +419,8 @@ namespace CharacterRecognition { dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x; dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; - kernMatrixMultiply <<> > (dev_iLayer, dev_w_kj, dev_hLayer, N, D, H); - kernAddition <<< ((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_hLayer,dev_b1); + kernMatrixMultiply << > > (dev_iLayer, dev_w_kj, dev_hLayer, N, D, H); + kernAddition << < ((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_hLayer, dev_b1); // Copy back to cpu //double *tmp = new double[N*H]; @@ -466,7 +466,7 @@ namespace CharacterRecognition { // dev_smaxDen = Sum_Over_classses(dev_olayer) // dev_olayer = dev_olayer/Sum_Over_classses // NxC = NxC 1 - kernSoftmax <<<((N + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_oLayer); + kernSoftmax << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer); checkCUDAErrorFn("kernSoftmax failed!"); // Copy back to cpu @@ -491,9 +491,9 @@ namespace CharacterRecognition { // Predictions kernPredsN << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_gtruth, dev_preds, dev_preds_probab); - cudaMemcpy(preds, dev_preds, N*sizeof(int), cudaMemcpyDeviceToHost); + cudaMemcpy(preds, dev_preds, N * sizeof(int), cudaMemcpyDeviceToHost); checkCUDAErrorFn("cudaMemcpyDeviceToHost from dev_preds to preds failed!"); - cudaMemcpy(tmp2, dev_preds_probab, N*sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(tmp2, dev_preds_probab, N * sizeof(double), cudaMemcpyDeviceToHost); checkCUDAErrorFn("cudaMemcpyDeviceToHost from dev_preds_probab to tmp failed!"); printf("Predictions\n"); @@ -505,12 +505,12 @@ namespace CharacterRecognition { // Compute Avg of Losses //================================== // Dumb Reduction - + kernReduction << <((N + blockSize - 1) / blockSize), blockSize >> > (N, dev_losses, dev_LossAvg); // Copy back to cpu cudaMemcpy(lossAvgPerEpoch + i, dev_LossAvg, sizeof(double), cudaMemcpyDeviceToHost); checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_LossAvg to tmp failed!"); - + printf("Epoch: %d | LossAvg %3f \n", i, lossAvgPerEpoch[i]); @@ -519,7 +519,7 @@ namespace CharacterRecognition { // STEP 1 : Gradient wrt w_ji // dW_ji = Probs_k - [1](gth == k) dev_dL_dscores; - cudaMemcpy(dev_dL_dscores, dev_oLayer, N*C* sizeof(double), cudaMemcpyDeviceToDevice); + cudaMemcpy(dev_dL_dscores, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToDevice); checkCUDAErrorFn("cudaMemcpyFromSymbol from probabs to dev_dL_dscores failed!"); kernSetdscores << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_dL_dscores, dev_gtruth); @@ -560,7 +560,7 @@ namespace CharacterRecognition { kernMatrixMultiply << > > (dev_hLayer_T, dev_dL_dscores, dev_dL_dw_ji, H, N, C); checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); - + //=========================== // STEP 2 : Gradient wrt w_kj //=========================== @@ -581,7 +581,7 @@ namespace CharacterRecognition { dimGrid.y 
= (N + dimBlock.y - 1) / dimBlock.y; kernMatrixMultiply << > > (dev_dL_dscores, dev_w_ji_T, dev_dL_dscores_2, N, C, H); checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dscores_2 failed!"); - + // compute sig gradient on dev_hlayer N*H [IN PLACE] kernGradSigmoid << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N, H, dev_hLayer); checkCUDAErrorFn("kernGradSigmoid failed!"); @@ -625,7 +625,7 @@ namespace CharacterRecognition { //checkCUDAErrorFn("dev_dL_dw_kj memcopy failed!"); //printf("Dw_kj \n"); //printFloatArray(D*H, tmp2, true); - + //cudaMemcpy(tmp2, dev_w_ji, H*C * sizeof(double), cudaMemcpyDeviceToHost); //checkCUDAErrorFn("dev_w_ji memcopy failed!"); //printf("w_ji \n"); @@ -652,7 +652,7 @@ namespace CharacterRecognition { cudaFree(dev_oLayer); cudaFree(dev_losses); - + cudaFree(dev_gtruth); cudaFree(dev_preds); cudaFree(dev_preds_probab); diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu_back b/Project2-Character-Recognition/character_recognition/mlp.cu_back index fce922c..210e535 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu_back +++ b/Project2-Character-Recognition/character_recognition/mlp.cu_back @@ -11,35 +11,47 @@ namespace CharacterRecognition { - using Common::PerformanceTimer; - PerformanceTimer& timer() - { - static PerformanceTimer timer; - return timer; - } - + using Common::PerformanceTimer; + PerformanceTimer& timer() + { + static PerformanceTimer timer; + return timer; + } + // Initlialiations - + //layers - double *dev_iLayer; - double *dev_hLayer; - double *dev_oLayer; - double *dev_smaxDen; - double *dev_losses; + float *dev_iLayer; + float *dev_hLayer; + float *dev_oLayer; + + //float *dev_b1; + //float *dev_b2; + //float *dev_db1; + //float *dev_db2; + + float *dev_losses; + float *dev_LossAvg; + + // gtruth and preds int *dev_gtruth; + int *dev_preds; + float * dev_preds_probab; //weights - double *dev_w_kj; - double *dev_w_ji; + float *dev_w1; + float *dev_w2; //Derivatives - double *dev_dL_dw_ji; - double *dev_dL_dw_kj; - double *dev_dL_dscores; - double *dev_dL_dscores_2; + float *dev_dw2; + float *dev_dw1; + float *dev_dscores; + float *dev_dscores_2; + + float *dev_hLayer_T; + float *dev_iLayer_T; + float *dev_w2_T; - double *dev_hLayer_T; - double *dev_iLayer_T; //============================================= // Rnadom Number Generation using cuRand on GPU @@ -53,17 +65,18 @@ namespace CharacterRecognition { } } - __global__ void KernGenRand(curandState *state, int N, double *w) { + __global__ void KernGenRand(curandState *state, int N, float *w) { int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < N) { - w[tid] = 2.0*curand_uniform(&state[tid]) - 1; // Between -1 and 1 + w[tid] = ((2.0*curand_uniform(&state[tid])) - 1.0); // Between -1 and 1 } } //=================================================================== //=====KERNEL DEFNITIONS FOR Forward and Backward==================== //=================================================================== - + + void printArray(int n, int *a, bool abridged = false) { printf(" [ "); for (int i = 0; i < n; i++) { @@ -73,9 +86,9 @@ namespace CharacterRecognition { } printf("%3d ", a[i]); } - printf("]\n"); + printf("]\n\n"); } - void printFloatArray(int n, double *a, bool abridged = false) { + void printFloatArray(int n, float *a, bool abridged = false) { printf(" [ "); for (int i = 0; i < n; i++) { if (abridged && i + 2 == 15 && n > 16) { @@ -84,12 +97,13 @@ namespace CharacterRecognition { } printf("%3f ", a[i]); } - printf("]\n"); + 
printf("]\n\n"); } + // Kernel for Gradient update on Weights - __global__ void kernUpdateWeights(int N, double *dev_dw, double *dev_w, double LR) { + __global__ void kernUpdateWeights(int N, float *dev_dw, float *dev_w, float LR) { int tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -99,17 +113,17 @@ namespace CharacterRecognition { } // Kernel for derivative of sigmoid - __global__ void kernGradSigmoid(int N, int C, double *dev_hLayer) { + __global__ void kernGradSigmoid(int N, int H, float *dev_hLayer) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - - if (tid < N*C) { + + if (tid < N*H) { dev_hLayer[tid] = dev_hLayer[tid] * (1 - dev_hLayer[tid]); } } // Matrix Transpose - __global__ void kernMatrixTranspose(int N, int C, double *matrix, double *matrix_T) { + __global__ void kernMatrixTranspose(int N, int C, float *matrix, float *matrix_T) { int row = blockIdx.y * blockDim.y + threadIdx.y; int col = blockIdx.x * blockDim.x + threadIdx.x; @@ -120,75 +134,71 @@ namespace CharacterRecognition { } // Divide by N - __global__ void kernDivNdscores(int N, int C, double *dev_dL_dscores) { - + __global__ void kernDivNdscores(int N, int C, float *dev_dscores) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < N*C) { - dev_dL_dscores[tid] /= N; + dev_dscores[tid] /= N; } } // Compute dscores gradient - __global__ void kernSetdscores(int N, int C, double *dev_dL_dscores, int *dev_gtruth) { + __global__ void kernSetdscores(int N, int C, float *dev_dscores, int *dev_gtruth) { int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < N) { - dev_dL_dscores[tid*C + dev_gtruth[tid]] -= 1; + dev_dscores[tid*C + dev_gtruth[tid]] -= 1; } } - // compute loss per example - __global__ void kernLossPerN(int N, int C, double* dev_oLayer, int* dev_gtruth, double* dev_losses) { + // compute predictions + __global__ void kernPredsN(int N, int C, float* dev_oLayer, int* dev_gtruth, int* dev_preds, float * dev_preds_probab) { int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < N) { - dev_losses[tid] = -log(dev_oLayer[tid*C + dev_gtruth[tid]]); + dev_preds[tid] = dev_oLayer[tid*C + dev_gtruth[tid]] > 0.5 ? dev_gtruth[tid] : (dev_gtruth[tid]==0 ? 1:0) ; + dev_preds_probab[tid] = dev_oLayer[tid*C + dev_gtruth[tid]]; } } - // kernel to compute exp softmax - __global__ void kernSoftmax(int N, int C, double* scores, double *sums) { + // compute loss per example + __global__ void kernLossPerN(int N, int C, float* dev_oLayer, int* dev_gtruth, float* dev_losses) { int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N) { - for (int i = 0; i < C; i++) { - scores[tid*C + i] = exp(scores[tid*C + i]) / sums[tid]; - } + dev_losses[tid] = (float)(-log(dev_oLayer[tid*C + dev_gtruth[tid]])); } } - // kernel to exp sum across classes - __global__ void kernSumRow(int N, int C, double* scores, double *sums) { + // kernel to compute exp softmax + __global__ void kernSoftmax(int N, int C, float* scores) { int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N) { + float sums = 0.0; + for (int i = 0; i < C; i++) { - sums[tid] += exp(scores[tid*C + i]); + sums += exp(scores[tid*C + i]); } - } - } - - // kernel to init weights - __global__ void kernInitWeights(int N, double* weights) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - - if (tid < N) { - weights[tid] = 0.5; + for (int i = 0; i < C; i++) { + scores[tid*C + i] = exp(scores[tid*C + i]) / sums; + } } - } // kern for sigmoid // f(x) = 1/(1 + e^-x). 
- __global__ void kernSigmoid(int N, double *idata) { + __global__ void kernSigmoid(int N, float *idata) { int tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid < N) { - idata[tid] = 1.0 / (1.0 + std::exp(-idata[tid])); + idata[tid] = 1.0 / (1.0 + exp(-idata[tid])); } } - + // kern for element wise product - __global__ void kernElementProduct(int N, double *matrixA, double* matrixB, double* matrixC) { + __global__ void kernElementProduct(int N, float *matrixA, float* matrixB, float* matrixC) { int tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -199,193 +209,259 @@ namespace CharacterRecognition { // kernel to to matmul // A mxn // B nxk // C mxk - __global__ void kernMatrixMultiply(const double *dev_A, const double *dev_B, double *dev_C, int m, int n, int k) { + __global__ void kernMatrixMultiply(const float *dev_A, const float *dev_B, float *dev_C, int m, int n, int k) { int row = blockIdx.y * blockDim.y + threadIdx.y; int col = blockIdx.x * blockDim.x + threadIdx.x; - double sum = 0; + float sum = 0; if (col < k && row < m) { - for (int i = 0; i < n; i++) + for (int i = 0; i < n; i++) { sum += dev_A[row * n + i] * dev_B[i * k + col]; + } dev_C[row * k + col] = sum; } } - + // Dumb reduction - __global__ void kernReduction(int N, double *dev_losses) { + __global__ void kernReduction(int N, float *dev_losses, float *dev_LossAvg) { int tid = blockIdx.x * blockDim.x + threadIdx.x; - double sum = 0.0; + float sum = 0.0; if (tid == 0) { for (int i = 0; i < N; i++) { - sum += dev_losses[tid]; + sum += dev_losses[i]; } - dev_losses[N-1]=sum; + dev_LossAvg[0] = sum/N; } } - void trainMLP(int N, int D, int C, double *idata, int *preds, int *gtruth, int epochs, double *losses, const double LR) { - + // Ele wise addition A = A+B + __global__ void kernAddition(int N, float *dev_A, float *dev_B) { + + int tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid < N) { + dev_A[tid] += dev_B[tid]; + } + + } + + void trainMLP(int N, int D, int H, int C, float *idata, int *preds, int *gtruth, int epochs, float *lossAvgPerEpoch, const float LR, unsigned long seed) { + timer().startGpuTimer(); // N = number of examples // D = dim of each example + // H = Hidden layer nodes // C = number of classes // NETWORK DEFITION_____________ - // Compute f1 = W1*X1 + // Compute f1 = W1*X1 + b1 // Compute X2 = Sig(f1) - // Compute Scroes S = W2*X2 + // Compute Scroes S = W2*X2 + b2 // Compute Probab P = Softmax(S) // Compute Loss L = CEntropy(P) //================================================================ //======================INITIALIZATIONS=========================== //================================================================ - + + printf("\nN = %d \n", N); + printf("D = %d \n", D); + printf("H = %d \n", H); + printf("C = %d \n", C); + // Allocate input layer - cudaMalloc((void**)&dev_iLayer, N*D*sizeof(double)); + cudaMalloc((void**)&dev_iLayer, N*D * sizeof(float)); checkCUDAErrorFn("cudaMalloc dev_iLayer failed!"); - - cudaMemcpy(dev_iLayer, idata, N*D*sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(dev_iLayer, idata, N*D * sizeof(float), cudaMemcpyHostToDevice); checkCUDAErrorFn("cudaMemcpyToSymbol from idata to dev_iLayer failed!"); - // Allocate hidden layer - cudaMalloc((void**)&dev_hLayer, N*C* sizeof(double)); + cudaMalloc((void**)&dev_hLayer, N*H* sizeof(float)); checkCUDAErrorFn("cudaMalloc dev_hLayer failed!"); - // Allocate output layer - cudaMalloc((void**)&dev_oLayer, N*C* sizeof(double)); + cudaMalloc((void**)&dev_oLayer, N*C* sizeof(float)); checkCUDAErrorFn("cudaMalloc 
dev_oLayer failed!"); - // Allocate softmax Den holder - cudaMalloc((void**)&dev_smaxDen, N* sizeof(double)); - checkCUDAErrorFn("cudaMalloc dev_smaxDen failed!"); - - // Allocate losses holder - cudaMalloc((void**)&dev_losses, N*sizeof(double)); + cudaMalloc((void**)&dev_losses, N * sizeof(float)); checkCUDAErrorFn("cudaMalloc dev_losses failed!"); + cudaMalloc((void**)&dev_LossAvg, 1* sizeof(float)); + checkCUDAErrorFn("cudaMalloc dev_LossAvg failed!"); - // Allocate gtruth holder - cudaMalloc((void**)&dev_gtruth , N * sizeof(int)); + // Allocate gtruth and preds + cudaMalloc((void**)&dev_gtruth, N*sizeof(int)); checkCUDAErrorFn("cudaMalloc dev_gtruth failed!"); - - cudaMemcpy(dev_gtruth, gtruth, N*sizeof(int), cudaMemcpyHostToDevice); + cudaMemcpy(dev_gtruth, gtruth, N * sizeof(int), cudaMemcpyHostToDevice); checkCUDAErrorFn("cudaMemcpyToSymbol from gtruth to dev_gtruth failed!"); + cudaMalloc((void**)&dev_preds, N * sizeof(int)); + checkCUDAErrorFn("cudaMalloc dev_preds failed!"); + + cudaMalloc((void**)&dev_preds_probab, N * sizeof(float)); + checkCUDAErrorFn("cudaMalloc dev_preds_probab failed!"); // Allocate Weights - cudaMalloc((void**)&dev_w_kj, D*C*sizeof(double)); - checkCUDAErrorFn("cudaMalloc dev_w_kj failed!"); + cudaMalloc((void**)&dev_w1, D*H* sizeof(float)); + checkCUDAErrorFn("cudaMalloc dev_w1 failed!"); + + cudaMalloc((void**)&dev_w2, C*H* sizeof(float)); + checkCUDAErrorFn("cudaMalloc dev_w2 failed!"); - cudaMalloc((void**)&dev_w_ji, C*C* sizeof(double)); - checkCUDAErrorFn("cudaMalloc dev_w_ji failed!"); - // Allocate Derivatives - cudaMalloc((void**)&dev_dL_dw_kj, D*C* sizeof(double)); - checkCUDAErrorFn("cudaMalloc dev_w_kj failed!"); + cudaMalloc((void**)&dev_dw1, D*H* sizeof(float)); + checkCUDAErrorFn("cudaMalloc dev_w1 failed!"); - cudaMalloc((void**)&dev_dL_dw_ji, C*C* sizeof(double)); - checkCUDAErrorFn("cudaMalloc dev_w_ji failed!"); + cudaMalloc((void**)&dev_dw2, H*C* sizeof(float)); + checkCUDAErrorFn("cudaMalloc dev_w2 failed!"); - cudaMalloc((void**)&dev_dL_dscores, N*C*sizeof(double)); - checkCUDAErrorFn("cudaMalloc dev_dL_dscores failed!"); + cudaMalloc((void**)&dev_dscores, N*C* sizeof(float)); + checkCUDAErrorFn("cudaMalloc dev_dscores failed!"); - cudaMalloc((void**)&dev_dL_dscores_2, N*C * sizeof(double)); - checkCUDAErrorFn("cudaMalloc dev_dL_dscores_2 failed!"); + cudaMalloc((void**)&dev_dscores_2, N*C* sizeof(float)); + checkCUDAErrorFn("cudaMalloc dev_dscores_2 failed!"); + + + // Allocate transposes + cudaMalloc((void**)&dev_hLayer_T, N*H * sizeof(float)); + checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); - cudaMalloc((void**)&dev_hLayer_T, N*C* sizeof(double)); + cudaMalloc((void**)&dev_iLayer_T, N*D * sizeof(float)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); + + cudaMalloc((void**)&dev_w2_T, C*H*sizeof(float)); + checkCUDAErrorFn("cudaMalloc dev_w2_T failed!"); - cudaMalloc((void**)&dev_iLayer_T, N*D* sizeof(double)); + /* + //Allocate biases + cudaMalloc((void**)&dev_b1, N*H* sizeof(float)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); + cudaMalloc((void**)&dev_b2, N*C* sizeof(float)); + checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); - // Initialise Weights + cudaMalloc((void**)&dev_db1, N*H* sizeof(float)); + checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); - kernInitCurand << <((D*C + blockSize - 1) / blockSize), blockSize >> > (devState, D*C, 27); - checkCUDAErrorFn("KernInitCurand failed!"); + cudaMalloc((void**)&dev_db2, N*C* sizeof(float)); + checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!");*/ - 
KernGenRand << <((D*C + blockSize - 1) / blockSize), blockSize >> > (devState, D*C, dev_w_kj); - checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); + //============================== + // Initialise Weights and Biases + //============================== + cudaMalloc((void**)&devState, H*D* sizeof(curandState)); - kernInitCurand << <((C*C + blockSize - 1) / blockSize), blockSize >> > (devState, C*C, 18); + kernInitCurand <<<((D*H + blockSize - 1) / blockSize), blockSize >>> (devState, D*H, seed); //XOR = 99 checkCUDAErrorFn("KernInitCurand failed!"); + KernGenRand <<<((D*H + blockSize - 1) / blockSize), blockSize >>> (devState, D*H, dev_w1); + checkCUDAErrorFn("KernGenRand dev_w1 failed!"); - KernGenRand << <((C*C + blockSize - 1) / blockSize), blockSize >> > (devState, C*C, dev_w_ji); - checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); - - //kernInitWeights <<<((D*C + blockSize - 1) / blockSize), blockSize >> > (D*C, dev_w_kj); - //checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); + kernInitCurand <<<((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, seed+1);//XOR = 999 + checkCUDAErrorFn("KernInitCurand failed!"); + KernGenRand <<<((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, dev_w2); + checkCUDAErrorFn("KernGenRand dev_w1 failed!"); + + //kernInitCurand << <((N*C + blockSize - 1) / blockSize), blockSize >> > (devState, N*C, seed+2);//XOR = 9 + //checkCUDAErrorFn("KernInitCurand failed!"); + //KernGenRand << <((N*C + blockSize - 1) / blockSize), blockSize >> > (devState, N*C, dev_b2); + //checkCUDAErrorFn("KernGenRand dev_w1 failed!"); + + //kernInitCurand << <((N*H + blockSize - 1) / blockSize), blockSize >> > (devState, N*H, seed+3);//XOR = 9999 + //checkCUDAErrorFn("KernInitCurand failed!"); + //KernGenRand << <((N*H + blockSize - 1) / blockSize), blockSize >> > (devState, N*H, dev_b1); + //checkCUDAErrorFn("KernGenRand dev_w1 failed!"); + + /* + float *rand = new float[D*H]; + cudaMemcpy(rand, dev_w1, D*H* sizeof(float), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_w1 to rand failed!"); + printf("Post random inits dev_w1 - \n"); + printFloatArray(D*H, rand, true); + + float *rand2 = new float[H*C]; + cudaMemcpy(rand2, dev_w2, H*C * sizeof(float), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_w2 to rand failed!"); + printf("Post random inits dev_w2 - \n"); + printFloatArray(H*C, rand2, true);*/ + + + float *tmp = new float[N*D]; + float *tmp2 = new float[N*D]; + float *tmp3 = new float[N*D]; + float *lossesN = new float[N]; + + printf("\nInput DATA\n"); + printFloatArray(N*D, idata, true); + dim3 dimBlock(blockWidth, blockWidth); + dim3 dimGrid; - //kernInitWeights <<<((C*C + blockSize - 1) / blockSize), blockSize >> > (C*C, dev_w_ji); - //checkCUDAErrorFn("kernInitWeights dev_w_ji failed!"); - double *tmp = new double[N*C]; - double *tmp2 = new double[D*D]; //================================================================ //======================TRAINING LOOP============================= //================================================================ - - for (int i = 0; i < epochs; i++) { - + + for (int ep = 0; ep < epochs; ep++) { + //================================================================ //========================= FORWARD ============================== - + // STEP 1 // f1 = W1*X1 (Matrix Mul) //================================= - // dev_hLayer = dev_iLayer*dev_w_kj - // NxC = NxD DxC + // dev_hLayer = dev_iLayer * dev_w1 + // NxH = NxD DxH - dim3 
dimBlock(blockWidth, blockWidth); - dim3 dimGrid; - dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; - dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; - kernMatrixMultiply <<>> (dev_iLayer, dev_w_kj, dev_hLayer, N, D, C); + + dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; + kernMatrixMultiply << > > (dev_iLayer, dev_w1, dev_hLayer, N, D, H); + //kernAddition <<< ((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_hLayer,dev_b1); + //checkCUDAErrorFn("kernAddition failed!"); // Copy back to cpu + cudaMemcpy(tmp, dev_hLayer, N*H*sizeof(float), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_hLayer to tmp failed!"); + printf("Post matmul [f1 = dev_iLayer*dev_w1]\n"); + printFloatArray(N*H, tmp, true); - //cudaMemcpy(tmp, dev_hLayer, N*C* sizeof(double), cudaMemcpyDeviceToHost); - //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); - //printf("Post matmul\n"); - //printFloatArray(N*C, tmp, true); - // STEP 2 // X2 = Sigmoid(f1) //================================ // dev_hLayer = sigmoid(dev_hLayer) - // NxC = NxC - kernSigmoid <<<((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_hLayer); + // NxH = NxH + kernSigmoid << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_hLayer); // Copy back to cpu - //cudaMemcpy(tmp, dev_hLayer, N*C* sizeof(double), cudaMemcpyDeviceToHost); + //cudaMemcpy(tmp, dev_hLayer, N*H*sizeof(float), cudaMemcpyDeviceToHost); //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); - //printf("Post sigmoid\n"); - //printFloatArray(N*C, tmp, true); + //printf("Post sigmoid [X2 = Sigmoid(f1) ]\n"); + //printFloatArray(N*H, tmp, true); // STEP 3 // Scores S = W2*X2 (Matrix Mul) //================================ - // dev_oLayer = dev_hLayer*dev_w_ji - // NxC = NxC CxC - kernMatrixMultiply <<>> (dev_hLayer, dev_w_ji, dev_oLayer, N, C, C); + // dev_oLayer = dev_hLayer*dev_w2 + // NxC = NxH HxC + dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; + kernMatrixMultiply << > > (dev_hLayer, dev_w2, dev_oLayer, N, H, C); checkCUDAErrorFn("kernMatrixMultiply failed!"); + //kernAddition << < ((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_oLayer, dev_b2); + //checkCUDAErrorFn("kernAddition failed!"); // Copy back to cpu - //cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToHost); + //cudaMemcpy(tmp3, dev_oLayer, N*C * sizeof(float), cudaMemcpyDeviceToHost); //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!"); - //printf("Post S=W2*x2\n"); - //printFloatArray(N*C, tmp, true); + //printf("Post SCORES =W2*x2\n"); + //printFloatArray(N*C, tmp3, true); // STEP 4 // P = Softmax(S) @@ -393,150 +469,211 @@ namespace CharacterRecognition { // dev_smaxDen = Sum_Over_classses(dev_olayer) // dev_olayer = dev_olayer/Sum_Over_classses // NxC = NxC 1 - kernSumRow<<<((N + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_oLayer, dev_smaxDen); - kernSoftmax << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_smaxDen); - checkCUDAErrorFn("kernSumRow or kernSoftmax failed!"); + kernSoftmax << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer); + checkCUDAErrorFn("kernSoftmax failed!"); // Copy back to cpu - //cudaMemcpy(tmp, dev_smaxDen, N*sizeof(double), cudaMemcpyDeviceToHost); + //cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(float), cudaMemcpyDeviceToHost); //checkCUDAErrorFn("cudaMemcpyFromSymbol from 
dev_oLayer to tmp failed!"); - //printf("Post dev_smaxDen\n"); - //printFloatArray(N, tmp, true); + //printf("Post Softmax [dev_olayer = exp(dev_olayer)/Sum_Over_classses]\n"); + //printFloatArray(N*C, tmp, true); - // Copy back to cpu - cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!"); - printf("Post Softmax\n"); - printFloatArray(N*C, tmp, true); - // Compute Loss | Cross Entropy Loss + // STEP 5 + // Compute Losses | Cross Entropy Loss //================================== // Compute Loss L = CEntropy(P) - kernLossPerN<<<((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_gtruth, dev_losses); + kernLossPerN <<<((N + blockSize - 1) / blockSize), blockSize >>>(N, C, dev_oLayer, dev_gtruth, dev_losses); checkCUDAErrorFn("kernLossPerN failed!"); // Copy back to cpu - cudaMemcpy(tmp, dev_losses, N*sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to tmp failed!"); - printf("Post dev_losses\n"); - printFloatArray(N, tmp, true); + //cudaMemcpy(lossesN, dev_losses, N * sizeof(float), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to lossesN failed!"); + //printf("Post dev_losses [Loss = CEntropy(P)]\n"); + //printFloatArray(N, lossesN, true); + + // Predictions + kernPredsN << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_gtruth, dev_preds, dev_preds_probab); + cudaMemcpy(preds, dev_preds, N*sizeof(int), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyDeviceToHost from dev_preds to preds failed!"); + + cudaMemcpy(tmp2, dev_preds_probab, N*sizeof(float), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyDeviceToHost from dev_preds_probab to tmp2 failed!"); + + printf("Predictions\n"); + printArray(N, preds, true); + printFloatArray(N, tmp2, true); + + + // STEP 5.2 + // Compute Avg of Losses + //================================== // Dumb Reduction - kernReduction<< <((N + blockSize - 1) / blockSize), blockSize >> > (N, dev_losses); + + kernReduction << <((N + blockSize - 1) / blockSize), blockSize >> > (N, dev_losses, dev_LossAvg); // Copy back to cpu - cudaMemcpy(tmp, dev_losses+N-1, sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to tmp failed!"); - printf("Epoch: %3d | dev_loss %3f \n", i, tmp[0]); + cudaMemcpy(lossAvgPerEpoch + ep, dev_LossAvg, sizeof(float), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_LossAvg to lossAvgPerEpoch failed!"); - // Track loss here - losses[i] = tmp[0]; + printf("Epoch: %d | LossAvg %3f \n", ep, lossAvgPerEpoch[ep]); + //================================================================= //========================= BACKPROP ============================== - + // STEP 1 : Gradient wrt w_ji - // dW_ji = Probs_k - [1](gth == k) dev_dL_dscores; - cudaMemcpy(dev_dL_dscores, dev_oLayer, N*C*sizeof(double), cudaMemcpyDeviceToDevice); - checkCUDAErrorFn("cudaMemcpyFromSymbol from probabs to dev_dL_dscores failed!"); - - kernSetdscores << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_dL_dscores, dev_gtruth); + // dW_ji = Probs_k - [1](gth == k) dev_dscores; + cudaMemcpy(dev_dscores, dev_oLayer, N*C* sizeof(float), cudaMemcpyDeviceToDevice); + checkCUDAErrorFn("cudaMemcpyFromSymbol from probabs to dev_dscores failed!"); + + kernSetdscores << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_dscores, dev_gtruth); 
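			// The kernels around this launch implement the standard
			// softmax + cross-entropy gradient. With P = softmax(S) and integer
			// targets gtruth, for example i and class c:
			//   dL/dS[i][c] = (P[i][c] - 1{c == gtruth[i]}) / N
			// so the probabilities are copied into dev_dscores, 1 is subtracted
			// at each example's ground-truth class (kernSetdscores above), and
			// the result is divided by N (kernDivNdscores below).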
checkCUDAErrorFn("kernSetdscores failed!"); - kernDivNdscores <<<((N*C + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_dL_dscores); + // Copy back to cpu + //cudaMemcpy(tmp, dev_dscores, N*C * sizeof(float), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol [kernSetdscores] from dev_dscores to tmp failed!"); + //printf("Post setting loss at positions dev_dscores \n"); + //printFloatArray(N*C, tmp, true); + + kernDivNdscores << <((N*C + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_dscores); checkCUDAErrorFn("kernDivNdscores failed!"); - dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; - dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; - kernMatrixTranspose <<> > (N, C, dev_hLayer, dev_hLayer_T); + //cudaMemcpy(tmp, dev_dscores, N*C * sizeof(float), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol [kernSetdscores] from dev_dscores to tmp failed!"); + //printf("Post div by N -> setting loss at positions-> dev_dscores \n"); + //printFloatArray(N*C, tmp, true); + + + dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; + kernMatrixTranspose << > > (N, H, dev_hLayer, dev_hLayer_T); dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; - kernMatrixMultiply << > > (dev_hLayer_T, dev_dL_dscores, dev_dL_dw_ji, C, N, C); - checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); + kernMatrixMultiply << > > (dev_hLayer_T, dev_dscores, dev_dw2, H, N, C); + checkCUDAErrorFn("kernMatrixMultiply for dev_dw2 failed!"); + + //=========================== // STEP 2 : Gradient wrt w_kj + //=========================== - // Mul dscores * dev_w_kj == dev_dL_dscores_2 + // Transpose Wji (W2) dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; - dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; - kernMatrixMultiply << > > (dev_dL_dscores, dev_w_kj, dev_dL_dscores_2, N, C, C); - checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); - - // compute sig gradient on dev_hlayer - kernGradSigmoid <<<((N*C + blockSize - 1) / blockSize), blockSize >>> (N, C, dev_hLayer); + dimGrid.y = (H + dimBlock.y - 1) / dimBlock.y; + kernMatrixTranspose << > > (H, C, dev_w2, dev_w2_T); + + // Transpose Input Data + dimGrid.x = (D + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; + kernMatrixTranspose << > > (N, D, dev_iLayer, dev_iLayer_T); + + // Mul dev_dscores * dev_w1_T == dev_dscores_2 + // NxC CxH NxH + dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x; + dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; + kernMatrixMultiply << > > (dev_dscores, dev_w2_T, dev_dscores_2, N, C, H); + checkCUDAErrorFn("kernMatrixMultiply for dev_dscores_2 failed!"); + + // compute sig gradient on dev_hlayer N*H [IN PLACE] + kernGradSigmoid << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N, H, dev_hLayer); checkCUDAErrorFn("kernGradSigmoid failed!"); - //Element wise mul dev_dL_dscores_2 = dev_dL_dscores_2 . dev_hlayer[sig gradient] - kernElementProduct <<<((N*C + blockSize - 1) / blockSize), blockSize >>> (N*C, dev_dL_dscores_2, dev_hLayer, dev_dL_dscores_2); + //Element wise mul dev_dscores_2 [INPLACE] = dev_dscores_2 . 
dev_hlayer[sig gradient] + kernElementProduct << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_dscores_2, dev_hLayer, dev_dscores_2); checkCUDAErrorFn("kernElementProduct failed!"); - // Transpose X1 - dimGrid.x = (N + dimBlock.x - 1) / dimBlock.x; + // matrix Mul final with Xi_T + dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x; dimGrid.y = (D + dimBlock.y - 1) / dimBlock.y; - kernMatrixTranspose <<>> (N, D, dev_iLayer, dev_iLayer_T); - - // matrix Mul - dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; - dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y; - kernMatrixMultiply << > > (dev_iLayer_T, dev_dL_dscores_2, dev_dL_dw_kj, D, N, C); - checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!"); + kernMatrixMultiply << > > (dev_iLayer_T, dev_dscores_2, dev_dw1, D, N, H); + checkCUDAErrorFn("kernMatrixMultiply for dev_dw1 failed!"); //================================================================= //========================= Update Weights========================= - // Update weights kj - kernUpdateWeights << <((D*C + blockSize - 1) / blockSize), blockSize >> > (D*C, dev_dL_dw_kj, dev_w_kj, LR); - checkCUDAErrorFn("kernInitWeights dev_w_kj failed!"); - - // Update weights ji - kernUpdateWeights << <((C*C + blockSize - 1) / blockSize), blockSize >> > (C*C, dev_dL_dw_ji, dev_w_ji, LR); - checkCUDAErrorFn("kernInitWeights dev_w_ji failed!"); - - // Continue to next epoch + // Update weights1 + kernUpdateWeights << <((D*H + blockSize - 1) / blockSize), blockSize >> > (D*H, dev_dw1, dev_w1, LR); + checkCUDAErrorFn("kernUpdateWeights dev_w1 failed!"); + + // InitUpdate weights2 + kernUpdateWeights << <((H*C + blockSize - 1) / blockSize), blockSize >> > (H*C, dev_dw2, dev_w2, LR); + checkCUDAErrorFn("kernUpdateWeights dev_w2 failed!"); + + // Update biases1 + //kernUpdateWeights << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_db1, dev_dscores_2, LR); + //checkCUDAErrorFn("kernUpdateWeights dev_db1 failed!"); + + // InitUpdate biases2 + //kernUpdateWeights << <((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_db2, dev_dscores, LR); + //checkCUDAErrorFn("kernUpdateWeights dev_db2 failed!"); + + // COntinue to next epoch + //cudaMemcpy(tmp2, dev_w1, D*H * sizeof(float), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("dev_w1 memcopy failed!"); + //printf("w_kj \n"); + //printFloatArray(D*H, tmp2, true); + //cudaMemcpy(tmp2, dev_dw1, D*H * sizeof(float), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("dev_dw1 memcopy failed!"); + //printf("Dw_kj \n"); + //printFloatArray(D*H, tmp2, true); - cudaMemcpy(tmp2, dev_dL_dw_kj, D*C*sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("dev_dL_dw_kj memcopy failed!"); - printFloatArray(D*C, tmp2, true); - cudaMemcpy(tmp2, dev_dL_dw_ji, C*C * sizeof(double), cudaMemcpyDeviceToHost); - checkCUDAErrorFn("dev_dL_dw_ji memcopy failed!"); - printFloatArray(C*C, tmp2, true); + //cudaMemcpy(tmp2, dev_w2, H*C * sizeof(float), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("dev_w2 memcopy failed!"); + //printf("w_ji \n"); + //printFloatArray(H*C, tmp2, true); + //cudaMemcpy(tmp2, dev_dw2, H*C * sizeof(float), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("dev_dw2 memcopy failed!"); + //printf("Dw_ji \n"); + //printFloatArray(H*C, tmp2, true); - printf("\n"); + //printf("\n-----------------------------------------------------\n\n"); } + printf("Finished training.\n"); - printf("Free Device Buffers.\n"); - + printf("losses:\n"); + printFloatArray(epochs, lossAvgPerEpoch, true); + //==================== // CleanUp 
//==================== cudaFree(dev_iLayer); cudaFree(dev_hLayer); cudaFree(dev_oLayer); - - cudaFree(dev_smaxDen); + cudaFree(dev_losses); + cudaFree(dev_gtruth); + cudaFree(dev_preds); + cudaFree(dev_preds_probab); - cudaFree(dev_w_kj); - cudaFree(dev_w_ji); - - cudaFree(dev_dL_dw_ji); - cudaFree(dev_dL_dw_kj); + cudaFree(dev_w1); + cudaFree(dev_w2); + + /* + cudaFree(dev_b1); + cudaFree(dev_b2); + cudaFree(dev_db1); + cudaFree(dev_db2); + */ - cudaFree(dev_dL_dscores); - cudaFree(dev_dL_dscores_2); + cudaFree(dev_dw2); + cudaFree(dev_dw1); + + cudaFree(dev_dscores); + cudaFree(dev_dscores_2); cudaFree(dev_hLayer_T); cudaFree(dev_iLayer_T); delete(tmp); delete(tmp2); - - checkCUDAErrorFn("cudaFree failed!"); + delete(tmp3); timer().endGpuTimer(); - } + } } diff --git a/Project2-Character-Recognition/character_recognition/mlp.h b/Project2-Character-Recognition/character_recognition/mlp.h index 7943f19..3912887 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.h +++ b/Project2-Character-Recognition/character_recognition/mlp.h @@ -3,7 +3,7 @@ #include "common.h" namespace CharacterRecognition { - Common::PerformanceTimer& timer(); + Common::PerformanceTimer& timer(); // TODO: implement required elements for MLP sections 1 and 2 here // MLP section 1 and 2 Character Reader diff --git a/Project2-Character-Recognition/src/main.cpp b/Project2-Character-Recognition/src/main.cpp index 2f45ed0..bd6c3f2 100644 --- a/Project2-Character-Recognition/src/main.cpp +++ b/Project2-Character-Recognition/src/main.cpp @@ -11,27 +11,27 @@ #include #include "testing_helpers.hpp" -//====CONFIG Neural Network for XOR ================ -//================================================== - -/* -// XOR -const int N = 4; // Number of examples -const int D = 2; //Feature length per example -const int H = 2; // Number of Hidden unit -const int C = 2; // NN number of classes -const double LR = 0.5; -const int epochs = 1000; -*/ + //====CONFIG Neural Network for XOR ================ + //================================================== + + + // XOR + const int N = 4; // Number of examples + const int D = 2; //Feature length per example + const int H = 2; // Number of Hidden unit + const int C = 2; // NN number of classes + const double LR = 0.5; + const int epochs = 1000; -// Char Recognition + /* + // Char Recognition const int N = 52; // Number of examples const int D = 10201; // Feature length per example const int H = 10; // Number of Hidden unit const int C = 52; // NN number of classes const double LR = 0.5; const int epochs = 5000; - + */ double *losses = new double[epochs]; double *idata = new double[N*D]; int * preds = new int[N]; @@ -39,17 +39,16 @@ int * gtruth = new int[N]; int main(int argc, char* argv[]) { - // Scan tests + // Scan tests - printf("\n"); - printf("****************\n"); - printf("***MLP TESTS***\n"); - printf("****************\n"); + printf("\n"); + printf("****************\n"); + printf("***MLP TESTS***\n"); + printf("****************\n"); printf("Launch XOR Training\n"); - /* // XOR input dtat set 2 * 4 idata[0] = 0; idata[1] = 0; @@ -68,7 +67,7 @@ int main(int argc, char* argv[]) { CharacterRecognition::trainMLP(N, D, H, C, idata, preds, gtruth, epochs, losses, LR); printf("\nCompleted XOR Training\n"); - */ + // Data loading diff --git a/Project2-Character-Recognition/src/main.cpp_back b/Project2-Character-Recognition/src/main.cpp_back new file mode 100644 index 0000000..abcacb9 --- /dev/null +++ b/Project2-Character-Recognition/src/main.cpp_back @@ -0,0 +1,158 @@ 
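The new file below also changes the XOR setup to D = 3 by appending a constant
1 to every example (the "with bias" data), so the first weight matrix can learn
the bias implicitly while the separate dev_b1/dev_b2 buffers stay commented
out. A minimal sketch of that augmentation, assuming row-major idata (a
hypothetical helper, not part of the patch):

    // Append a constant-1 column so W1 absorbs the bias term.
    void appendBiasFeature(int N, int D, const float *in, float *out) {
        for (int n = 0; n < N; n++) {
            for (int d = 0; d < D; d++)
                out[n * (D + 1) + d] = in[n * D + d];
            out[n * (D + 1) + D] = 1.0f;  // bias input
        }
    }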
+/** + * @file main.cpp + * @brief Stream compaction test program + * @authors Kai Ninomiya + * @date 2015 + * @copyright University of Pennsylvania + */ + +#include +#include +#include +#include "testing_helpers.hpp" + +#include +#include +#include +#include +using namespace std; +unsigned long seed = time(0); +//====CONFIG Neural Network for XOR ================ +//================================================== + + +// XOR +const int N = 4; // Number of examples +const int D = 3; //Feature length per example +const int H = 4; // Number of Hidden unit +const int C = 2; // NN number of classes +const float LR = 0.1; +const int epochs = 1000; + +/* +//====CONFIG Neural Network for Char Recognition === +//================================================== +const int N = 52; // Number of examples +const int D = 10201; // Feature length per example +const int H = 10; // Number of Hidden unit +const int C = 52; // NN number of classes +const float LR = 0.01; +const int epochs = 5000; +*/ + + +int main(int argc, char* argv[]) { + + float *losses = new float[epochs]; + float *idata = new float[N*D]; + int * preds = new int[N]; + int * gtruth = new int[N]; + + printf("\n"); + printf("****************\n"); + printf("***MLP TESTS***\n"); + printf("****************\n"); + + printf("Launch XOR Training\n"); + // XOR input dtat set 2 * 4 + /* + idata[0] = 0; + idata[1] = 0; + idata[2] = 0; + idata[3] = 1; + idata[4] = 1; + idata[5] = 0; + idata[6] = 1; + idata[7] = 1; + */ + + // with bias + idata[0] = 0; + idata[1] = 0; + idata[2] = 1; + idata[3] = 0; + idata[4] = 1; + idata[5] = 1; + idata[6] = 1; + idata[7] = 0; + idata[8] = 1; + idata[9] = 1; + idata[10] = 1; + idata[11] = 1; + + + // XOR input dtat set 2 * 4 + gtruth[0] = 0; + gtruth[1] = 1; + gtruth[2] = 1; + gtruth[3] = 0; + + CharacterRecognition::trainMLP(N, D, H, C, idata, preds, gtruth, epochs, losses, LR, seed); + printf("\nCompleted XOR Training\n"); + + /* + // Data loading + printf("Trying to load data.\n"); + int data_sz = 0; + int x = 0; + + string line; + int *id = new int[N*D]; + for (int i = 1; i <= 52; i++) { + std::string fname; + if (i < 10) { + fname="C:\\Users\\chhavis\\cis565\\Project2-Number-Algorithms\\Project2-Character-Recognition\\data-set\\0" + std::to_string(i) + "info.txt"; + } + else { + fname = "C:\\Users\\chhavis\\cis565\\Project2-Number-Algorithms\\Project2-Character-Recognition\\data-set\\" + std::to_string(i) + "info.txt"; + } + std::ifstream myfile(fname); + std::stringstream sstream; + std::stringstream sstream2; + std::stringstream sstream3; + + //std::cout<> gtruth[i-1]; + gtruth[i-1] -= 1; + printf(" gtruth = %d |", gtruth[i-1]); + + // Read line 2 // Data Size + getline(myfile, line); + sstream2 << line; + sstream2 >> data_sz; + //printf("data_sz = %d \n", data_sz); + + // Read line 3 Pixel values + getline(myfile, line); + sstream3 << line; + for (int j = 0; j < data_sz; j++){ + sstream3 >> id[(i-1)*10201 + j]; + } + + myfile.close(); + } + else { + printf("Unable to open file.\n");; + } + } + + // Normalize Data + for (int i = 0; i < N*D; i++) { + idata[i] = id[i]/255.0; + //printf("\t %lf ", idata[i]); + } + delete(id); + + CharacterRecognition::trainMLP(N, D, H, C, idata, preds, gtruth, epochs, losses, LR, seed); + printf("\nCompleted CharRec Training\n"); + */ + + return 0; +} From 93f10ed1a7ce3b258243ec5ab2527492ba1dcb8b Mon Sep 17 00:00:00 2001 From: CHHAVI SHARMA Date: Tue, 17 Sep 2019 14:03:48 -0400 Subject: [PATCH 09/76] Completed XOR and Char Rec | Raw --- .../character_recognition/mlp.cu | 
138 +++++++++++------- .../character_recognition/mlp.h | 2 +- Project2-Character-Recognition/src/main.cpp | 133 ++++++++++++++--- 3 files changed, 197 insertions(+), 76 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index 0d7a5ca..b5bb821 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -108,7 +108,7 @@ namespace CharacterRecognition { int tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid < N) { - dev_w[tid] += -LR * dev_dw[tid]; + dev_w[tid] = dev_w[tid] - (LR * dev_dw[tid]); } } @@ -123,13 +123,16 @@ namespace CharacterRecognition { } // Matrix Transpose - __global__ void kernMatrixTranspose(int N, int C, double *matrix, double *matrix_T) { + __global__ void kernMatrixTranspose(int rows, int cols, double *matrix, double *matrix_T) { - int row = blockIdx.y * blockDim.y + threadIdx.y; - int col = blockIdx.x * blockDim.x + threadIdx.x; + int idy = blockIdx.y * blockDim.y + threadIdx.y; + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + if (idx < cols && idy < rows) { + int pos = idy * cols + idx; + int tpos = idx * rows + idy; - if (col < C && row < N) { - matrix_T[C*row + col] = matrix[N*col + row]; + matrix_T[tpos] = matrix[pos]; } } @@ -165,8 +168,15 @@ namespace CharacterRecognition { __global__ void kernLossPerN(int N, int C, double* dev_oLayer, int* dev_gtruth, double* dev_losses) { int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < N) { + //printf("tid = %d \n", tid); + //printf("tid*C = %d \n", tid*C); + //printf("tid*C + gtruth = %d \n", tid*C + dev_gtruth[tid]); + //printf("dev_oLayer[tid*C + dev_gtruth[tid]] = %0.3f \n", dev_oLayer[ tid*C + dev_gtruth[tid] ]); + //printf("dev_oLayer[tid*C + dev_gtruth[tid]] = %0.3f \n", log(dev_oLayer[tid*C + dev_gtruth[tid]])); dev_losses[tid] = -log(dev_oLayer[tid*C + dev_gtruth[tid]]); + } } @@ -193,17 +203,17 @@ namespace CharacterRecognition { int tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid < N) { - idata[tid] = 1.0 / (1.0 + exp(-idata[tid])); + idata[tid] = 1.0 / (1.0 + exp(-1*idata[tid])); } } // kern for element wise product - __global__ void kernElementProduct(int N, double *matrixA, double* matrixB, double* matrixC) { + __global__ void kernElementProduct(int N, double *matrixA, double* matrixB) { int tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid < N) { - matrixC[tid] = matrixA[tid] * matrixB[tid]; + matrixA[tid] = matrixA[tid] * matrixB[tid]; } } @@ -248,7 +258,7 @@ namespace CharacterRecognition { } - void trainMLP(int N, int D, int H, int C, double *idata, int *preds, int *gtruth, int epochs, double *lossAvgPerEpoch, const double LR) { + void trainMLP(int N, int D, int H, int C, double *idata, int *preds, int *gtruth, int epochs, double *lossAvgPerEpoch, const double LR, unsigned long seed) { timer().startGpuTimer(); @@ -276,12 +286,12 @@ namespace CharacterRecognition { // Allocate hidden layer - cudaMalloc((void**)&dev_hLayer, N*H * sizeof(double)); + cudaMalloc((void**)&dev_hLayer, N*H* sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer failed!"); // Allocate output layer - cudaMalloc((void**)&dev_oLayer, N*C * sizeof(double)); + cudaMalloc((void**)&dev_oLayer, N*C* sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_oLayer failed!"); @@ -289,7 +299,7 @@ namespace CharacterRecognition { cudaMalloc((void**)&dev_losses, N * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_losses failed!"); - 
cudaMalloc((void**)&dev_LossAvg, 1 * sizeof(double)); + cudaMalloc((void**)&dev_LossAvg, 1*sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_LossAvg failed!"); @@ -307,18 +317,18 @@ namespace CharacterRecognition { checkCUDAErrorFn("cudaMalloc dev_preds_probab failed!"); // Allocate Weights - cudaMalloc((void**)&dev_w_kj, D*H * sizeof(double)); + cudaMalloc((void**)&dev_w_kj, D*H * sizeof(double)); //w1 checkCUDAErrorFn("cudaMalloc dev_w_kj failed!"); - cudaMalloc((void**)&dev_w_ji, C*H * sizeof(double)); + cudaMalloc((void**)&dev_w_ji, C*H * sizeof(double)); //w2 checkCUDAErrorFn("cudaMalloc dev_w_ji failed!"); // Allocate Derivatives - cudaMalloc((void**)&dev_dL_dw_kj, D*H * sizeof(double)); + cudaMalloc((void**)&dev_dL_dw_kj, D*H * sizeof(double)); //dw1 checkCUDAErrorFn("cudaMalloc dev_w_kj failed!"); - cudaMalloc((void**)&dev_dL_dw_ji, C*H * sizeof(double)); + cudaMalloc((void**)&dev_dL_dw_ji, C*H * sizeof(double)); //dw2 checkCUDAErrorFn("cudaMalloc dev_w_ji failed!"); cudaMalloc((void**)&dev_dL_dscores, N*C * sizeof(double)); @@ -338,6 +348,7 @@ namespace CharacterRecognition { cudaMalloc((void**)&dev_w_ji_T, C*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_w_ji_T failed!"); + /* //Allocate biases cudaMalloc((void**)&dev_b1, N*H * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); @@ -350,35 +361,34 @@ namespace CharacterRecognition { cudaMalloc((void**)&dev_db2, N*C * sizeof(double)); checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!"); + */ //============================== // Initialise Weights and Biases //============================== cudaMalloc((void**)&devState, H*D * sizeof(curandState)); - kernInitCurand << <((D*H + blockSize - 1) / blockSize), blockSize >> > (devState, D*H, 99); + kernInitCurand << <((D*H + blockSize - 1) / blockSize), blockSize >> > (devState, D*H, seed); checkCUDAErrorFn("KernInitCurand failed!"); - - KernGenRand << <((D*H + blockSize - 1) / blockSize), blockSize >> > (devState, D*H, dev_w_kj); + KernGenRand << <((D*H + blockSize - 1) / blockSize), blockSize >> > (devState, D*H, dev_w_kj);//w1 checkCUDAErrorFn("KernGenRand dev_w_kj failed!"); - kernInitCurand << <((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, 999); + kernInitCurand << <((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, seed); checkCUDAErrorFn("KernInitCurand failed!"); - - KernGenRand << <((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, dev_w_ji); + KernGenRand << <((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, dev_w_ji);//w2 checkCUDAErrorFn("KernGenRand dev_w_kj failed!"); - kernInitCurand << <((N*C + blockSize - 1) / blockSize), blockSize >> > (devState, N*C, 9); + /* + kernInitCurand << <((N*C + blockSize - 1) / blockSize), blockSize >> > (devState, N*C, 9); //9 checkCUDAErrorFn("KernInitCurand failed!"); - KernGenRand << <((N*C + blockSize - 1) / blockSize), blockSize >> > (devState, N*C, dev_b2); checkCUDAErrorFn("KernGenRand dev_w_kj failed!"); - kernInitCurand << <((N*H + blockSize - 1) / blockSize), blockSize >> > (devState, N*H, 9999); + kernInitCurand << <((N*H + blockSize - 1) / blockSize), blockSize >> > (devState, N*H, 9999); //9999 checkCUDAErrorFn("KernInitCurand failed!"); - KernGenRand << <((N*H + blockSize - 1) / blockSize), blockSize >> > (devState, N*H, dev_b1); checkCUDAErrorFn("KernGenRand dev_w_kj failed!"); + */ /*double *rand = new double[D*C]; cudaMemcpy(rand, dev_w_kj, D*C* sizeof(double), cudaMemcpyDeviceToHost); @@ -395,10 +405,10 @@ namespace 
CharacterRecognition { //================================================================ //======================TRAINING LOOP============================= //================================================================ - double *tmp = new double[N*N]; - double *tmp2 = new double[N*N]; + double *tmp = new double[N*D]; + double *tmp2 = new double[N*D]; double *lossesN = new double[N]; - + int *tmpint = new int[N]; printf("Input DATA\n"); printFloatArray(N*D, idata, true); @@ -420,7 +430,8 @@ namespace CharacterRecognition { dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x; dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; kernMatrixMultiply << > > (dev_iLayer, dev_w_kj, dev_hLayer, N, D, H); - kernAddition << < ((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_hLayer, dev_b1); + + //kernAddition << < ((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_hLayer, dev_b1); // Copy back to cpu //double *tmp = new double[N*H]; @@ -451,7 +462,7 @@ namespace CharacterRecognition { dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x; dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y; kernMatrixMultiply << > > (dev_hLayer, dev_w_ji, dev_oLayer, N, H, C); - kernAddition << < ((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_oLayer, dev_b2); + //kernAddition << < ((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_oLayer, dev_b2); checkCUDAErrorFn("kernMatrixMultiply failed!"); // Copy back to cpu @@ -470,10 +481,16 @@ namespace CharacterRecognition { checkCUDAErrorFn("kernSoftmax failed!"); // Copy back to cpu - //cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToHost); - //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!"); - //printf("Post Softmax [dev_olayer = exp(dev_olayer)/Sum_Over_classses]\n"); - //printFloatArray(N*C, tmp, true); + cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!"); + printf("Post Softmax [dev_olayer = exp(dev_olayer)/Sum_Over_classses]\n"); + printFloatArray(N*C, tmp, true); + + // print check + //cudaMemcpy(tmpint, dev_gtruth, N * sizeof(int), cudaMemcpyDeviceToHost); + //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_gtruth to tmpint failed!"); + //printf("Print gtruth \n"); + //printArray(N, tmpint, true); // STEP 5 // Compute Losses | Cross Entropy Loss @@ -483,10 +500,10 @@ namespace CharacterRecognition { checkCUDAErrorFn("kernLossPerN failed!"); // Copy back to cpu - //cudaMemcpy(lossesN, dev_losses, N * sizeof(double), cudaMemcpyDeviceToHost); - //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to lossesN failed!"); - //printf("Post dev_losses [Loss = CEntropy(P)]\n"); - //printFloatArray(N, lossesN, true); + cudaMemcpy(lossesN, dev_losses, N * sizeof(double), cudaMemcpyDeviceToHost); + checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to lossesN failed!"); + printf("Post dev_losses [Loss = CEntropy(P)]\n"); + printFloatArray(N, lossesN, true); // Predictions @@ -513,7 +530,7 @@ namespace CharacterRecognition { printf("Epoch: %d | LossAvg %3f \n", i, lossAvgPerEpoch[i]); - + //================================================================= //========================= BACKPROP ============================== @@ -522,9 +539,15 @@ namespace CharacterRecognition { cudaMemcpy(dev_dL_dscores, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToDevice); checkCUDAErrorFn("cudaMemcpyFromSymbol from probabs to dev_dL_dscores failed!"); + kernSetdscores << <((N + blockSize - 1) / blockSize), 
-
+
 		//=================================================================
 		//========================= BACKPROP ==============================
 		//=================================================================
 		cudaMemcpy(dev_dL_dscores, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToDevice);
 		checkCUDAErrorFn("cudaMemcpyFromSymbol from probabs to dev_dL_dscores failed!");

+		kernSetdscores << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_dL_dscores, dev_gtruth);
 		checkCUDAErrorFn("kernSetdscores failed!");

+		//cudaMemcpy(tmp, dev_dL_dscores, N*C * sizeof(double), cudaMemcpyDeviceToHost);
+		//checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to dev_dL_dscores failed!");
+		//printf("dev_dL_dscores \n");
+		//printFloatArray(N*C, tmp, true);
+
 		// Copy back to cpu
 		//cudaMemcpy(tmp, dev_dL_dscores, N*C * sizeof(double), cudaMemcpyDeviceToHost);
 		//checkCUDAErrorFn("cudaMemcpyFromSymbol [kernSetdscores] from dev_dL_dscores to tmp failed!");
@@ -534,9 +557,10 @@ namespace CharacterRecognition {
 		kernDivNdscores << <((N*C + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_dL_dscores);
 		checkCUDAErrorFn("kernDivNdscores failed!");
+
 		//cudaMemcpy(tmp, dev_dL_dscores, N*C * sizeof(double), cudaMemcpyDeviceToHost);
-		//checkCUDAErrorFn("cudaMemcpyFromSymbol [kernSetdscores] from dev_dL_dscores to tmp failed!");
-		//printf("Post div by N -> setting loss at positions-> dev_dL_dscores \n");
+		//checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to dev_dL_dscores failed!");
+		//printf("dev_dL_dscores /N\n");
 		//printFloatArray(N*C, tmp, true);
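Together, the copy of dev_oLayer into dev_dL_dscores, kernSetdscores, and kernDivNdscores implement the standard softmax/cross-entropy gradient dL/ds[n][c] = (P[n][c] - 1{c == gtruth[n]}) / N. Sketches matching the launch configurations above (one thread per example for the subtraction, one per entry for the scaling; the bodies are assumed, not shown in the patch):

// Subtract 1 at each example's ground-truth class (probabilities already copied in).
__global__ void kernSetdscores(int N, int C, double *dscores, const int *gtruth) {
	int n = (blockIdx.x * blockDim.x) + threadIdx.x;
	if (n >= N) return;
	dscores[n*C + gtruth[n]] -= 1.0;
}

// Average the gradient over the batch of N examples.
__global__ void kernDivNdscores(int N, int C, double *dscores) {
	int i = (blockIdx.x * blockDim.x) + threadIdx.x;
	if (i >= N*C) return;
	dscores[i] /= (double)N;
}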
 		dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x;
 		dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y;
 		kernMatrixTranspose << <dimGrid, dimBlock >> > (N, H, dev_hLayer, dev_hLayer_T);

-		//cudaMemcpy(tmp, dev_hLayer, N*H * sizeof(double), cudaMemcpyDeviceToHost);
-		//checkCUDAErrorFn("cudaMemcpyFromSymbol dev_hLayer to tmp failed!");
-		//printf("dev_hLayer \n");
-		//printFloatArray(N*H, tmp, true);
+		/*
+		cudaMemcpy(tmp, dev_hLayer, N*H * sizeof(double), cudaMemcpyDeviceToHost);
+		checkCUDAErrorFn("cudaMemcpyFromSymbol dev_hLayer to tmp failed!");
+		printf("dev_hLayer \n");
+		printFloatArray(N*H, tmp, true);

-		//cudaMemcpy(tmp, dev_hLayer_T, N*H* sizeof(double), cudaMemcpyDeviceToHost);
-		//checkCUDAErrorFn("cudaMemcpyFromSymbol dev_hLayer_T to tmp failed!");
-		//printf("dev_hLayer_T \n");
-		//printFloatArray(N*H, tmp, true);
+		cudaMemcpy(tmp, dev_hLayer_T, N*H* sizeof(double), cudaMemcpyDeviceToHost);
+		checkCUDAErrorFn("cudaMemcpyFromSymbol dev_hLayer_T to tmp failed!");
+		printf("dev_hLayer_T \n");
+		printFloatArray(N*H, tmp, true);*/

 		dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x;
-		dimGrid.y = (C + dimBlock.y - 1) / dimBlock.y;
+		dimGrid.y = (H + dimBlock.y - 1) / dimBlock.y;
 		kernMatrixMultiply << <dimGrid, dimBlock >> > (dev_hLayer_T, dev_dL_dscores, dev_dL_dw_ji, H, N, C);
 		checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!");

@@ -586,8 +611,9 @@ namespace CharacterRecognition {
 		kernGradSigmoid << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N, H, dev_hLayer);
 		checkCUDAErrorFn("kernGradSigmoid failed!");

+		//Element wise mul dev_dL_dscores_2 [INPLACE] = dev_dL_dscores_2 . dev_hLayer[sig gradient]
-		kernElementProduct << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_dL_dscores_2, dev_hLayer, dev_dL_dscores_2);
+		kernElementProduct << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_dL_dscores_2, dev_hLayer);
 		checkCUDAErrorFn("kernElementProduct failed!");

 		// matrix Mul final with Xi_T
@@ -600,14 +626,15 @@ namespace CharacterRecognition {

 		//=================================================================
 		//========================= Update Weights=========================

-		// Update weights kj
+		// Update weights kj W1
 		kernUpdateWeights << <((D*H + blockSize - 1) / blockSize), blockSize >> > (D*H, dev_dL_dw_kj, dev_w_kj, LR);
 		checkCUDAErrorFn("kernUpdateWeights dev_w_kj failed!");

-		// InitUpdate weights ji
+		// Update weights ji W2
 		kernUpdateWeights << <((H*C + blockSize - 1) / blockSize), blockSize >> > (H*C, dev_dL_dw_ji, dev_w_ji, LR);
 		checkCUDAErrorFn("kernUpdateWeights dev_w_ji failed!");

+		/*
 		// Update biases1
 		kernUpdateWeights << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_db1, dev_dL_dscores_2, LR);
 		checkCUDAErrorFn("kernUpdateWeights dev_w_kj failed!");

 		// Update biases2
 		kernUpdateWeights << <((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_db2, dev_dL_dscores, LR);
 		checkCUDAErrorFn("kernUpdateWeights dev_w_ji failed!");
+		*/

 		// Continue to next epoch
 		//cudaMemcpy(tmp2, dev_w_kj, D*H * sizeof(double), cudaMemcpyDeviceToHost);
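kernUpdateWeights, reused for both weight matrices (and for the now-disabled bias updates), is a plain gradient-descent step over a flattened array; a sketch matching the call sites (the definition itself is not in this patch):

// Vanilla SGD over n flattened weights: w[i] -= LR * dw[i].
__global__ void kernUpdateWeights(int n, const double *dw, double *w, const double LR) {
	int i = (blockIdx.x * blockDim.x) + threadIdx.x;
	if (i >= n) return;
	w[i] -= LR * dw[i];
}

With LR = 0.5 as configured in main.cpp, this is the entire optimizer: no momentum, decay, or per-layer scaling.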
printf("Launch XOR Training\n"); - - // XOR input dtat set 2 * 4 - idata[0] = 0; - idata[1] = 0; - idata[2] = 0; - idata[3] = 1; - idata[4] = 1; - idata[5] = 0; - idata[6] = 1; - idata[7] = 1; - - // XOR input dtat set 2 * 4 + // XOR input data set 2*4 + idata[0] = 0.0; + idata[1] = 0.0; + idata[2] = 0.0; + idata[3] = 1.0; + idata[4] = 1.0; + idata[5] = 0.0; + idata[6] = 1.0; + idata[7] = 1.0; + + // XOR ground truth data set 4 gtruth[0] = 0; gtruth[1] = 1; gtruth[2] = 1; gtruth[3] = 0; - CharacterRecognition::trainMLP(N, D, H, C, idata, preds, gtruth, epochs, losses, LR); + CharacterRecognition::trainMLP(N, D, H, C, idata, preds, gtruth, epochs, losses, LR, seed); printf("\nCompleted XOR Training\n"); - - + // STORE LOSSES + std::ofstream myfile("xor_losses.txt"); + if (myfile.is_open()) + { + for (int i = 0; i < epochs; i++) { + myfile << std::fixed << std::setprecision(8) << losses[i]<<'\n'; + } + myfile.close(); + } + + */ + //========================================================================================== + //========================================================================================== + + printf("Launch CharRec Training\n"); // Data loading + printf("Loading data...\n"); + int data_sz = 0; + int x = 0; + + std::string line; + int *id = new int[N*D]; + for (int i = 1; i <= 52; i++) { + std::string fname; + if (i < 10) { + fname = "C:\\Users\\chhavis\\cis565\\Project2-Number-Algorithms\\Project2-Character-Recognition\\data-set\\0" + std::to_string(i) + "info.txt"; + } + else { + fname = "C:\\Users\\chhavis\\cis565\\Project2-Number-Algorithms\\Project2-Character-Recognition\\data-set\\" + std::to_string(i) + "info.txt"; + } + std::ifstream myfile(fname); + std::stringstream sstream; + std::stringstream sstream2; + std::stringstream sstream3; + + //std::cout<> gtruth[i - 1]; + gtruth[i - 1] -= 1; + printf(" gtruth = %d |", gtruth[i - 1]); + + // Read line 2 // Data Size + getline(myfile, line); + sstream2 << line; + sstream2 >> data_sz; + //printf("data_sz = %d \n", data_sz); + + // Read line 3 Pixel values + getline(myfile, line); + sstream3 << line; + for (int j = 0; j < data_sz; j++) { + sstream3 >> id[(i - 1) * 10201 + j]; + } + + myfile.close(); + } + else { + printf("Unable to open file.\n");; + } + } + + // Normalize Data + for (int i = 0; i < N*D; i++) { + idata[i] = id[i] / 255.0; + //printf("\t %lf ", idata[i]); + } + delete(id); + + CharacterRecognition::trainMLP(N, D, H, C, idata, preds, gtruth, epochs, losses, LR, seed); + printf("\nCompleted CharRec Training\n"); + + // STORE LOSSES + std::ofstream myfile("cr_losses.txt"); + if (myfile.is_open()) + { + for (int i = 0; i < epochs; i++) { + myfile << std::fixed << std::setprecision(8) << losses[i] << '\n'; + } + myfile.close(); + } return 0; From dfa2f26533e07ab091f79dceb3ecd4d6821da792 Mon Sep 17 00:00:00 2001 From: CHHAVI SHARMA Date: Tue, 17 Sep 2019 15:42:23 -0400 Subject: [PATCH 10/76] Character Recognition Completed --- .../build/cr_W1_DxH_10201_x_10.txt | 0 .../build/cr_W2_HxC_10_x_52.txt | 520 ++ .../build/cr_losses.txt | 5001 +++++++++++++++++ .../build/xor_W1_DxH_2_x_4.txt | 8 + .../build/xor_W2_HxC_4_x_2.txt | 8 + .../build/xor_losses.txt | 1001 ++++ .../character_recognition/mlp.cu | 265 +- .../character_recognition/mlp.h | 3 +- .../img/CharRecStats.PNG | Bin 0 -> 47006 bytes .../img/XorStats.PNG | Bin 0 -> 38219 bytes Project2-Character-Recognition/src/main.cpp | 319 +- 11 files changed, 6802 insertions(+), 323 deletions(-) create mode 100644 
Project2-Character-Recognition/build/cr_W1_DxH_10201_x_10.txt create mode 100644 Project2-Character-Recognition/build/cr_W2_HxC_10_x_52.txt create mode 100644 Project2-Character-Recognition/build/cr_losses.txt create mode 100644 Project2-Character-Recognition/build/xor_W1_DxH_2_x_4.txt create mode 100644 Project2-Character-Recognition/build/xor_W2_HxC_4_x_2.txt create mode 100644 Project2-Character-Recognition/build/xor_losses.txt create mode 100644 Project2-Character-Recognition/img/CharRecStats.PNG create mode 100644 Project2-Character-Recognition/img/XorStats.PNG diff --git a/Project2-Character-Recognition/build/cr_W1_DxH_10201_x_10.txt b/Project2-Character-Recognition/build/cr_W1_DxH_10201_x_10.txt new file mode 100644 index 0000000..e69de29 diff --git a/Project2-Character-Recognition/build/cr_W2_HxC_10_x_52.txt b/Project2-Character-Recognition/build/cr_W2_HxC_10_x_52.txt new file mode 100644 index 0000000..44b3708 --- /dev/null +++ b/Project2-Character-Recognition/build/cr_W2_HxC_10_x_52.txt @@ -0,0 +1,520 @@ +0.02234437 +2.44178838 +2.12211869 +2.86910908 +1.39794079 +-3.37807807 +1.79872024 +-3.25161667 +-5.14672692 +-3.82594618 +-4.36467364 +1.05448366 +1.34464836 +-2.70558592 +-1.29232861 +-4.45322911 +1.12452646 +0.05129583 +-1.53145817 +2.29747285 +-0.17928053 +1.22056949 +1.85546884 +2.26923765 +7.72128419 +0.56676446 +-0.59855473 +0.34405155 +-0.07450361 +-2.81415459 +2.08598835 +-0.47959715 +-1.18020942 +-3.54638591 +0.49263043 +3.51805568 +0.28332691 +0.94279276 +0.10403002 +3.55367626 +-0.20846972 +-0.57336788 +-1.63715663 +-4.42079677 +3.62606363 +-4.75628906 +1.71687876 +-3.60419377 +-0.58926909 +1.12312184 +-0.93837230 +1.02161558 +-1.99439051 +-1.66877795 +-0.47272724 +-1.92237566 +-0.65147517 +-1.67161539 +-1.65340364 +-0.41736635 +-0.96666603 +-2.00534589 +-0.24940603 +-0.21780509 +-0.68745857 +-1.76812782 +-1.69955534 +-0.91664097 +-1.99045331 +-2.08961946 +1.95625394 +-1.35927350 +2.16917533 +-1.45013560 +4.34069940 +-1.78569307 +-1.36826806 +-2.40150731 +-1.72052882 +-1.42870622 +-1.93580023 +-1.57120953 +3.93023604 +-1.50947076 +3.96687650 +-0.98050795 +3.15903624 +-1.84092854 +2.30948191 +3.71612203 +2.93363918 +-1.22285724 +-3.64187775 +-2.52899566 +2.22525671 +4.57807446 +-0.29206579 +-1.33727831 +3.30435509 +3.51734746 +2.41451160 +3.46033995 +2.73766160 +2.78666063 +-0.56444583 +-1.50717050 +2.90448926 +-0.50944431 +2.57287118 +-1.24795231 +1.44276649 +-0.33453929 +3.85329059 +-0.06194339 +2.74633881 +-2.66863912 +3.86157067 +-1.10027464 +3.44725168 +-0.31827424 +-0.99496430 +4.85996637 +3.58155703 +3.97230211 +-1.32438874 +4.11562237 +-1.76102091 +-2.47773409 +-0.77006312 +-2.01230207 +-0.46187672 +-1.14037968 +-1.33359197 +-1.90759140 +-2.74224019 +-1.69564899 +-0.81759004 +-2.25480444 +4.99607051 +-1.00420779 +-0.94399773 +-0.12900643 +2.53208776 +-0.61161108 +-1.61346279 +-1.25051345 +-0.93662990 +0.01209936 +-1.59083979 +-0.64290910 +-1.17560385 +-0.91370719 +-1.29934883 +-1.49271788 +-0.38279559 +-0.87281425 +0.87195025 +1.88068890 +1.20685565 +-3.23215833 +0.65898479 +4.46368446 +0.30732024 +2.88912314 +2.00189056 +2.66182735 +0.00416151 +-2.85011285 +2.51673143 +4.74300932 +0.49546370 +-4.29365896 +1.38348189 +-2.85051105 +1.03518557 +-2.48453524 +-3.74340421 +2.22373576 +-2.62697394 +-2.58897643 +-1.83618233 +2.56299083 +-1.42150899 +1.52180253 +-3.30263784 +4.34358648 +-1.85626629 +-3.62798116 +0.89870843 +1.56615258 +-2.04684895 +-2.67556129 +-3.48254747 +2.77137837 +-2.68175027 +-2.83070828 +0.59285769 +0.58829105 +-3.31627902 +0.60370787 
+-2.73067211 +1.86065981 +2.66664819 +1.92799047 +1.46272360 +2.00129662 +0.62389047 +-2.19732016 +-3.22605052 +2.28552456 +2.61076783 +2.77234982 +2.07733036 +-3.67155053 +0.33752553 +-2.06917430 +2.60614139 +3.27261984 +2.82254237 +4.37907355 +-5.20032356 +4.59297057 +1.08857794 +5.01668379 +1.71066513 +-2.19201261 +1.25287245 +-1.71080786 +2.01223504 +-2.35519499 +-2.90179015 +-2.47115809 +-2.63651532 +-3.06549187 +-2.93057829 +-2.34808294 +2.16435845 +-2.27738377 +-2.32156673 +2.50111263 +0.77389352 +1.71260555 +-1.73274959 +-2.41800194 +2.78133318 +-2.80577541 +-0.27508988 +6.04277712 +1.75554821 +1.02978271 +0.78041520 +2.03958239 +-3.24255352 +3.32588491 +-1.98929991 +-2.28297877 +1.52067562 +1.68655415 +-3.21364777 +-1.95381174 +-2.06233206 +-2.22298768 +-1.80348311 +-2.05830494 +-1.87995171 +-0.67577089 +-0.44382271 +4.20706794 +-1.18228443 +-1.37365493 +-1.05650687 +-2.02966709 +-1.27287134 +-0.31277148 +3.25713817 +-0.84268740 +-2.64515693 +3.33530698 +1.77635318 +-1.33245430 +2.02754064 +-1.90175529 +-1.78627701 +4.27331574 +-1.75607897 +2.95675335 +3.45109021 +-3.38702814 +-2.92380651 +-0.93511930 +4.33888915 +3.38764241 +1.55577853 +-1.76272342 +2.81499956 +-0.81972380 +2.71453472 +-1.76005896 +-2.09843488 +-0.70750318 +3.30562858 +2.72980051 +2.06350419 +-1.40164804 +-1.46211969 +-1.65860465 +-1.65001866 +-1.17519148 +3.55159733 +-3.65599789 +2.51833760 +-1.08035097 +1.40048027 +-2.21005077 +-2.79129714 +3.86463169 +-2.60703264 +5.21058972 +-2.77459353 +-1.81943360 +-2.41254703 +3.75876841 +-2.00997201 +-2.90532230 +-2.57997251 +-2.84595063 +1.37901057 +5.26427982 +1.63151655 +2.35976606 +0.07321900 +-1.62854140 +1.25562611 +2.53801401 +3.85063761 +3.35605763 +-2.16352863 +1.26890311 +1.78032953 +2.15428869 +1.32205728 +-2.15165015 +-2.80287224 +1.72602561 +-2.91310689 +2.34373051 +-2.06496560 +-1.88494825 +-2.15968417 +-1.91505968 +1.84094830 +-3.63674926 +-3.30054277 +0.63488729 +0.34412194 +1.70240709 +-2.31547881 +-2.42724158 +-2.81194571 +2.56395007 +1.19455114 +0.90831936 +0.73689405 +-1.39378618 +1.43796898 +-2.72640276 +1.25008425 +-2.43610241 +1.27555608 +-1.63298194 +2.15128850 +-1.36079828 +2.87842394 +-3.20078726 +1.88037636 +4.73869734 +2.95859981 +-2.47676072 +1.20086879 +-1.64876827 +-2.21245796 +-1.37292240 +-2.64926221 +-1.99366862 +-2.61197146 +-2.42037390 +-1.69700797 +-2.61727877 +-2.00283254 +2.80193074 +2.17398867 +2.74828299 +1.89859703 +1.63000254 +-1.22019192 +2.98193246 +2.71323761 +4.82065895 +-1.23986136 +-1.71608723 +-2.46404070 +-1.55481777 +3.14083856 +-2.97212520 +2.31152158 +-2.42709810 +2.38669934 +-0.98088669 +6.13672183 +-1.51858463 +-1.35090733 +-1.64022275 +-2.63873572 +-0.36994333 +2.38701700 +-1.91253877 +2.45372541 +2.79834039 +-3.35927343 +-0.55435167 +1.28266945 +-1.02396327 +-1.81623533 +3.79640024 +-1.10465280 +-0.96389700 +-1.21746508 +-2.00081054 +-0.81447816 +-2.71045453 +-0.98633027 +-0.47366725 +-2.00061472 +-1.12570771 +-1.57260963 +-1.33590128 +-2.11516036 +-0.82271302 +-0.96687719 +-1.51235854 +-2.32042104 +-3.22214651 +3.58840832 +3.10634936 +2.55341604 +-1.48530011 +-1.78709120 +-1.97663591 +1.34015604 +-1.73106926 +-0.37102147 +5.97811702 +3.15291421 +3.02307008 +-1.22484752 +-2.06195501 +2.06280045 +2.27850558 +2.13215110 +2.40756292 +-1.08814103 +5.30066516 +-2.48557407 +-0.35968480 +-1.71889374 +-2.27288506 +-1.76290175 +3.23610464 +4.10088359 +-1.11843755 +-2.61634186 +-2.11321485 +-2.59093557 +-1.04686311 +4.35423497 +-0.56777076 +-2.99893667 +-1.05259207 +4.71422665 +-0.42004211 +-1.76466408 +-1.38104584 
+-1.72265757 +-0.26025632 +3.89225868 +0.78622880 +2.45642204 +4.95748122 +3.58781801 +-1.03226293 +-1.31419006 +-1.42850982 +-1.39648520 +-1.24103914 +-1.29710668 +-3.31151068 +3.07427276 +5.19890627 +-0.53040221 +-1.62574959 +-1.61546083 +-1.12818533 +-1.35332518 +-1.79835719 +-1.76282834 +-2.20464367 +-0.93834226 +-1.17917553 +-0.74051930 +3.19793449 +2.46613493 +-1.67891551 +-0.80699423 +-0.12919697 +3.16991714 +3.64328970 +-2.82860894 +-1.89732238 +2.49245552 +3.98195807 diff --git a/Project2-Character-Recognition/build/cr_losses.txt b/Project2-Character-Recognition/build/cr_losses.txt new file mode 100644 index 0000000..15e73c2 --- /dev/null +++ b/Project2-Character-Recognition/build/cr_losses.txt @@ -0,0 +1,5001 @@ +4.79193339 +4.60569768 +4.48124055 +4.39336828 +4.33439360 +4.28509756 +4.23347534 +4.18263887 +4.14351092 +4.10743282 +4.07348860 +4.04139590 +4.01077620 +3.98135895 +3.95294559 +3.92533144 +3.89827878 +3.87144730 +3.84415824 +3.81521365 +3.78592231 +3.75778205 +3.72956850 +3.70276501 +3.67628343 +3.64768263 +3.62117750 +3.59816343 +3.57558692 +3.55315742 +3.53084184 +3.50865991 +3.48636639 +3.46412264 +3.44269267 +3.42210845 +3.40214974 +3.38243467 +3.36226867 +3.33984163 +3.31278679 +3.29084664 +3.27150324 +3.25256752 +3.23285809 +3.21110362 +3.19023648 +3.17016317 +3.14832593 +3.12725486 +3.10881560 +3.09122119 +3.07417761 +3.05750782 +3.04102286 +3.02453607 +3.00775865 +2.99005032 +2.97168614 +2.95551539 +2.94026955 +2.92528140 +2.91033008 +2.89516204 +2.87967046 +2.86452108 +2.85008493 +2.83595408 +2.82191963 +2.80782384 +2.79336274 +2.77768495 +2.75922036 +2.74255028 +2.72886113 +2.71565082 +2.70268778 +2.68989077 +2.67722656 +2.66468697 +2.65227237 +2.63997016 +2.62774211 +2.61552910 +2.60327521 +2.59100265 +2.57887739 +2.56702581 +2.55540030 +2.54392549 +2.53255581 +2.52126065 +2.51001206 +2.49877899 +2.48752986 +2.47625488 +2.46501057 +2.45391156 +2.44301831 +2.43230390 +2.42172498 +2.41125309 +2.40087159 +2.39056979 +2.38033973 +2.37017441 +2.36006686 +2.35000919 +2.33999169 +2.33000167 +2.32002188 +2.31002835 +2.29998774 +2.28985197 +2.27953935 +2.26886766 +2.25748588 +2.24569236 +2.23505067 +2.22514529 +2.21546769 +2.20592062 +2.19646926 +2.18709131 +2.17776467 +2.16846048 +2.15913851 +2.14975008 +2.14026763 +2.13075874 +2.12139761 +2.11228341 +2.10337281 +2.09460239 +2.08593585 +2.07735465 +2.06884835 +2.06041034 +2.05203594 +2.04372160 +2.03546445 +2.02726206 +2.01911223 +2.01101293 +2.00296213 +1.99495775 +1.98699761 +1.97907932 +1.97120036 +1.96335813 +1.95555024 +1.94777512 +1.94003290 +1.93232648 +1.92466200 +1.91704771 +1.90949122 +1.90199676 +1.89456460 +1.88719258 +1.87987779 +1.87261751 +1.86540939 +1.85825153 +1.85114227 +1.84408022 +1.83706419 +1.83009312 +1.82316609 +1.81628228 +1.80944097 +1.80264150 +1.79588327 +1.78916574 +1.78248841 +1.77585081 +1.76925252 +1.76269312 +1.75617224 +1.74968951 +1.74324459 +1.73683714 +1.73046685 +1.72413341 +1.71783653 +1.71157593 +1.70535133 +1.69916247 +1.69300909 +1.68689093 +1.68080775 +1.67475931 +1.66874538 +1.66276572 +1.65682011 +1.65090832 +1.64503015 +1.63918537 +1.63337377 +1.62759515 +1.62184928 +1.61613598 +1.61045504 +1.60480624 +1.59918940 +1.59360430 +1.58805076 +1.58252857 +1.57703753 +1.57157745 +1.56614813 +1.56074938 +1.55538099 +1.55004277 +1.54473452 +1.53945606 +1.53420719 +1.52898770 +1.52379742 +1.51863615 +1.51350369 +1.50839985 +1.50332445 +1.49827729 +1.49325819 +1.48826694 +1.48330338 +1.47836731 +1.47345854 +1.46857689 +1.46372218 +1.45889422 +1.45409283 +1.44931783 +1.44456904 
+1.43984628 +1.43514938 +1.43047815 +1.42583241 +1.42121200 +1.41661675 +1.41204647 +1.40750100 +1.40298017 +1.39848380 +1.39401173 +1.38956380 +1.38513983 +1.38073967 +1.37636314 +1.37201009 +1.36768035 +1.36337376 +1.35909017 +1.35482941 +1.35059133 +1.34637578 +1.34218258 +1.33801160 +1.33386267 +1.32973565 +1.32563038 +1.32154671 +1.31748449 +1.31344358 +1.30942381 +1.30542505 +1.30144715 +1.29748996 +1.29355333 +1.28963712 +1.28574118 +1.28186537 +1.27800954 +1.27417355 +1.27035725 +1.26656050 +1.26278316 +1.25902507 +1.25528611 +1.25156611 +1.24786493 +1.24418243 +1.24051845 +1.23687286 +1.23324549 +1.22963621 +1.22604485 +1.22247126 +1.21891529 +1.21537678 +1.21185559 +1.20835154 +1.20486450 +1.20139432 +1.19794084 +1.19450394 +1.19108350 +1.18767941 +1.18429159 +1.18091999 +1.17756459 +1.17422540 +1.17090248 +1.16759593 +1.16430586 +1.16103240 +1.15777569 +1.15453581 +1.15131280 +1.14810663 +1.14491719 +1.14174429 +1.13858770 +1.13544713 +1.13232227 +1.12921281 +1.12611844 +1.12303886 +1.11997378 +1.11692290 +1.11388595 +1.11086260 +1.10785252 +1.10485535 +1.10187066 +1.09889797 +1.09593669 +1.09298612 +1.09004539 +1.08711344 +1.08418895 +1.08127026 +1.07835532 +1.07544170 +1.07252665 +1.06960744 +1.06668224 +1.06375160 +1.06082046 +1.05789912 +1.05500114 +1.05213766 +1.04931264 +1.04652324 +1.04376365 +1.04102842 +1.03831349 +1.03561620 +1.03293486 +1.03026836 +1.02761598 +1.02497721 +1.02235167 +1.01973910 +1.01713924 +1.01455192 +1.01197698 +1.00941426 +1.00686364 +1.00432501 +1.00179825 +0.99928326 +0.99677994 +0.99428821 +0.99180797 +0.98933915 +0.98688165 +0.98443541 +0.98200034 +0.97957637 +0.97716343 +0.97476144 +0.97237034 +0.96999005 +0.96762050 +0.96526164 +0.96291339 +0.96057569 +0.95824848 +0.95593168 +0.95362525 +0.95132911 +0.94904321 +0.94676748 +0.94450187 +0.94224632 +0.94000076 +0.93776514 +0.93553940 +0.93332349 +0.93111734 +0.92892091 +0.92673414 +0.92455696 +0.92238934 +0.92023121 +0.91808251 +0.91594321 +0.91381323 +0.91169254 +0.90958108 +0.90747880 +0.90538564 +0.90330156 +0.90122651 +0.89916044 +0.89710329 +0.89505502 +0.89301557 +0.89098491 +0.88896299 +0.88694974 +0.88494514 +0.88294913 +0.88096166 +0.87898269 +0.87701217 +0.87505006 +0.87309631 +0.87115088 +0.86921372 +0.86728479 +0.86536404 +0.86345143 +0.86154692 +0.85965047 +0.85776202 +0.85588155 +0.85400900 +0.85214434 +0.85028751 +0.84843850 +0.84659724 +0.84476370 +0.84293785 +0.84111963 +0.83930901 +0.83750595 +0.83571042 +0.83392236 +0.83214175 +0.83036855 +0.82860271 +0.82684420 +0.82509297 +0.82334901 +0.82161225 +0.81988268 +0.81816024 +0.81644491 +0.81473665 +0.81303542 +0.81134119 +0.80965392 +0.80797357 +0.80630011 +0.80463351 +0.80297372 +0.80132072 +0.79967447 +0.79803494 +0.79640209 +0.79477589 +0.79315630 +0.79154330 +0.78993684 +0.78833689 +0.78674343 +0.78515642 +0.78357582 +0.78200161 +0.78043375 +0.77887221 +0.77731696 +0.77576797 +0.77422520 +0.77268862 +0.77115821 +0.76963393 +0.76811575 +0.76660364 +0.76509756 +0.76359750 +0.76210342 +0.76061528 +0.75913307 +0.75765674 +0.75618628 +0.75472164 +0.75326280 +0.75180974 +0.75036242 +0.74892081 +0.74748489 +0.74605462 +0.74462998 +0.74321094 +0.74179747 +0.74038954 +0.73898713 +0.73759021 +0.73619875 +0.73481271 +0.73343209 +0.73205684 +0.73068694 +0.72932236 +0.72796308 +0.72660906 +0.72526029 +0.72391674 +0.72257837 +0.72124517 +0.71991710 +0.71859414 +0.71727627 +0.71596345 +0.71465567 +0.71335289 +0.71205509 +0.71076225 +0.70947434 +0.70819133 +0.70691320 +0.70563993 +0.70437148 +0.70310784 +0.70184897 +0.70059486 +0.69934548 
+0.69810080 +0.69686080 +0.69562546 +0.69439474 +0.69316863 +0.69194710 +0.69073013 +0.68951769 +0.68830976 +0.68710631 +0.68590732 +0.68471277 +0.68352262 +0.68233686 +0.68115547 +0.67997841 +0.67880566 +0.67763720 +0.67647300 +0.67531305 +0.67415730 +0.67300575 +0.67185836 +0.67071511 +0.66957597 +0.66844092 +0.66730994 +0.66618298 +0.66506004 +0.66394108 +0.66282608 +0.66171501 +0.66060783 +0.65950453 +0.65840507 +0.65730943 +0.65621757 +0.65512946 +0.65404508 +0.65296438 +0.65188735 +0.65081393 +0.64974411 +0.64867784 +0.64761509 +0.64655582 +0.64549999 +0.64444756 +0.64339850 +0.64235276 +0.64131030 +0.64027109 +0.63923508 +0.63820224 +0.63717252 +0.63614590 +0.63512235 +0.63410183 +0.63308434 +0.63206985 +0.63105838 +0.63004992 +0.62904450 +0.62804216 +0.62704294 +0.62604692 +0.62505416 +0.62406475 +0.62307880 +0.62209638 +0.62111759 +0.62014250 +0.61917119 +0.61820370 +0.61724004 +0.61628023 +0.61532423 +0.61437202 +0.61342354 +0.61247873 +0.61153752 +0.61059984 +0.60966563 +0.60873483 +0.60780737 +0.60688320 +0.60596228 +0.60504456 +0.60413001 +0.60321859 +0.60231028 +0.60140504 +0.60050287 +0.59960372 +0.59870760 +0.59781446 +0.59692431 +0.59603712 +0.59515287 +0.59427156 +0.59339316 +0.59251765 +0.59164504 +0.59077529 +0.58990841 +0.58904436 +0.58818315 +0.58732474 +0.58646915 +0.58561633 +0.58476630 +0.58391902 +0.58307450 +0.58223271 +0.58139364 +0.58055728 +0.57972361 +0.57889263 +0.57806432 +0.57723867 +0.57641567 +0.57559530 +0.57477755 +0.57396240 +0.57314986 +0.57233990 +0.57153251 +0.57072768 +0.56992539 +0.56912565 +0.56832842 +0.56753371 +0.56674150 +0.56595178 +0.56516453 +0.56437975 +0.56359743 +0.56281754 +0.56204008 +0.56126505 +0.56049242 +0.55972218 +0.55895433 +0.55818885 +0.55742574 +0.55666497 +0.55590654 +0.55515043 +0.55439664 +0.55364516 +0.55289597 +0.55214905 +0.55140441 +0.55066202 +0.54992188 +0.54918397 +0.54844828 +0.54771481 +0.54698352 +0.54625443 +0.54552751 +0.54480275 +0.54408013 +0.54335965 +0.54264130 +0.54192505 +0.54121089 +0.54049882 +0.53978882 +0.53908086 +0.53837494 +0.53767105 +0.53696916 +0.53626925 +0.53557132 +0.53487534 +0.53418130 +0.53348917 +0.53279894 +0.53211058 +0.53142406 +0.53073938 +0.53005650 +0.52937539 +0.52869603 +0.52801839 +0.52734244 +0.52666815 +0.52599548 +0.52532441 +0.52465490 +0.52398691 +0.52332043 +0.52265541 +0.52199185 +0.52132972 +0.52066901 +0.52000974 +0.51935193 +0.51869562 +0.51804087 +0.51738776 +0.51673639 +0.51608689 +0.51543937 +0.51479395 +0.51415073 +0.51350978 +0.51287114 +0.51223480 +0.51160072 +0.51096885 +0.51033909 +0.50971139 +0.50908565 +0.50846181 +0.50783982 +0.50721964 +0.50660124 +0.50598459 +0.50536967 +0.50475646 +0.50414496 +0.50353515 +0.50292703 +0.50232059 +0.50171581 +0.50111269 +0.50051122 +0.49991140 +0.49931321 +0.49871666 +0.49812173 +0.49752841 +0.49693670 +0.49634660 +0.49575809 +0.49517117 +0.49458582 +0.49400206 +0.49341986 +0.49283922 +0.49226013 +0.49168259 +0.49110659 +0.49053213 +0.48995919 +0.48938778 +0.48881788 +0.48824948 +0.48768259 +0.48711720 +0.48655329 +0.48599086 +0.48542992 +0.48487044 +0.48431243 +0.48375587 +0.48320077 +0.48264711 +0.48209490 +0.48154411 +0.48099476 +0.48044683 +0.47990031 +0.47935521 +0.47881151 +0.47826920 +0.47772830 +0.47718877 +0.47665064 +0.47611387 +0.47557848 +0.47504445 +0.47451178 +0.47398047 +0.47345050 +0.47292188 +0.47239459 +0.47186864 +0.47134401 +0.47082070 +0.47029871 +0.46977803 +0.46925866 +0.46874058 +0.46822380 +0.46770831 +0.46719411 +0.46668118 +0.46616953 +0.46565915 +0.46515003 +0.46464217 +0.46413557 +0.46363021 
+0.46312610 +0.46262322 +0.46212159 +0.46162118 +0.46112199 +0.46062403 +0.46012728 +0.45963174 +0.45913740 +0.45864427 +0.45815233 +0.45766158 +0.45717202 +0.45668364 +0.45619644 +0.45571041 +0.45522555 +0.45474186 +0.45425932 +0.45377793 +0.45329770 +0.45281861 +0.45234066 +0.45186385 +0.45138817 +0.45091362 +0.45044020 +0.44996789 +0.44949669 +0.44902661 +0.44855764 +0.44808976 +0.44762298 +0.44715730 +0.44669271 +0.44622920 +0.44576677 +0.44530542 +0.44484514 +0.44438593 +0.44392778 +0.44347070 +0.44301467 +0.44255969 +0.44210577 +0.44165288 +0.44120104 +0.44075023 +0.44030046 +0.43985171 +0.43940399 +0.43895729 +0.43851161 +0.43806694 +0.43762328 +0.43718063 +0.43673898 +0.43629832 +0.43585866 +0.43541999 +0.43498231 +0.43454561 +0.43410989 +0.43367515 +0.43324137 +0.43280857 +0.43237673 +0.43194586 +0.43151594 +0.43108697 +0.43065896 +0.43023189 +0.42980577 +0.42938058 +0.42895633 +0.42853302 +0.42811064 +0.42768918 +0.42726864 +0.42684903 +0.42643033 +0.42601254 +0.42559566 +0.42517969 +0.42476462 +0.42435045 +0.42393717 +0.42352479 +0.42311330 +0.42270269 +0.42229296 +0.42188412 +0.42147615 +0.42106906 +0.42066283 +0.42025747 +0.41985297 +0.41944934 +0.41904656 +0.41864464 +0.41824356 +0.41784334 +0.41744395 +0.41704541 +0.41664771 +0.41625084 +0.41585481 +0.41545960 +0.41506522 +0.41467166 +0.41427892 +0.41388700 +0.41349589 +0.41310559 +0.41271610 +0.41232742 +0.41193953 +0.41155244 +0.41116615 +0.41078065 +0.41039594 +0.41001201 +0.40962887 +0.40924650 +0.40886492 +0.40848410 +0.40810406 +0.40772478 +0.40734627 +0.40696852 +0.40659152 +0.40621528 +0.40583980 +0.40546505 +0.40509106 +0.40471780 +0.40434529 +0.40397350 +0.40360245 +0.40323213 +0.40286252 +0.40249364 +0.40212548 +0.40175802 +0.40139128 +0.40102524 +0.40065990 +0.40029526 +0.39993130 +0.39956804 +0.39920545 +0.39884354 +0.39848231 +0.39812174 +0.39776183 +0.39740258 +0.39704397 +0.39668600 +0.39632867 +0.39597196 +0.39561587 +0.39526039 +0.39490551 +0.39455121 +0.39419749 +0.39384433 +0.39349171 +0.39313963 +0.39278806 +0.39243699 +0.39208638 +0.39173621 +0.39138646 +0.39103709 +0.39068805 +0.39033930 +0.38999079 +0.38964245 +0.38929420 +0.38894595 +0.38859757 +0.38824894 +0.38789987 +0.38755013 +0.38719945 +0.38684746 +0.38649368 +0.38613747 +0.38577796 +0.38541394 +0.38504370 +0.38466478 +0.38427351 +0.38386438 +0.38342893 +0.38295434 +0.38242227 +0.38181174 +0.38111570 +0.38037709 +0.37969060 +0.37911033 +0.37861118 +0.37815441 +0.37771794 +0.37729158 +0.37687063 +0.37645283 +0.37603702 +0.37562256 +0.37520910 +0.37479642 +0.37438440 +0.37397295 +0.37356203 +0.37315160 +0.37274165 +0.37233215 +0.37192312 +0.37151455 +0.37110643 +0.37069879 +0.37029162 +0.36988492 +0.36947872 +0.36907302 +0.36866782 +0.36826313 +0.36785897 +0.36745532 +0.36705221 +0.36664963 +0.36624759 +0.36584608 +0.36544511 +0.36504469 +0.36464480 +0.36424545 +0.36384663 +0.36344834 +0.36305059 +0.36265336 +0.36225665 +0.36186045 +0.36146478 +0.36106962 +0.36067498 +0.36028084 +0.35988722 +0.35949411 +0.35910150 +0.35870941 +0.35831782 +0.35792674 +0.35753615 +0.35714606 +0.35675645 +0.35636731 +0.35597862 +0.35559036 +0.35520253 +0.35481507 +0.35442798 +0.35404122 +0.35365476 +0.35326856 +0.35288259 +0.35249682 +0.35211119 +0.35172568 +0.35134025 +0.35095485 +0.35056946 +0.35018402 +0.34979851 +0.34941287 +0.34902709 +0.34864111 +0.34825491 +0.34786844 +0.34748167 +0.34709458 +0.34670712 +0.34631927 +0.34593100 +0.34554229 +0.34515312 +0.34476346 +0.34437329 +0.34398261 +0.34359141 +0.34319968 +0.34280742 +0.34241463 +0.34202132 +0.34162751 
+0.34123320 +0.34083843 +0.34044321 +0.34004757 +0.33965156 +0.33925519 +0.33885853 +0.33846160 +0.33806446 +0.33766714 +0.33726971 +0.33687221 +0.33647468 +0.33607719 +0.33567979 +0.33528251 +0.33488542 +0.33448856 +0.33409198 +0.33369572 +0.33329982 +0.33290433 +0.33250929 +0.33211473 +0.33172070 +0.33132721 +0.33093431 +0.33054201 +0.33015036 +0.32975937 +0.32936906 +0.32897946 +0.32859058 +0.32820245 +0.32781508 +0.32742849 +0.32704268 +0.32665767 +0.32627348 +0.32589010 +0.32550756 +0.32512585 +0.32474499 +0.32436498 +0.32398582 +0.32360753 +0.32323011 +0.32285355 +0.32247787 +0.32210306 +0.32172913 +0.32135608 +0.32098391 +0.32061262 +0.32024222 +0.31987269 +0.31950405 +0.31913629 +0.31876941 +0.31840341 +0.31803829 +0.31767405 +0.31731069 +0.31694821 +0.31658659 +0.31622586 +0.31586599 +0.31550699 +0.31514885 +0.31479158 +0.31443518 +0.31407963 +0.31372494 +0.31337111 +0.31301813 +0.31266600 +0.31231471 +0.31196427 +0.31161467 +0.31126592 +0.31091800 +0.31057091 +0.31022465 +0.30987922 +0.30953462 +0.30919084 +0.30884788 +0.30850573 +0.30816440 +0.30782388 +0.30748417 +0.30714527 +0.30680717 +0.30646987 +0.30613337 +0.30579766 +0.30546274 +0.30512862 +0.30479528 +0.30446272 +0.30413095 +0.30379996 +0.30346974 +0.30314030 +0.30281163 +0.30248373 +0.30215660 +0.30183024 +0.30150464 +0.30117980 +0.30085572 +0.30053239 +0.30020982 +0.29988801 +0.29956694 +0.29924663 +0.29892706 +0.29860824 +0.29829016 +0.29797282 +0.29765623 +0.29734037 +0.29702525 +0.29671086 +0.29639721 +0.29608429 +0.29577211 +0.29546065 +0.29514991 +0.29483991 +0.29453062 +0.29422206 +0.29391422 +0.29360709 +0.29330068 +0.29299499 +0.29269001 +0.29238574 +0.29208218 +0.29177932 +0.29147717 +0.29117572 +0.29087498 +0.29057493 +0.29027557 +0.28997691 +0.28967893 +0.28938165 +0.28908505 +0.28878913 +0.28849389 +0.28819933 +0.28790544 +0.28761223 +0.28731968 +0.28702780 +0.28673657 +0.28644601 +0.28615611 +0.28586685 +0.28557825 +0.28529029 +0.28500297 +0.28471629 +0.28443025 +0.28414484 +0.28386006 +0.28357590 +0.28329237 +0.28300945 +0.28272714 +0.28244545 +0.28216436 +0.28188387 +0.28160398 +0.28132468 +0.28104597 +0.28076784 +0.28049030 +0.28021332 +0.27993692 +0.27966108 +0.27938580 +0.27911107 +0.27883689 +0.27856325 +0.27829015 +0.27801758 +0.27774552 +0.27747398 +0.27720295 +0.27693242 +0.27666237 +0.27639281 +0.27612372 +0.27585509 +0.27558691 +0.27531918 +0.27505187 +0.27478498 +0.27451849 +0.27425240 +0.27398668 +0.27372133 +0.27345633 +0.27319167 +0.27292733 +0.27266331 +0.27239960 +0.27213618 +0.27187306 +0.27161023 +0.27134770 +0.27108549 +0.27082361 +0.27056210 +0.27030100 +0.27004035 +0.26978022 +0.26952066 +0.26926177 +0.26900360 +0.26874623 +0.26848971 +0.26823409 +0.26797940 +0.26772565 +0.26747282 +0.26722090 +0.26696984 +0.26671961 +0.26647015 +0.26622142 +0.26597337 +0.26572596 +0.26547918 +0.26523298 +0.26498734 +0.26474226 +0.26449772 +0.26425370 +0.26401021 +0.26376722 +0.26352475 +0.26328278 +0.26304131 +0.26280034 +0.26255987 +0.26231989 +0.26208040 +0.26184140 +0.26160289 +0.26136487 +0.26112733 +0.26089027 +0.26065369 +0.26041759 +0.26018196 +0.25994682 +0.25971214 +0.25947794 +0.25924420 +0.25901093 +0.25877814 +0.25854580 +0.25831393 +0.25808252 +0.25785157 +0.25762108 +0.25739105 +0.25716147 +0.25693234 +0.25670367 +0.25647545 +0.25624768 +0.25602035 +0.25579348 +0.25556704 +0.25534105 +0.25511551 +0.25489040 +0.25466573 +0.25444150 +0.25421771 +0.25399435 +0.25377143 +0.25354893 +0.25332687 +0.25310524 +0.25288403 +0.25266325 +0.25244290 +0.25222297 +0.25200346 +0.25178437 +0.25156570 
+0.25134745 +0.25112962 +0.25091221 +0.25069520 +0.25047861 +0.25026244 +0.25004667 +0.24983131 +0.24961636 +0.24940182 +0.24918768 +0.24897395 +0.24876062 +0.24854769 +0.24833516 +0.24812303 +0.24791129 +0.24769995 +0.24748901 +0.24727846 +0.24706831 +0.24685854 +0.24664917 +0.24644018 +0.24623158 +0.24602337 +0.24581554 +0.24560810 +0.24540103 +0.24519435 +0.24498805 +0.24478213 +0.24457658 +0.24437141 +0.24416662 +0.24396219 +0.24375814 +0.24355446 +0.24335115 +0.24314821 +0.24294563 +0.24274342 +0.24254158 +0.24234009 +0.24213897 +0.24193821 +0.24173780 +0.24153775 +0.24133806 +0.24113872 +0.24093974 +0.24074110 +0.24054282 +0.24034488 +0.24014729 +0.23995004 +0.23975314 +0.23955658 +0.23936036 +0.23916448 +0.23896893 +0.23877372 +0.23857885 +0.23838430 +0.23819009 +0.23799620 +0.23780264 +0.23760940 +0.23741648 +0.23722389 +0.23703161 +0.23683965 +0.23664800 +0.23645667 +0.23626564 +0.23607492 +0.23588451 +0.23569441 +0.23550460 +0.23531510 +0.23512589 +0.23493698 +0.23474837 +0.23456005 +0.23437202 +0.23418428 +0.23399683 +0.23380967 +0.23362279 +0.23343621 +0.23324991 +0.23306390 +0.23287818 +0.23269274 +0.23250761 +0.23232276 +0.23213821 +0.23195397 +0.23177003 +0.23158639 +0.23140307 +0.23122008 +0.23103740 +0.23085506 +0.23067306 +0.23049140 +0.23031009 +0.23012913 +0.22994853 +0.22976829 +0.22958842 +0.22940892 +0.22922979 +0.22905103 +0.22887264 +0.22869462 +0.22851697 +0.22833968 +0.22816275 +0.22798618 +0.22780996 +0.22763410 +0.22745858 +0.22728340 +0.22710856 +0.22693406 +0.22675989 +0.22658605 +0.22641254 +0.22623935 +0.22606648 +0.22589394 +0.22572171 +0.22554979 +0.22537819 +0.22520691 +0.22503593 +0.22486527 +0.22469492 +0.22452487 +0.22435513 +0.22418569 +0.22401656 +0.22384774 +0.22367921 +0.22351099 +0.22334307 +0.22317545 +0.22300813 +0.22284110 +0.22267437 +0.22250794 +0.22234180 +0.22217596 +0.22201041 +0.22184515 +0.22168019 +0.22151551 +0.22135113 +0.22118703 +0.22102323 +0.22085971 +0.22069647 +0.22053352 +0.22037086 +0.22020848 +0.22004639 +0.21988457 +0.21972304 +0.21956179 +0.21940082 +0.21924013 +0.21907972 +0.21891958 +0.21875972 +0.21860014 +0.21844084 +0.21828180 +0.21812305 +0.21796456 +0.21780635 +0.21764841 +0.21749074 +0.21733334 +0.21717621 +0.21701935 +0.21686276 +0.21670643 +0.21655037 +0.21639458 +0.21623905 +0.21608378 +0.21592878 +0.21577404 +0.21561957 +0.21546535 +0.21531140 +0.21515770 +0.21500427 +0.21485109 +0.21469818 +0.21454552 +0.21439311 +0.21424096 +0.21408907 +0.21393743 +0.21378605 +0.21363492 +0.21348404 +0.21333341 +0.21318304 +0.21303291 +0.21288304 +0.21273341 +0.21258404 +0.21243491 +0.21228603 +0.21213739 +0.21198900 +0.21184086 +0.21169296 +0.21154530 +0.21139789 +0.21125072 +0.21110380 +0.21095711 +0.21081067 +0.21066446 +0.21051850 +0.21037277 +0.21022728 +0.21008203 +0.20993702 +0.20979225 +0.20964771 +0.20950340 +0.20935933 +0.20921549 +0.20907189 +0.20892852 +0.20878539 +0.20864248 +0.20849981 +0.20835736 +0.20821515 +0.20807316 +0.20793141 +0.20778988 +0.20764858 +0.20750751 +0.20736666 +0.20722604 +0.20708565 +0.20694548 +0.20680553 +0.20666581 +0.20652631 +0.20638703 +0.20624797 +0.20610914 +0.20597053 +0.20583213 +0.20569396 +0.20555601 +0.20541827 +0.20528075 +0.20514345 +0.20500637 +0.20486950 +0.20473285 +0.20459642 +0.20446020 +0.20432419 +0.20418840 +0.20405282 +0.20391745 +0.20378229 +0.20364735 +0.20351262 +0.20337810 +0.20324378 +0.20310968 +0.20297579 +0.20284210 +0.20270863 +0.20257536 +0.20244229 +0.20230944 +0.20217679 +0.20204434 +0.20191210 +0.20178007 +0.20164824 +0.20151661 +0.20138518 +0.20125396 
+0.20112294 +0.20099212 +0.20086150 +0.20073108 +0.20060086 +0.20047085 +0.20034103 +0.20021140 +0.20008198 +0.19995276 +0.19982373 +0.19969490 +0.19956626 +0.19943782 +0.19930957 +0.19918152 +0.19905367 +0.19892601 +0.19879854 +0.19867126 +0.19854418 +0.19841728 +0.19829058 +0.19816408 +0.19803776 +0.19791163 +0.19778569 +0.19765994 +0.19753438 +0.19740900 +0.19728382 +0.19715882 +0.19703401 +0.19690939 +0.19678495 +0.19666070 +0.19653663 +0.19641275 +0.19628905 +0.19616553 +0.19604220 +0.19591905 +0.19579609 +0.19567331 +0.19555070 +0.19542828 +0.19530605 +0.19518399 +0.19506211 +0.19494041 +0.19481889 +0.19469755 +0.19457638 +0.19445540 +0.19433459 +0.19421396 +0.19409351 +0.19397323 +0.19385313 +0.19373320 +0.19361345 +0.19349387 +0.19337447 +0.19325524 +0.19313619 +0.19301731 +0.19289860 +0.19278006 +0.19266169 +0.19254350 +0.19242548 +0.19230763 +0.19218994 +0.19207243 +0.19195509 +0.19183792 +0.19172091 +0.19160407 +0.19148741 +0.19137090 +0.19125457 +0.19113840 +0.19102240 +0.19090657 +0.19079090 +0.19067540 +0.19056006 +0.19044488 +0.19032987 +0.19021502 +0.19010034 +0.18998582 +0.18987146 +0.18975727 +0.18964323 +0.18952936 +0.18941565 +0.18930210 +0.18918871 +0.18907548 +0.18896241 +0.18884950 +0.18873675 +0.18862416 +0.18851172 +0.18839945 +0.18828733 +0.18817537 +0.18806356 +0.18795191 +0.18784042 +0.18772908 +0.18761790 +0.18750688 +0.18739601 +0.18728529 +0.18717473 +0.18706432 +0.18695406 +0.18684396 +0.18673401 +0.18662421 +0.18651456 +0.18640507 +0.18629573 +0.18618653 +0.18607749 +0.18596860 +0.18585986 +0.18575127 +0.18564283 +0.18553453 +0.18542639 +0.18531839 +0.18521054 +0.18510284 +0.18499529 +0.18488788 +0.18478062 +0.18467351 +0.18456654 +0.18445972 +0.18435304 +0.18424651 +0.18414012 +0.18403388 +0.18392778 +0.18382182 +0.18371601 +0.18361034 +0.18350482 +0.18339943 +0.18329419 +0.18318909 +0.18308413 +0.18297932 +0.18287464 +0.18277010 +0.18266571 +0.18256145 +0.18245733 +0.18235336 +0.18224952 +0.18214582 +0.18204226 +0.18193884 +0.18183555 +0.18173240 +0.18162939 +0.18152652 +0.18142378 +0.18132118 +0.18121871 +0.18111638 +0.18101418 +0.18091212 +0.18081020 +0.18070841 +0.18060675 +0.18050523 +0.18040384 +0.18030258 +0.18020145 +0.18010046 +0.17999960 +0.17989887 +0.17979828 +0.17969781 +0.17959748 +0.17949727 +0.17939720 +0.17929725 +0.17919744 +0.17909775 +0.17899820 +0.17889877 +0.17879947 +0.17870030 +0.17860126 +0.17850235 +0.17840356 +0.17830490 +0.17820637 +0.17810796 +0.17800968 +0.17791152 +0.17781349 +0.17771558 +0.17761780 +0.17752015 +0.17742262 +0.17732521 +0.17722792 +0.17713076 +0.17703372 +0.17693681 +0.17684002 +0.17674334 +0.17664680 +0.17655037 +0.17645406 +0.17635787 +0.17626181 +0.17616586 +0.17607003 +0.17597433 +0.17587874 +0.17578327 +0.17568792 +0.17559269 +0.17549757 +0.17540258 +0.17530770 +0.17521293 +0.17511828 +0.17502375 +0.17492934 +0.17483504 +0.17474085 +0.17464678 +0.17455283 +0.17445899 +0.17436526 +0.17427164 +0.17417814 +0.17408475 +0.17399147 +0.17389831 +0.17380525 +0.17371231 +0.17361947 +0.17352675 +0.17343414 +0.17334163 +0.17324924 +0.17315695 +0.17306477 +0.17297270 +0.17288073 +0.17278887 +0.17269712 +0.17260548 +0.17251393 +0.17242250 +0.17233117 +0.17223994 +0.17214881 +0.17205779 +0.17196687 +0.17187605 +0.17178533 +0.17169471 +0.17160419 +0.17151377 +0.17142345 +0.17133323 +0.17124310 +0.17115307 +0.17106314 +0.17097330 +0.17088356 +0.17079391 +0.17070435 +0.17061488 +0.17052551 +0.17043622 +0.17034703 +0.17025792 +0.17016890 +0.17007997 +0.16999112 +0.16990236 +0.16981367 +0.16972508 +0.16963656 +0.16954812 
+0.16945976 +0.16937148 +0.16928327 +0.16919513 +0.16910707 +0.16901908 +0.16893116 +0.16884331 +0.16875552 +0.16866780 +0.16858013 +0.16849253 +0.16840499 +0.16831750 +0.16823006 +0.16814268 +0.16805534 +0.16796806 +0.16788081 +0.16779361 +0.16770644 +0.16761931 +0.16753221 +0.16744514 +0.16735809 +0.16727107 +0.16718407 +0.16709709 +0.16701012 +0.16692316 +0.16683620 +0.16674926 +0.16666231 +0.16657536 +0.16648841 +0.16640145 +0.16631449 +0.16622752 +0.16614054 +0.16605355 +0.16596656 +0.16587956 +0.16579256 +0.16570557 +0.16561858 +0.16553162 +0.16544468 +0.16535778 +0.16527094 +0.16518416 +0.16509747 +0.16501088 +0.16492441 +0.16483808 +0.16475192 +0.16466595 +0.16458018 +0.16449464 +0.16440935 +0.16432433 +0.16423958 +0.16415512 +0.16407096 +0.16398711 +0.16390355 +0.16382029 +0.16373733 +0.16365466 +0.16357226 +0.16349012 +0.16340823 +0.16332657 +0.16324514 +0.16316391 +0.16308287 +0.16300202 +0.16292133 +0.16284080 +0.16276042 +0.16268018 +0.16260007 +0.16252009 +0.16244023 +0.16236049 +0.16228086 +0.16220134 +0.16212192 +0.16204261 +0.16196340 +0.16188428 +0.16180527 +0.16172635 +0.16164753 +0.16156880 +0.16149017 +0.16141163 +0.16133318 +0.16125482 +0.16117656 +0.16109839 +0.16102030 +0.16094231 +0.16086441 +0.16078660 +0.16070888 +0.16063125 +0.16055370 +0.16047625 +0.16039888 +0.16032161 +0.16024442 +0.16016732 +0.16009031 +0.16001339 +0.15993655 +0.15985980 +0.15978314 +0.15970657 +0.15963008 +0.15955368 +0.15947737 +0.15940115 +0.15932501 +0.15924895 +0.15917299 +0.15909711 +0.15902131 +0.15894561 +0.15886998 +0.15879445 +0.15871900 +0.15864363 +0.15856835 +0.15849316 +0.15841805 +0.15834302 +0.15826808 +0.15819323 +0.15811846 +0.15804377 +0.15796917 +0.15789465 +0.15782021 +0.15774586 +0.15767160 +0.15759742 +0.15752332 +0.15744930 +0.15737537 +0.15730152 +0.15722776 +0.15715407 +0.15708047 +0.15700696 +0.15693352 +0.15686017 +0.15678690 +0.15671372 +0.15664061 +0.15656759 +0.15649465 +0.15642179 +0.15634902 +0.15627632 +0.15620371 +0.15613118 +0.15605872 +0.15598636 +0.15591407 +0.15584186 +0.15576973 +0.15569769 +0.15562572 +0.15555384 +0.15548203 +0.15541031 +0.15533867 +0.15526710 +0.15519562 +0.15512422 +0.15505289 +0.15498165 +0.15491048 +0.15483940 +0.15476839 +0.15469746 +0.15462661 +0.15455584 +0.15448515 +0.15441454 +0.15434401 +0.15427355 +0.15420317 +0.15413287 +0.15406265 +0.15399251 +0.15392244 +0.15385245 +0.15378254 +0.15371271 +0.15364295 +0.15357327 +0.15350367 +0.15343415 +0.15336470 +0.15329532 +0.15322603 +0.15315681 +0.15308767 +0.15301860 +0.15294961 +0.15288069 +0.15281185 +0.15274309 +0.15267440 +0.15260578 +0.15253725 +0.15246878 +0.15240039 +0.15233208 +0.15226384 +0.15219567 +0.15212758 +0.15205957 +0.15199162 +0.15192376 +0.15185596 +0.15178824 +0.15172059 +0.15165302 +0.15158552 +0.15151809 +0.15145073 +0.15138345 +0.15131624 +0.15124911 +0.15118204 +0.15111505 +0.15104813 +0.15098128 +0.15091451 +0.15084780 +0.15078117 +0.15071461 +0.15064812 +0.15058170 +0.15051535 +0.15044908 +0.15038287 +0.15031674 +0.15025068 +0.15018468 +0.15011876 +0.15005291 +0.14998712 +0.14992141 +0.14985577 +0.14979020 +0.14972469 +0.14965926 +0.14959390 +0.14952860 +0.14946337 +0.14939822 +0.14933313 +0.14926811 +0.14920316 +0.14913828 +0.14907346 +0.14900872 +0.14894404 +0.14887943 +0.14881489 +0.14875041 +0.14868601 +0.14862167 +0.14855740 +0.14849319 +0.14842906 +0.14836499 +0.14830098 +0.14823705 +0.14817318 +0.14810937 +0.14804564 +0.14798197 +0.14791836 +0.14785483 +0.14779135 +0.14772795 +0.14766461 +0.14760133 +0.14753812 +0.14747498 +0.14741190 +0.14734889 
+0.14728594 +0.14722306 +0.14716024 +0.14709749 +0.14703480 +0.14697218 +0.14690962 +0.14684712 +0.14678469 +0.14672233 +0.14666002 +0.14659778 +0.14653561 +0.14647350 +0.14641145 +0.14634947 +0.14628755 +0.14622569 +0.14616390 +0.14610216 +0.14604050 +0.14597889 +0.14591735 +0.14585587 +0.14579445 +0.14573310 +0.14567181 +0.14561058 +0.14554941 +0.14548830 +0.14542726 +0.14536628 +0.14530536 +0.14524450 +0.14518370 +0.14512296 +0.14506229 +0.14500168 +0.14494113 +0.14488064 +0.14482021 +0.14475984 +0.14469953 +0.14463928 +0.14457910 +0.14451897 +0.14445890 +0.14439890 +0.14433895 +0.14427907 +0.14421924 +0.14415948 +0.14409977 +0.14404013 +0.14398054 +0.14392101 +0.14386155 +0.14380214 +0.14374279 +0.14368350 +0.14362427 +0.14356510 +0.14350599 +0.14344693 +0.14338794 +0.14332900 +0.14327013 +0.14321131 +0.14315255 +0.14309384 +0.14303520 +0.14297661 +0.14291809 +0.14285962 +0.14280120 +0.14274285 +0.14268455 +0.14262631 +0.14256813 +0.14251001 +0.14245194 +0.14239393 +0.14233597 +0.14227808 +0.14222024 +0.14216246 +0.14210473 +0.14204706 +0.14198945 +0.14193190 +0.14187440 +0.14181695 +0.14175957 +0.14170224 +0.14164496 +0.14158774 +0.14153058 +0.14147347 +0.14141642 +0.14135943 +0.14130249 +0.14124560 +0.14118877 +0.14113200 +0.14107528 +0.14101862 +0.14096201 +0.14090546 +0.14084896 +0.14079252 +0.14073613 +0.14067979 +0.14062352 +0.14056729 +0.14051112 +0.14045500 +0.14039894 +0.14034293 +0.14028698 +0.14023108 +0.14017524 +0.14011944 +0.14006371 +0.14000802 +0.13995239 +0.13989681 +0.13984129 +0.13978582 +0.13973040 +0.13967504 +0.13961973 +0.13956447 +0.13950927 +0.13945412 +0.13939902 +0.13934397 +0.13928898 +0.13923404 +0.13917915 +0.13912431 +0.13906953 +0.13901480 +0.13896012 +0.13890549 +0.13885092 +0.13879639 +0.13874192 +0.13868750 +0.13863314 +0.13857882 +0.13852456 +0.13847035 +0.13841618 +0.13836207 +0.13830802 +0.13825401 +0.13820005 +0.13814615 +0.13809229 +0.13803849 +0.13798474 +0.13793104 +0.13787739 +0.13782379 +0.13777024 +0.13771674 +0.13766329 +0.13760989 +0.13755655 +0.13750325 +0.13745000 +0.13739680 +0.13734366 +0.13729056 +0.13723751 +0.13718451 +0.13713157 +0.13707867 +0.13702582 +0.13697302 +0.13692027 +0.13686757 +0.13681492 +0.13676231 +0.13670976 +0.13665726 +0.13660480 +0.13655240 +0.13650004 +0.13644773 +0.13639547 +0.13634326 +0.13629110 +0.13623898 +0.13618692 +0.13613490 +0.13608293 +0.13603101 +0.13597914 +0.13592731 +0.13587554 +0.13582381 +0.13577213 +0.13572049 +0.13566891 +0.13561737 +0.13556588 +0.13551444 +0.13546304 +0.13541170 +0.13536040 +0.13530914 +0.13525794 +0.13520678 +0.13515567 +0.13510460 +0.13505359 +0.13500262 +0.13495169 +0.13490081 +0.13484998 +0.13479920 +0.13474846 +0.13469777 +0.13464713 +0.13459653 +0.13454598 +0.13449547 +0.13444501 +0.13439460 +0.13434423 +0.13429391 +0.13424363 +0.13419340 +0.13414322 +0.13409308 +0.13404299 +0.13399294 +0.13394294 +0.13389298 +0.13384307 +0.13379320 +0.13374338 +0.13369361 +0.13364388 +0.13359419 +0.13354455 +0.13349496 +0.13344541 +0.13339590 +0.13334644 +0.13329702 +0.13324765 +0.13319832 +0.13314904 +0.13309980 +0.13305061 +0.13300145 +0.13295235 +0.13290329 +0.13285427 +0.13280530 +0.13275637 +0.13270748 +0.13265864 +0.13260984 +0.13256108 +0.13251237 +0.13246371 +0.13241508 +0.13236650 +0.13231796 +0.13226947 +0.13222102 +0.13217261 +0.13212425 +0.13207593 +0.13202765 +0.13197941 +0.13193122 +0.13188307 +0.13183496 +0.13178690 +0.13173888 +0.13169090 +0.13164296 +0.13159507 +0.13154722 +0.13149941 +0.13145164 +0.13140392 +0.13135624 +0.13130860 +0.13126100 +0.13121344 +0.13116593 
+0.13111846 +0.13107102 +0.13102364 +0.13097629 +0.13092898 +0.13088172 +0.13083450 +0.13078732 +0.13074018 +0.13069308 +0.13064602 +0.13059901 +0.13055204 +0.13050510 +0.13045821 +0.13041136 +0.13036455 +0.13031778 +0.13027105 +0.13022437 +0.13017772 +0.13013111 +0.13008455 +0.13003803 +0.12999154 +0.12994510 +0.12989870 +0.12985233 +0.12980601 +0.12975973 +0.12971349 +0.12966729 +0.12962113 +0.12957500 +0.12952892 +0.12948288 +0.12943688 +0.12939092 +0.12934500 +0.12929911 +0.12925327 +0.12920747 +0.12916171 +0.12911598 +0.12907030 +0.12902465 +0.12897905 +0.12893348 +0.12888796 +0.12884247 +0.12879702 +0.12875161 +0.12870624 +0.12866091 +0.12861562 +0.12857037 +0.12852515 +0.12847998 +0.12843484 +0.12838974 +0.12834468 +0.12829966 +0.12825468 +0.12820974 +0.12816483 +0.12811996 +0.12807514 +0.12803035 +0.12798559 +0.12794088 +0.12789620 +0.12785157 +0.12780697 +0.12776241 +0.12771788 +0.12767340 +0.12762895 +0.12758454 +0.12754017 +0.12749583 +0.12745153 +0.12740728 +0.12736305 +0.12731887 +0.12727472 +0.12723061 +0.12718654 +0.12714251 +0.12709851 +0.12705455 +0.12701062 +0.12696674 +0.12692289 +0.12687907 +0.12683530 +0.12679156 +0.12674786 +0.12670419 +0.12666056 +0.12661697 +0.12657342 +0.12652990 +0.12648642 +0.12644297 +0.12639956 +0.12635619 +0.12631285 +0.12626955 +0.12622628 +0.12618305 +0.12613986 +0.12609671 +0.12605358 +0.12601050 +0.12596745 +0.12592444 +0.12588146 +0.12583852 +0.12579561 +0.12575274 +0.12570990 +0.12566710 +0.12562434 +0.12558161 +0.12553892 +0.12549626 +0.12545363 +0.12541104 +0.12536849 +0.12532597 +0.12528349 +0.12524104 +0.12519862 +0.12515624 +0.12511390 +0.12507159 +0.12502931 +0.12498707 +0.12494487 +0.12490269 +0.12486056 +0.12481845 +0.12477638 +0.12473435 +0.12469235 +0.12465038 +0.12460845 +0.12456655 +0.12452468 +0.12448285 +0.12444105 +0.12439929 +0.12435756 +0.12431586 +0.12427420 +0.12423257 +0.12419097 +0.12414941 +0.12410788 +0.12406639 +0.12402492 +0.12398349 +0.12394210 +0.12390073 +0.12385940 +0.12381811 +0.12377684 +0.12373561 +0.12369441 +0.12365324 +0.12361211 +0.12357101 +0.12352994 +0.12348891 +0.12344790 +0.12340693 +0.12336600 +0.12332509 +0.12328422 +0.12324338 +0.12320257 +0.12316179 +0.12312105 +0.12308034 +0.12303966 +0.12299901 +0.12295839 +0.12291781 +0.12287726 +0.12283674 +0.12279625 +0.12275579 +0.12271537 +0.12267497 +0.12263461 +0.12259428 +0.12255399 +0.12251372 +0.12247349 +0.12243328 +0.12239311 +0.12235297 +0.12231286 +0.12227279 +0.12223274 +0.12219273 +0.12215275 +0.12211280 +0.12207288 +0.12203299 +0.12199313 +0.12195331 +0.12191351 +0.12187375 +0.12183402 +0.12179432 +0.12175465 +0.12171502 +0.12167541 +0.12163583 +0.12159629 +0.12155678 +0.12151730 +0.12147785 +0.12143843 +0.12139905 +0.12135969 +0.12132037 +0.12128108 +0.12124182 +0.12120259 +0.12116339 +0.12112422 +0.12108509 +0.12104598 +0.12100691 +0.12096787 +0.12092886 +0.12088988 +0.12085094 +0.12081202 +0.12077314 +0.12073429 +0.12069547 +0.12065668 +0.12061792 +0.12057920 +0.12054050 +0.12050184 +0.12046321 +0.12042461 +0.12038604 +0.12034751 +0.12030901 +0.12027054 +0.12023210 +0.12019369 +0.12015531 +0.12011697 +0.12007865 +0.12004037 +0.12000212 +0.11996391 +0.11992572 +0.11988757 +0.11984945 +0.11981136 +0.11977330 +0.11973527 +0.11969728 +0.11965931 +0.11962138 +0.11958348 +0.11954561 +0.11950778 +0.11946997 +0.11943220 +0.11939446 +0.11935675 +0.11931907 +0.11928142 +0.11924381 +0.11920622 +0.11916867 +0.11913115 +0.11909366 +0.11905620 +0.11901877 +0.11898138 +0.11894401 +0.11890668 +0.11886937 +0.11883210 +0.11879486 +0.11875765 +0.11872047 
+... [the remaining values of this loss trace are omitted; they decrease smoothly from +0.11868332 down to +0.05320991]
diff --git a/Project2-Character-Recognition/build/xor_W1_DxH_2_x_4.txt b/Project2-Character-Recognition/build/xor_W1_DxH_2_x_4.txt
new file mode 100644
index 0000000..8cf7cf2
--- /dev/null
+++ b/Project2-Character-Recognition/build/xor_W1_DxH_2_x_4.txt
@@ -0,0 +1,8 @@
+-0.49231626
+-0.40751672
+2.82573045
+3.57505757
+0.34513172
+1.16218060
+-0.72394906
+3.04743704
diff --git a/Project2-Character-Recognition/build/xor_W2_HxC_4_x_2.txt b/Project2-Character-Recognition/build/xor_W2_HxC_4_x_2.txt
new file mode 100644
index 0000000..707b05d
--- /dev/null
+++ b/Project2-Character-Recognition/build/xor_W2_HxC_4_x_2.txt
@@ -0,0 +1,8 @@
+0.37121719
+-0.88363409
+1.95692802
+-0.25860953
+1.19070299
+-1.99379371
+-2.84738739
+1.87420210
diff --git a/Project2-Character-Recognition/build/xor_losses.txt b/Project2-Character-Recognition/build/xor_losses.txt
new file mode 100644
index 0000000..62c1c96
--- /dev/null
+++ b/Project2-Character-Recognition/build/xor_losses.txt
@@ -0,0 +1,1001 @@
+0.73675045
+0.71901634
+0.70849981
+... [intermediate values omitted: the loss settles near 0.6931 for several hundred epochs, then falls steadily]
+0.45454410
+0.45273782
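A note on xor_losses.txt: the long plateau sits almost exactly at ln 2 ~= 0.6931, which is the cross-entropy of a softmax that outputs a uniform 50/50 over the C = 2 XOR classes; training only leaves that plateau once the randomly initialised weights break symmetry. A quick standalone host-side check (illustrative only, not part of the patch):

// Standalone sanity check (not part of the patch): the expected loss of an
// uninformative C-class softmax is -log(1/C); for XOR, C = 2 gives ln 2.
#include <cmath>
#include <cstdio>

int main() {
    const int C = 2;                      // classes in the XOR task
    double plateau = -std::log(1.0 / C);  // cross-entropy of a uniform prediction
    std::printf("expected plateau loss: %.8f\n", plateau);  // prints 0.69314718
    return 0;
}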
diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu
index b5bb821..486418c 100644
--- a/Project2-Character-Recognition/character_recognition/mlp.cu
+++ b/Project2-Character-Recognition/character_recognition/mlp.cu
@@ -18,18 +18,13 @@ namespace CharacterRecognition {
         return timer;
     }
 
-    // Initlialiations
+    //=====Initialisations=======
     //layers
     double *dev_iLayer;
     double *dev_hLayer;
     double *dev_oLayer;
 
-    double *dev_b1;
-    double *dev_b2;
-    double *dev_db1;
-    double *dev_db2;
-
     double *dev_losses;
     double *dev_LossAvg;
 
@@ -86,7 +81,7 @@ namespace CharacterRecognition {
            }
            printf("%3d ", a[i]);
        }
-       printf("]\n\n");
+       printf("]\n");
    }
    void printFloatArray(int n, double *a, bool abridged = false) {
        printf(" [ ");
@@ -95,13 +90,12 @@ namespace CharacterRecognition {
            i = n - 2;
            printf("... ");
        }
-       printf("%3f ", a[i]);
+       printf("%0.2f ", a[i]);
    }
-   printf("]\n\n");
+   printf("]\n");
 }
 
-
 // Kernel for Gradient update on Weights
 __global__ void kernUpdateWeights(int N, double *dev_dw, double *dev_w, double LR) {
 
@@ -167,16 +161,8 @@ namespace CharacterRecognition {
    // compute loss per example
    __global__ void kernLossPerN(int N, int C, double* dev_oLayer, int* dev_gtruth, double* dev_losses) {
        int tid = threadIdx.x + blockIdx.x * blockDim.x;
-
-       if (tid < N) {
-           //printf("tid = %d \n", tid);
-           //printf("tid*C = %d \n", tid*C);
-           //printf("tid*C + gtruth = %d \n", tid*C + dev_gtruth[tid]);
-           //printf("dev_oLayer[tid*C + dev_gtruth[tid]] = %0.3f \n", dev_oLayer[ tid*C + dev_gtruth[tid] ]);
-           //printf("dev_oLayer[tid*C + dev_gtruth[tid]] = %0.3f \n", log(dev_oLayer[tid*C + dev_gtruth[tid]]));
+       if (tid < N)
            dev_losses[tid] = -log(dev_oLayer[tid*C + dev_gtruth[tid]]);
-       }
    }
 
@@ -258,12 +244,14 @@ namespace CharacterRecognition {
    }
 
-   void trainMLP(int N, int D, int H, int C, double *idata, int *preds, int *gtruth, int epochs, double *lossAvgPerEpoch, const double LR, unsigned long seed) {
+   void trainMLP(int N, int D, int H, int C, double *idata, int *preds, int *gtruth, int epochs,
+       double *lossAvgPerEpoch, const double LR, double *w1, double *w2, unsigned long seed) {
 
        timer().startGpuTimer();
 
        // N = number of examples
        // D = dim of each example
+       // H = Hidden state nodes
        // C = number of classes
 
        // NETWORK DEFINITION_____________
 
@@ ...
        cudaMalloc((void**)&dev_w_ji_T, C*H * sizeof(double));
        checkCUDAErrorFn("cudaMalloc dev_w_ji_T failed!");
 
-       /*
-       //Allocate biases
-       cudaMalloc((void**)&dev_b1, N*H * sizeof(double));
-       checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!");
-
-       cudaMalloc((void**)&dev_b2, N*C * sizeof(double));
-       checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!");
-
-       cudaMalloc((void**)&dev_db1, N*H * sizeof(double));
-       checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!");
-
-       cudaMalloc((void**)&dev_db2, N*C * sizeof(double));
-       checkCUDAErrorFn("cudaMalloc dev_hLayer_T failed!");
-       */
-
        //==============================
-       // Initialise Weights and Biases
+       // Initialise Weights
        //==============================
 
        cudaMalloc((void**)&devState, H*D * sizeof(curandState));
 
        KernGenRand << <((H*C + blockSize - 1) / blockSize), blockSize >> > (devState, H*C, dev_w_ji);//w2
        checkCUDAErrorFn("KernGenRand dev_w_kj failed!");
 
-       /*
-       kernInitCurand << <((N*C + blockSize - 1) / blockSize), blockSize >> > (devState, N*C, 9); //9
-       checkCUDAErrorFn("KernInitCurand failed!");
-       KernGenRand << <((N*C + blockSize - 1) / blockSize), blockSize >> > (devState, N*C, dev_b2);
-       checkCUDAErrorFn("KernGenRand dev_w_kj failed!");
-
-       kernInitCurand << <((N*H + blockSize - 1) / blockSize), blockSize >> > (devState, N*H, 9999); //9999
-       checkCUDAErrorFn("KernInitCurand failed!");
-       KernGenRand << <((N*H + blockSize - 1) / blockSize), blockSize >> > (devState, N*H, dev_b1);
-       checkCUDAErrorFn("KernGenRand dev_w_kj failed!");
-       */
-
-       /*double *rand = new double[D*C];
-       cudaMemcpy(rand, dev_w_kj, D*C* sizeof(double), cudaMemcpyDeviceToHost);
-       checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_w_kj to rand failed!");
-       printf("Post random inits dev_w_kj - \n");
-       printFloatArray(D*C, rand, true);*/
-
-       /*double *rand2 = new double[C*C];
-       cudaMemcpy(rand2, dev_w_ji, C*C * sizeof(double), cudaMemcpyDeviceToHost);
-       checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_w_kj to rand failed!");
-       printf("Post random inits dev_w_ji - \n");
-       printFloatArray(C*C, rand2, true);*/
 
        //================================================================
        //======================TRAINING LOOP=============================
 
        double *tmp = new double[N*D];
        double *tmp2 = new double[N*D];
        double *lossesN = new double[N];
-       int *tmpint = new int[N];
-
-       printf("Input DATA\n");
-       printFloatArray(N*D, idata, true);
+       printf("--------------------------------------------\n");
+       printf("One Hidden Layer MLP | Configuration \n");
+       printf("--------------------------------------------\n");
+       printf("Number of Examples            | N = %d \n", N);
+       printf("Dimensionality of each Example| D = %d \n", D);
+       printf("Number of Hidden Layer Nodes  | H = %d \n", H);
+       printf("Total Number of Classes       | C = %d \n", C);
+       printf("Activation    = Sigmoid \n");
+       printf("Loss Function = Cross Entropy \n");
+       printf("--------------------------------------------\n");
+
+       //printf("\nInput DATA ");
+       //printFloatArray(N*D, idata, true);
 
        dim3 dimBlock(blockWidth, blockWidth);
        dim3 dimGrid;
 
@@ ...
            // STEP 1
            //=================================
            // dev_hLayer = dev_iLayer*dev_w_kj
            // NxH  =  NxD   DxH
-
-
            dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x;
            dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y;
            kernMatrixMultiply << <dimGrid, dimBlock >> > (dev_iLayer, dev_w_kj, dev_hLayer, N, D, H);
-           //kernAddition << < ((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_hLayer, dev_b1);
-
-           // Copy back to cpu
-           //double *tmp = new double[N*H];
-           //cudaMemcpy(tmp, dev_hLayer, N*H* sizeof(double), cudaMemcpyDeviceToHost);
-           //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!");
-           //printf("Post matmul [f1 = dev_iLayer*dev_w_kj]\n");
-           //printFloatArray(N*H, tmp, true);
 
            // STEP 2
            // X2 = Sigmoid(f1)
-           //================================
            // dev_hLayer = sigmoid(dev_hLayer)
            // NxH = NxH
            kernSigmoid << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_hLayer);
 
-           // Copy back to cpu
-           //cudaMemcpy(tmp, dev_hLayer, N*H*sizeof(double), cudaMemcpyDeviceToHost);
-           //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!");
-           //printf("Post sigmoid [X2 = Sigmoid(f1) ]\n");
-           //printFloatArray(N*H, tmp, true);
 
            // STEP 3
            // Scores S = W2*X2 (Matrix Mul)
-           //================================
            // dev_oLayer = dev_hLayer*dev_w_ji
            // NxC  =  NxH   HxC
            dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x;
            dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y;
            kernMatrixMultiply << <dimGrid, dimBlock >> > (dev_hLayer, dev_w_ji, dev_oLayer, N, H, C);
-           //kernAddition << < ((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_oLayer, dev_b2);
            checkCUDAErrorFn("kernMatrixMultiply failed!");
 
-           // Copy back to cpu
-           //cudaMemcpy(tmp, dev_oLayer, N*C*sizeof(double), cudaMemcpyDeviceToHost);
-           //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_arrayA to odata failed!");
-           //printf("Post S=W2*x2\n");
-           //printFloatArray(N*C, tmp, true);
 
            // STEP 4
            // P = Softmax(S)
-           //===============
            // dev_smaxDen = Sum_Over_classses(dev_olayer)
            // dev_olayer  = dev_olayer/Sum_Over_classses
            // NxC  =  NxC   1
            kernSoftmax << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer);
            checkCUDAErrorFn("kernSoftmax failed!");
 
-           // Copy back to cpu
            cudaMemcpy(tmp, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToHost);
            checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to tmp failed!");
            printf("Post Softmax [dev_olayer = exp(dev_olayer)/Sum_Over_classses]\n");
            printFloatArray(N*C, tmp, true);
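For reference, STEPs 1-4 above compute hidden = sigmoid(idata * W_kj), scores = hidden * W_ji, probs = softmax(scores). Below is a single-threaded host sketch of the same pipeline (illustrative only; forwardReference is a hypothetical name, layouts are the row-major ones the kernels use, and the row-max subtraction is a standard softmax stabilisation that the elided kernSoftmax body may or may not perform):

// Host-side reference for the forward pass above (hypothetical helper, not in the patch).
// X: NxD input, W1: DxH, W2: HxC; probs: NxC softmax outputs.
#include <cmath>
#include <vector>

void forwardReference(int N, int D, int H, int C,
                      const std::vector<double>& X,
                      const std::vector<double>& W1,
                      const std::vector<double>& W2,
                      std::vector<double>& probs) {
    std::vector<double> hidden(N * H), scores(N * C);
    // STEP 1+2: hidden = sigmoid(X * W1)
    for (int n = 0; n < N; n++)
        for (int h = 0; h < H; h++) {
            double f = 0.0;
            for (int d = 0; d < D; d++) f += X[n*D + d] * W1[d*H + h];
            hidden[n*H + h] = 1.0 / (1.0 + std::exp(-f));
        }
    // STEP 3: scores = hidden * W2
    for (int n = 0; n < N; n++)
        for (int c = 0; c < C; c++) {
            double s = 0.0;
            for (int h = 0; h < H; h++) s += hidden[n*H + h] * W2[h*C + c];
            scores[n*C + c] = s;
        }
    // STEP 4: probs = row-wise softmax(scores); subtracting the row max
    // keeps exp() from overflowing.
    probs.resize(N * C);
    for (int n = 0; n < N; n++) {
        double mx = scores[n*C];
        for (int c = 1; c < C; c++) mx = std::fmax(mx, scores[n*C + c]);
        double den = 0.0;
        for (int c = 0; c < C; c++) den += std::exp(scores[n*C + c] - mx);
        for (int c = 0; c < C; c++) probs[n*C + c] = std::exp(scores[n*C + c] - mx) / den;
    }
}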
-
-           // print check
-           //cudaMemcpy(tmpint, dev_gtruth, N * sizeof(int), cudaMemcpyDeviceToHost);
-           //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_gtruth to tmpint failed!");
-           //printf("Print gtruth \n");
-           //printArray(N, tmpint, true);
-
            // STEP 5
            // Compute Losses | Cross Entropy Loss
-           //==================================
-           // Compute Loss L = CEntropy(P)
            kernLossPerN << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_gtruth, dev_losses);
            checkCUDAErrorFn("kernLossPerN failed!");
 
-           // Copy back to cpu
-           cudaMemcpy(lossesN, dev_losses, N * sizeof(double), cudaMemcpyDeviceToHost);
-           checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to lossesN failed!");
-           printf("Post dev_losses [Loss = CEntropy(P)]\n");
-           printFloatArray(N, lossesN, true);
+           // Copy loss to CPU
+           //cudaMemcpy(lossesN, dev_losses, N * sizeof(double), cudaMemcpyDeviceToHost);
+           //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_losses to lossesN failed!");
+           //printf("Post dev_losses [Loss = CEntropy(P)]\n");
+           //printFloatArray(N, lossesN, true);
 
-           // Predictions
+           // Compute Predictions
            kernPredsN << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_oLayer, dev_gtruth, dev_preds, dev_preds_probab);
            cudaMemcpy(preds, dev_preds, N * sizeof(int), cudaMemcpyDeviceToHost);
            checkCUDAErrorFn("cudaMemcpyDeviceToHost from dev_preds to preds failed!");
            cudaMemcpy(tmp2, dev_preds_probab, N * sizeof(double), cudaMemcpyDeviceToHost);
            checkCUDAErrorFn("cudaMemcpyDeviceToHost from dev_preds_probab to tmp failed!");
-           printf("Predictions\n");
-           printArray(N, preds, true);
-           printFloatArray(N, tmp2, true);
 
            // STEP 5.2
            // Compute Avg of Losses
-           //==================================
-           // Dumb Reduction
-
            kernReduction << <((N + blockSize - 1) / blockSize), blockSize >> > (N, dev_losses, dev_LossAvg);
 
            // Copy back to cpu
            cudaMemcpy(lossAvgPerEpoch + i, dev_LossAvg, sizeof(double), cudaMemcpyDeviceToHost);
            checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_LossAvg to tmp failed!");
 
-           printf("Epoch: %d | LossAvg %3f \n", i, lossAvgPerEpoch[i]);
-
+           if (i % 1000 == 0) {
+               printf("Epoch : %3d | LossAvg %3f \n", i, lossAvgPerEpoch[i]);
+               printf("GroundTruth :");
+               printArray(N, gtruth, true);
+               printf("Predictions :");
+               printArray(N, preds, true);
+               printf("Confidence :");
+               printFloatArray(N, tmp2, true);
+               printf("\n");
+           }
 
            //=================================================================
            //========================= BACKPROP ==============================
 
-           // STEP 1 : Gradient wrt w_ji
-           // dW_ji = Probs_k - [1](gth == k)   dev_dL_dscores;
+           //===============================
+           // STEP 1 : Gradient wrt w_ji W2
+           //===============================
+           // dW_ji = Probs_k - [1](gth == k)   dev_dL_dscores;
+
            cudaMemcpy(dev_dL_dscores, dev_oLayer, N*C * sizeof(double), cudaMemcpyDeviceToDevice);
            checkCUDAErrorFn("cudaMemcpyFromSymbol from probabs to dev_dL_dscores failed!");
 
            kernSetdscores << <((N + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_dL_dscores, dev_gtruth);
            checkCUDAErrorFn("kernSetdscores failed!");
 
-           //cudaMemcpy(tmp, dev_dL_dscores, N*C * sizeof(double), cudaMemcpyDeviceToHost);
-           //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to dev_dL_dscores failed!");
-           //printf("dev_dL_dscores \n");
-           //printFloatArray(N*C, tmp, true);
-
-           // Copy back to cpu
-           //cudaMemcpy(tmp, dev_dL_dscores, N*C * sizeof(double), cudaMemcpyDeviceToHost);
-           //checkCUDAErrorFn("cudaMemcpyFromSymbol [kernSetdscores] from dev_dL_dscores to tmp failed!");
-           //printf("Post setting loss at positions dev_dL_dscores \n");
-           //printFloatArray(N*C, tmp, true);
 
            kernDivNdscores << <((N*C + blockSize - 1) / blockSize), blockSize >> > (N, C, dev_dL_dscores);
            checkCUDAErrorFn("kernDivNdscores failed!");
 
-           //cudaMemcpy(tmp, dev_dL_dscores, N*C * sizeof(double), cudaMemcpyDeviceToHost);
-           //checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_oLayer to dev_dL_dscores failed!");
-           //printf("dev_dL_dscores /N\n");
-           //printFloatArray(N*C, tmp, true);
-
            dimGrid.x = (H + dimBlock.x - 1) / dimBlock.x;
            dimGrid.y = (N + dimBlock.y - 1) / dimBlock.y;
            kernMatrixTranspose << <dimGrid, dimBlock >> > (N, H, dev_hLayer, dev_hLayer_T);
 
-           /*
-           cudaMemcpy(tmp, dev_hLayer, N*H * sizeof(double), cudaMemcpyDeviceToHost);
-           checkCUDAErrorFn("cudaMemcpyFromSymbol dev_hLayer to tmp failed!");
-           printf("dev_hLayer \n");
-           printFloatArray(N*H, tmp, true);
-
-           cudaMemcpy(tmp, dev_hLayer_T, N*H* sizeof(double), cudaMemcpyDeviceToHost);
-           checkCUDAErrorFn("cudaMemcpyFromSymbol dev_hLayer_T to tmp failed!");
-           printf("dev_hLayer_T \n");
-           printFloatArray(N*H, tmp, true);*/
-
            dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x;
            dimGrid.y = (H + dimBlock.y - 1) / dimBlock.y;
            kernMatrixMultiply << <dimGrid, dimBlock >> > (dev_hLayer_T, dev_dL_dscores, dev_dL_dw_ji, H, N, C);
            checkCUDAErrorFn("kernMatrixMultiply for dev_dL_dw_ji failed!");
 
-           //===========================
-           // STEP 2 : Gradient wrt w_kj
-           //===========================
+           //===============================
+           // STEP 2 : Gradient wrt w_kj W1
+           //===============================
 
            // Transpose Wji (W2)
            dimGrid.x = (C + dimBlock.x - 1) / dimBlock.x;
            dimGrid.y = (H + dimBlock.y - 1) / dimBlock.y;
@@ -611,7 +503,6 @@ namespace CharacterRecognition {
            kernGradSigmoid << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N, H, dev_hLayer);
            checkCUDAErrorFn("kernGradSigmoid failed!");
 
-
            //Element wise mul dev_dL_dscores_2 [INPLACE] = dev_dL_dscores_2 . dev_hlayer[sig gradient]
            kernElementProduct << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_dL_dscores_2, dev_hLayer);
            checkCUDAErrorFn("kernElementProduct failed!");
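In equation form, the kernels above assemble dS = (P - onehot(gtruth))/N, dW_ji = hidden^T * dS, and dH = (dS * W_ji^T) .* hidden .* (1 - hidden), with dW_kj = idata^T * dH produced by the matrix multiply elided from this hunk. A host-side sketch for cross-checking (hypothetical backwardReference, not part of the patch; layouts as in the kernels):

// Host-side reference for the backprop steps above (illustrative only).
#include <vector>

void backwardReference(int N, int D, int H, int C,
                       const std::vector<double>& X,       // NxD input
                       const std::vector<double>& hidden,  // NxH (post-sigmoid)
                       const std::vector<double>& probs,   // NxC (post-softmax)
                       const std::vector<int>& gtruth,     // N labels
                       const std::vector<double>& W2,      // HxC
                       std::vector<double>& dW1,           // DxH gradient out
                       std::vector<double>& dW2) {         // HxC gradient out
    std::vector<double> dS(N * C), dH(N * H, 0.0);
    // dS = (P - onehot)/N  -- kernSetdscores + kernDivNdscores
    for (int n = 0; n < N; n++)
        for (int c = 0; c < C; c++)
            dS[n*C + c] = (probs[n*C + c] - (gtruth[n] == c ? 1.0 : 0.0)) / N;
    // dW2 = hidden^T * dS  -- transpose + matrix multiply
    dW2.assign(H * C, 0.0);
    for (int h = 0; h < H; h++)
        for (int c = 0; c < C; c++)
            for (int n = 0; n < N; n++)
                dW2[h*C + c] += hidden[n*H + h] * dS[n*C + c];
    // dH = (dS * W2^T) .* sigmoid'(hidden)  -- kernGradSigmoid + kernElementProduct
    for (int n = 0; n < N; n++)
        for (int h = 0; h < H; h++) {
            double g = 0.0;
            for (int c = 0; c < C; c++) g += dS[n*C + c] * W2[h*C + c];
            double s = hidden[n*H + h];
            dH[n*H + h] = g * s * (1.0 - s);
        }
    // dW1 = X^T * dH
    dW1.assign(D * H, 0.0);
    for (int d = 0; d < D; d++)
        for (int h = 0; h < H; h++)
            for (int n = 0; n < N; n++)
                dW1[d*H + h] += X[n*D + d] * dH[n*H + h];
}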
@@ -624,7 +515,8 @@ namespace CharacterRecognition {
 
            //=================================================================
-           //========================= Update Weights=========================
+           // STEP 3 : Update Weights ========================================
+           //=================================================================
 
            // Update weights kj W1
            kernUpdateWeights << <((D*H + blockSize - 1) / blockSize), blockSize >> > (D*H, dev_dL_dw_kj, dev_w_kj, LR);
            checkCUDAErrorFn("kernUpdateWeights dev_w_kj failed!");
 
            // Update weights ji W2
            kernUpdateWeights << <((H*C + blockSize - 1) / blockSize), blockSize >> > (H*C, dev_dL_dw_ji, dev_w_ji, LR);
            checkCUDAErrorFn("kernUpdateWeights dev_w_ji failed!");
 
-           /*
-           // Update biases1
-           kernUpdateWeights << <((N*H + blockSize - 1) / blockSize), blockSize >> > (N*H, dev_db1, dev_dL_dscores_2, LR);
-           checkCUDAErrorFn("kernUpdateWeights dev_w_kj failed!");
-
-           // InitUpdate biases2
-           kernUpdateWeights << <((N*C + blockSize - 1) / blockSize), blockSize >> > (N*C, dev_db2, dev_dL_dscores, LR);
-           checkCUDAErrorFn("kernUpdateWeights dev_w_ji failed!");
-           */
-
-           // COntinue to next epoch
-           //cudaMemcpy(tmp2, dev_w_kj, D*H * sizeof(double), cudaMemcpyDeviceToHost);
-           //checkCUDAErrorFn("dev_w_kj memcopy failed!");
-           //printf("w_kj \n");
-           //printFloatArray(D*H, tmp2, true);
-           //cudaMemcpy(tmp2, dev_dL_dw_kj, D*H * sizeof(double), cudaMemcpyDeviceToHost);
-           //checkCUDAErrorFn("dev_dL_dw_kj memcopy failed!");
-           //printf("Dw_kj \n");
-           //printFloatArray(D*H, tmp2, true);
-
-           //cudaMemcpy(tmp2, dev_w_ji, H*C * sizeof(double), cudaMemcpyDeviceToHost);
-           //checkCUDAErrorFn("dev_w_ji memcopy failed!");
-           //printf("w_ji \n");
-           //printFloatArray(H*C, tmp2, true);
-           //cudaMemcpy(tmp2, dev_dL_dw_ji, H*C * sizeof(double), cudaMemcpyDeviceToHost);
-           //checkCUDAErrorFn("dev_dL_dw_ji memcopy failed!");
-           //printf("Dw_ji \n");
-           //printFloatArray(H*C, tmp2, true);
-
-           printf("\n-----------------------------------------------------\n\n");
+           //printf("\n-----------------------------------------------------\n\n");
        }
 
        printf("Finished training.\n");
-       printf("losses:\n");
-       printFloatArray(epochs, lossAvgPerEpoch, true);
+       float count = 0.0;
+       for (int n = 0; n < N; n++) {
+           if (preds[n] == gtruth[n]) {
+               count += 1;
+           }
+       }
+       float acc = count / N;
+       printf("Accuracy: %0.2f Percent \n", acc*100.0);
+
+       // SAVE WEIGHTS
+       cudaMemcpy(w1, dev_w_kj, H*D*sizeof(double), cudaMemcpyDeviceToHost);
+       checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_w_kj to w1 failed!");
+
+       cudaMemcpy(w2, dev_w_ji, H*C*sizeof(double), cudaMemcpyDeviceToHost);
+       checkCUDAErrorFn("cudaMemcpyFromSymbol from dev_w_ji to w2 failed!");
+
+       //printf("losses:\n");
+       //printFloatArray(epochs, lossAvgPerEpoch, true);
 
        //====================
        // CleanUp
@@ -685,15 +563,9 @@ namespace CharacterRecognition {
        cudaFree(dev_preds);
        cudaFree(dev_preds_probab);
 
        cudaFree(dev_w_kj);
        cudaFree(dev_w_ji);
 
-       cudaFree(dev_b1);
-       cudaFree(dev_b2);
-       cudaFree(dev_db1);
-       cudaFree(dev_db2);
-
        cudaFree(dev_dL_dw_ji);
        cudaFree(dev_dL_dw_kj);
 
@@ -705,6 +577,7 @@ namespace CharacterRecognition {
 
        delete[] tmp;
        delete[] tmp2;
+       delete[] lossesN;
 
        timer().endGpuTimer();
    }
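The body of kernUpdateWeights falls outside the hunks shown. Given its launch configuration (one thread per weight) and signature, a plausible minimal body is the plain SGD step below (an assumption, not the patch's actual code):

// Sketch of an elementwise SGD step matching the kernUpdateWeights launches
// above; the real body is elided by the diff context, so this is a guess.
__global__ void kernUpdateWeightsSketch(int N, double *dev_dw, double *dev_w, double LR) {
    int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
    if (tid >= N) return;            // guard threads past the weight count
    dev_w[tid] -= LR * dev_dw[tid];  // w <- w - LR * dL/dw
}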
diff --git a/Project2-Character-Recognition/character_recognition/mlp.h b/Project2-Character-Recognition/character_recognition/mlp.h
index 94a2925..1facd5e 100644
--- a/Project2-Character-Recognition/character_recognition/mlp.h
+++ b/Project2-Character-Recognition/character_recognition/mlp.h
@@ -11,6 +11,7 @@ namespace CharacterRecognition {
    //void readData(int N, int P, int iDim, int hDim, int oDim);
    //void testMLP(int N, int P, int iDim, int hDim, int oDim);
-   void trainMLP(int N, int D, int H, int C, double *idata, int *preds, int *gtruth, int epochs, double *losses, const double LR, unsigned long seed );
+   void trainMLP(int N, int D, int H, int C, double *idata, int *preds, int *gtruth, int epochs, double *losses,
+       const double LR, double *w1, double *w2, unsigned long seed );
 }
diff --git a/Project2-Character-Recognition/img/CharRecStats.PNG b/Project2-Character-Recognition/img/CharRecStats.PNG
new file mode 100644
index 0000000000000000000000000000000000000000..31f16913a7ed3c06b724cbe5b45a667c27a2600f
GIT binary patch
literal 47006
[binary PNG payload omitted]
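With the widened signature in mlp.h, the caller now receives the trained weights back through w1 and w2. A hedged caller-side sketch (not in the patch; the LR and seed values are illustrative, while the dimensions match the xor_* weight files above and epochs matches the 1001 lines of xor_losses.txt):

// Caller-side sketch for the new trainMLP signature (illustrative only).
#include "mlp.h"

void trainXorExample() {
    const int N = 4, D = 2, H = 4, C = 2, epochs = 1001;
    const double LR = 0.5;                                // hypothetical learning rate
    double idata[N * D] = { 0, 0,  0, 1,  1, 0,  1, 1 };  // the four XOR inputs
    int gtruth[N] = { 0, 1, 1, 0 };                       // XOR labels
    int preds[N];
    double lossAvgPerEpoch[epochs];
    double w1[D * H], w2[H * C];                          // filled by trainMLP on exit
    CharacterRecognition::trainMLP(N, D, H, C, idata, preds, gtruth, epochs,
                                   lossAvgPerEpoch, LR, w1, w2, 0ul);
}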
z=}&0!5vV!)kdJ=Z-W4Y|aKHG;gO+IqsqvFXt96dG=yN?di&2(Tta~IboZ3Bb1kN?j=U{ zG1QOa$hM!t!785qgqutGwQuh(Fjx5UJ1FOh(~j}5RRc_)X7NhnW)#alR_hXbB_B2T zaGAJiP?)!INfEY;`}g7N^nE52rV`hIqb|DQGdlyiB%vTzP_U?SwRVJ%w;E)S?fFjj z+s$`u%H+(%J_-o>dp2wvCfm2lXqf*L)I?o?s(_WAGQ04~sZ{4Seh4ojp(?Uba*m}P zrUivtcozgtx57nN&knHFdf)uXX2F1x#Uj@4z3X z7A7HH3AX-1K8r70-xv6K5!(x=FrSvkWrTeWW@`MFAJce}dmS z4y+uKNWq8Bkd1JcjJC!^obIXt{*0D%Y4Ir2D80(~Yc{mSr%H%uL)~W0c;Q)DjZ6)g z5dSqavfT^0Zv!#i39e zv&*!Iy35XuDMmrVdv(5qZS7yKHIPVPo@k>fl?wTM@b3-9Bpk2dja5UOu? zwbe#38U*ROH{1Q{4}I+QAvjmfQuCm=gH(@oFI)p0MJMSF%7Pbw1vrcB-~Y)1#1KWs zC>geFB>IMaBVS%;-T1Ry{Z}#*;G5&2@b*)u78)OT#U{QKnHt$oaNXP}UtWSVf$Q}H zPmm7f!O~^wQo73fTMtxU^6K}?DSftZ)VjS!Wjt{U@1oNbMR?IHhMHM&Fp&;|Hp2gf| zx$f;x6ZSv9h=*s&-0%{pMCAW1V|bYY0+^1W9*Cicyi-+XC+~+ob}lhZoH~))WqXg- zxyC<_cEUpPLBUUnIJe`{Wi>g$w216<&xic^x9tQpOKV~+ncM0AjiCQCHX$-2XDgaX zK7Yr`Qp|o_Fp43oE**&E7!PDrJquBRcpa}MpI8}2-h+OH_Uk5${hbn7w8b*HyRVF# zAGtiwNgsK=dS}!u$|yFwCu22>bFG8F@F$N@`KLUh0VE5`*C#7cvlS6PS&qId;oLV% z6$DofX&f@F(YeRuRNcAs1c2;iCzqcnKnmqkA}z=5jFO3N^njXp@~fyr}?BD)m| z1~hecUrcq@mSMMIwVZrKvy0}~<7!S8*`Feg;tOE=rK1yFbLOT?g$dKxfU2Ciphh$n zg_O^lL}`OMF}IV+NpcX=a;ua@_!{>+JVbUdh5V%+~c0x3i8z3x<0aO?i-ab+@m-7T4YL!N&W>(xUUDJ6I zwI9p_ALofGM~18H)j!t-r{mJ(k{7FPg+NDT5GW&#^I~m&yMS;#V)(gEg-1q-Fs9hF z(a;uUV@;hgbuid2mml#L`u$froCMHa;@6y{+8Tt6OGdqH=ROXla-T`bi0@{NPZv9i z)B~%Nqd(CdY@6J zIeR9`$s}8wvAog*-Tsi1Edwb(+xmUPQj$cJ>=-bd8>}8XdMRAMmOB4Fqiftng>qNP za3kR7iXJmD(jFuYZ~Y3*>tEC6-%;YO)oHYQ%Dk%5px^&8m@B5gHg$RRZ<6M@>pH!@ zXc{~oz9x6$zEs$Rn_hoHWU896@$VQuQ@YO%s+rRS5v-b?Ap+c z-a{h_3t%GWJ@rJ`r?3VR@cp{0NJ{A@cl>4D4Tr0FP5d74I&p%B+AON!9!+6l$8XqM zH-4Koy=)Rg{yTq;zQl}i7x=DtY5Qf!qhXNnjNzkDHDUL1C$1E$$YHxBo?f{$v#v9K z_4J%DUQy>Ip>5&g@cznkRGbX?kT_|c&&!Mi(;nk`{}Ne;Y23lj0zmcjN&|7D;|pOT zgO-I9OE9F{9Qh|2uK6Af|BHpUus>({;|bJ(9pnczozHV=X<5qeJ8mfwz8s5lQZbf% zq`f~5)&|RsQZ3nKq9r5g^cM3a+7!bq3Ao; z3*i2RwNp>1?j9=6n|Iyn?M)tpJfO>-Hd$>v#w%T9m!#iy4wg&J!*aUt_h z+gN$y1UDYxjs1)0Jn?}zmMbja`Nn{p$1y&Xh-%J3Q@?ftfJmXvJM)(b-`KHT+z~!# z1;}ftu4AXZZLny8Nv>cBP6s2TIv-eu!jD`f!I@P(>7j?BrqExQs{%kU%hxH~y@ncfLsBKVVM05I3vX(G9S zqJC+EZ!Jo9_YZDYip`^9La^%-+~|s(Bll~Y-70H$iGr7>-&h=#xi&J5Ka);a#&=P?%rJvyo&Pz4=$m>bHN80exI&O<`%!_UBq$m z{xOJAc5k7!5aWxSpP{k=KPwzWakzJwe8;}Wgv*nh2Lk9m@K|itP&@*-u!r&Vn8fRY zupdlgd#Mi<+tl}3q3*0A?9)KiVisqKngm4zXLwMH?$%fK%ShMg41Hw$X01uKd@iP8 zT-OPNFCQf`bVCGu;=b^_URG3-uy8hwYm#%cIuoc@R<09};QFZovOg+hi~QZ|MKKE2C_Jim!Ek%jlLRKgxTZWgINiu} zO*qK>UdOT;H}}rxA*at1Y*VSd+(!FyDuC(G6nxRbo9+zcBWvd~bS?smnz|~8FZQFK zn&DLCgr9H_m31ox;*gqZ1ht7G1%^@p1a;D>Jry8fjxaADwq{@*a+%-df};!Db)T+9 zoNY20$Z2_P?ERS`f=V=zD^CZfU0%7vSKoQZ@HLKqE|M)D2fOlt1_Hq^Xg7cCw0!Av z(vi|0&S*%PK##qB_zTPjJtp(!?e&-~Sj?w}3OOj!;a0l1lKvX1YUX9QgH zz7$Gcq1^IQ<1wK}v|4W>`9}xy-P-`a^G7TkG&8vfrSRGSXs})^4;=EI@qJ zS95ldxeqYBm!TC0Pxms&z(4f27OX|6M&g>ejUlDf_bh*DqogZN9|-#yadc6<9B-xpl90w1K1qn*6o$LS9jtxbopND2>3ysqu&NH*5kR{AY1Olt>rov6ns?!8Y z%(n-dfiMHU?@R8Fx281p-p@&vZ2pFX<-$&R7pe7_pGrE(@g!A0KWb#(?VP29R^xqq zNHj*O_TPC`T;SGCOw{c$ro>JVQZ1mw#6s2Ar7Y6CJE4OvTS8M|21g;N_R?b5Hg94vPmZ9vIiH;N3+>y{dEob2+Rft#8=132ho#i` zd64|_zNM~|>3F#moLF3_UDX0#+qO1a<+o%-t7FA3?@f< z`q)#uAC2tfS5LYv#Hu2^okTgr9~l+T3XCUd=oZd;Qiq^A{3aWCi7BY&zL&Wu(-yW| z4N_=pB6pR)DOXt7QH*q;*!X86HL+enQ^`x~hdjzx=+HR)kQCtJJ1$Sr(UeJm)UR4yW0E%MRL%o7d;K24;(penY8KIZG?9Su)a+x%S z@R_4S9O|W%49Re66RM#H0VD@E{Quy;9fqb$L|H-3$qrIwl&({Iu|%CrcMrjWp0MK} zRAB*lEW>6T&>0LL$_y=IXen!q-GxH&16vmA(`RV~c`ZwDOaGZ-_?fs9WLR|j+gIFy4|xKFoT7}_V~7pZXo#lGOxW0XAbYoye({)#gbej{oi zK^*9x=kt3U$7eCKZVg9%;Ay4Qm+JLs-EsBsf|-tM{`raUq>>0+vQUL(tn5?MZOhaoj8OkKUiiA%8s9TkM*_m)wiOaj8#sMA6Jx(pBBz~Kg*GhN%lCdf}O88ie zezhpnv8*59PxvG39ei;{vZ07ywYZGygI|0KM4M%6IQU@`CqAu`1Uug8u 
z{;|``sZ*dt41gC9PLAySVN;hNa#?%Sq|1s8T_?&4EQWO4ACh)8_k#CP=I!h7+Z?RN zDVAj7&_GS+g9~zRnUfKDI1RaTP07Ti4bz6McHZ7KCAV-t%X?L;%dFmX;y*;bRelPM zTCV+v_Dnc%%eBiE9zb*Bq2TZvlEi7?E(M+FuW8_vcj7cK$TBj7!-w4i+9!qREYhK0 zGdaZu9cS>uF0$LH${%-IGL+GgVe!JQ23n#{K3wD&dnIBU3b7Ys6GxCmBd~(980dZR z67iAKuc+4O^E0O>mkvzzUs3N_&A*`DrDA*b58mKtUgs&Agl?m1nrQVBrcG`DT5yAO zUR=Hee+mA6Z86`J89ndTKxiTKJMgRa3g781TekZi;mEk2uqxyN*d?roWuBBR=H+4(V7x_V+cZp#z3J0 zg`%rwA~9`xn)+tzbC(OV+SdIMy-okGsbC$g3!|dq9lk?!b<%h;625 zIR5}=^sVmhrY;eOPoB$w%aKQv^cNw+oCK_uHsQujkKND#xo$TuJT5|wHN0JYG9zCz z&@eC=RZDf{QA%HQZM@e)n)E;SC(Ze1QM84OD;>h@s94mJp&2bSBwvud*>X_zVafb ze`O6-kv*K0cs2fkj)q_j8+~`iqL%;BRx?Jm44?Zfi?z42;Y12r4es+(1&8XU;NjXt zi_i~3eX#4@v73}x$gN)ge9CM9T zK*n4|in{Sy!hP5+xCS~-?2@^W%*_Ee@T|FXI+V7aL^$!@7$E5>e2&&TX$ewL3wwGD z@Hz`}cBv9c7*4sM3ESXL*r~-XB-`FLOAeQ%MDq-MR-}yq=zygnYXV0IHYj)h+8+6{ zEt3`BO5318UL!1x{j)(XE~~2E(k3ixkXs%c!#U_B-+I(4_<~xeN(Am2H#I478odd8 z#?Oo5z+i?&Y%lu+w#b2l)a^b&nKx6x%W1Q>rN?96>-BxbV`n^%&P3*?hT62M>9Ol7 z`^p7%$?^o#fHw!adxxC|464{O2WP^0ynS^ka49m3PTD)9mygQw?Fkv^8AbJQNtWK> z6CI*b@i$ov*?9SfTuf%Y^vHmOf$Fv>I_-F-{O#X5OBK&5cbTXy>*v}?*!@XZ%K(1H zv%AUFt8J13Q{sPF*jsZYPPISUGNUef%gkT?4^e& zmtH)DpY84fA#wvpyo=Y_w+Cw6Ri~afxK$1dM`khhQTCP3PuH!E^m8J3C~Q`1IWsos zDNq@7pa+_d&BQ<)MEnCM!Ojv~JMk%EYLtuNOZGHqY1M#8K;SUt`1=$k7kGrB#)Y`g z7Zp}OFom4pLya0g;T7F{YYr-8h32k$YAZSonrg_|Ihq-#ryG2;S6$|-ZjV!!1SMw#@E{%tpm8d^1u3@+d zwz$SEnMLwzfmJu;gwDS#VrVeBmZ{!RG(%eCI{XYqZ6~5MGF|7&16*UTG>&LHzZF+7 zrmpXos{>Y%K<2jk0m45-f*JhMjCT)!$~7^MaQe^{noACc#&aqoI7J zP3on?a{1tIZ31y5^DIz}ETD4vKpQa7^2C4j^#G4p8I9Y>Unr3-!)Y>Av8UnWmDe6B zq-|%KsTq1ddtA6Mrcsb^%rJ{z_g1z5Bl{d5K3Wyr68VnHWyrmZHO^^kv@Vk}58K=A zXJphpwB>6N>$sXUlE37nKjBE$5EKz7YVMU zn{f@_94!BI&SteGv;U(@$lm%tTKIv*D$)lXVq}i>*KZ~4?hzO95gp|;PH0-EE63L` zH^gUs^F;M>Y(Rs-^m^#Vfj%ukk-^V~li#nqhYNIbOEdnu?snSub%OqdTGxU|TNuiU z)S)?i%>jRdmvQ1gjU*TX8jDB;zziY2rp>^vQuQa^8}6tl&JAwih=d`WpRKyuF5BqX z=Sm%JdpEQtQ(1aoOrUJ!bq^QPYOT5c^|4HqDuv)L_}iq#o0ztgU0lnucBmy{xb z9t`)Uwi%8vG;u~Z%Wv9gKTwKPf3Fnbm{l<;|9attm6)&G+aGTyJ7pD{ zvC0Vf>He-N>lm3Ga6rhD36`>USIlCssLE6xb1*0w*y!G5uyK6$GWvYX{gSlV*h6ol zi#&Cj9u4je*ep5g*fs9#W1#!{WQwEgs#Ae{rw?rLv@K^97ORHwMF)y(pL1xI-JHnZ~*B$Jrb+IsEgl?1Ew}hT0x78cK z87{JU@2=EisQw!|j*9WK*fCPx$c5%R_*nXR@uZ85z6%0p(hhtvYP!US{R|$BKO3dc zZQ4*oHY%d@telPbKq|Zqru{i+90(*+p1FxabCCx!mg42>R>SWOn(XWxQEV}rZ{#eg z3T)-44&I5*NF)G}A@9i?8CX2C*qygO7IcjV9L<;z|l7>4EU0PYx|Ob|aMe`g04KN;_hEda5GocOW7i4kf< zYjwYR5gyO_tfPQl1n>FVKq&^nZ3oLq?+3Vx9=Nt-sinn5GuG6xUwT~DA3jqSLv>+A z5n9|h24}LkBzHR~SwkEp@S`=3Ur=}8MVZ8 zx&tX}(_2XGE4vSOBDf_(d@G@e%(Mo~vr;smxRl!R&lb?8M4Kmsj!G07@UYS;f+hHs zt`a1fYcLu*nO8xW4zgcbFO_wB^KUIbGja@5!>Diw7K`uEO_+NZrt! z06{Ae2eZ%R`%i;?lUpnIm1Z4ONOJ&w@j1s79c)DDNRhGNZaU=7HI!;r0M7}qJxZe? z^>mt+i&a-1a;urL1`UAV%G86J%c%Z2bnXrtmXbt`A3{$-}}=+zdf>L~RdkfMMnOc-HW0G&?PQ!@6@d`fC?14kqWPQn-aPG+OAd) zyE2g5iRk3{)O0}}ctaUq*{x5VR)w*^n&y3d9R!Q%b@LS^A7Z3IZ0H>*2d-`}Xy6N~ z?)1w+>9!kt#NlOZoue`DOwgM<@_TiN?$7cjt|OK|a=qiS4Yl7X{=5}o;Psv{4Pv+} zV0mj02uiD}dbS!V{L!%NZ>VYcYedG3%+~U&b`Fr@)swXTuM(Oe>Vnv3bYu zAoD8*yqYS11H9XSn4n^|RIF@hObq|>p0*vDSmrekLe*+MFVV31%MJ)E0q}lJvX8Qj zu!9eT1itBNc?E;j=`8=08hq+z13E2Q2U^zdGokkYZ7B%j zCYpEFW&lwqZYk~ijqLhLIt*^}$05o1Ecz){l6oG5k+h=)nv9}zu`xF6JQ5HptkBMI z;{`jAXWa!j|FI(RWi;F#J~*3`rh>*x<)2A>0zTHCY%gLG{cy-+Em~twOc|PNGi<9o z2s!0TD}%r+b5Z^vQtAe}Jq|At)5gI+W#08MGZfu4@10h{{FZrqtZLN-b*g*`%XIC_ zWG-InNToNPPaLLiDyQsbOTh2SYkZ=ehgLS)W+x$0}yJ}DNA24@igL!j}k^STDeW;^r zj|4HU4UEeDGY`Z+0l8|rsrav}ED=-U!{4-Y+|+FbwzRm;Vk9%5$YN4C-<_3I z&0VB)8j^>eX4;6F4O(T?ctHsu0?fHMy5xtj5oL9rvL_X4qssRNGbT`(;5PDadG1H! 
zVQ}lX?)`re&&~X*P<5Ra(V0SFSw6AR3}+rOzd01BsL+(@eZ)te6or^F)Fk&ROf&02 z%?KTA!;56XdqHH^pgA)3@q4gkEfv@KJ*lctGeN{ckdTj-auMJI7F=x%?mV9wXFcixo!aMY+(ZG| zk#bt(L)$1G9ltKg1B%@216~K}b`{N~Ap@*Z-b0zDcXo}&{dmGsCS~~ghAoJ&LA*a8 z-YG7nr>j!8098CJ7kY9c>ghIb>hE)7N>%{m8dFeomc2uTCp%ihzjb`mDg;^37%ZL= z99*(LD2nY5vCToEF$u?0y3iD}Ko2YII0SLoA&zCG+X1$q4gQ1V8vljlJ{;oPWJM}^ zJ_$0xOv<8HBf{SEPsWsq66vL(S}_kaFp&nt>>N6yeJS2~Jx~rgXyLWdHC6_3Xv7mo z-?<;PlW09$+>NOl4?eVO1|)bn_{52vF0t!453^;vRqWH~gmaaNHuAWt+0k2)YVP_% z)8t%IEt~3BgNUe5%V3{JsgL^1P|MMXJiNh$1()A2St5VFS32 zG~16BUBXWr_GINJ(-{VCuIM3^_cE}2afm)c9s z>nsDKf!-iiVO53B3x`a(AH)g{VzUJNAabQ$$@1e4;L38#Mo#LiI`_a7{HS`HSm6M+ z;o~V^_%(B<&`i7&#+RSOk>0XNL~sZS9B zlobuXq3TD=u`-?Z85a3z+Uc~ ztJLv(S1DJau^n~b|D>yQ^Iu%0g4gG%7XRZa{U2B9|F}y3$5s0O3s>o^lbY=OCw+5r zyObH<$*2b67ga`h8m18jvj$?FY9n6@ad*NFW!#_X0?8++b}|!eHh} zWL-BeD!T4afF6pmTN9{tNE~cN;2N!aXNemy-^D{1r>aaYb#h>Y68-GzoK1wYf zAYK7%kEm6syWlrIu-rLqb7sBdGt4{or^nZG>eY8xC9{_~Wg64ohgn6T9d?h?!#Po& zJ0F@Wi(Y-XL#N$w_l9u1hvv2x)gpO!_l$PHnMDbRx($%ErH*HRE)8m2YELK(Y}7}^ z72km<*%*HQ!>ltA(eD2)UNA`3JN}nxT7SUDOikM*srA?#r2yr?gj^KpIe?MSEWafQ zMsyj-c~FNYj% z`yKK*?3w0^4h3U0U9h*Em-KD)q3<2hv2-&rIsM)dU5k?`yHhnrn4nsLB>J?!P)R#_cg8lglKDzm1nhz zK&PmAWl%8c`q)b%wkVv*hV_Pb)`>ko!}$IvwIH(mJQ74M)9m%^ z{g|P(qVzGISuU8YMIS||&D0N;c#6`QL*)bwe`MD*|KCWWNmE_RTEBC>ws-zI>TVPZ zyZ*eXAR%LwY4TEBw)=RZ-GYmS|a6XlFnND z*u;=N&#^^r!mdEEX@Ydi#Mj)O*H^!%$ZnRlFmP->7T6{@K=meI6bYs2y(#hX6k&wd zS-%mHmv=;gXQ`?3#KCo8XNxzAWPgC$8-#oq=br!1EITP;^{lSt{rLm${Wb`Ek>Ei* zMpkBZ=$VTnFaE(jOUH2rJ%|Qr$Jxe7mj6V{47XpaJf_FT4#_vL5~JOIWK=bTTMl7)+7Yg%V_M{x zy7nXuLpM)3b{1uZ!w_d$a3+}EF)Ltwcu-UD4Iu@7@SJDu3OkWm7_ za{Oid-Wt@<8Nu7L3(E{?QHnoya`k@6>|b?dvhdY~bK_&gg$+=lol6VlEUA~J#o_5H z4e0lk{dfRSIY!HFf*M1P-cJzTEFOzW$i?WRS^?B4r?5+F0t8`SDze%#$5|yluMB(R z8?{1-yT0f}L${X;>nDV^XXtFbM}&?r{uYph^&B#cAcC;tOqBs|L(}L^W`i%(L#A!- zUIp4g=3yovyGiI%upmEm6Mv)OYk#2K#}e3N_0E@co5dZP380WIr~96_HZ($8w>FVc zSn=vkz?0m=udh|>7pVQR&q?U`5@XPKCVl>!!QtH~9@t4v4&P~75;&}O z`MvDbSi@9r2)h8|aE(5v1Ip`H{Xazez-lo>2Rej4gp`}S0vx*h<#ZY{y&>lys%~>% zCGuk2W$LOR&UpH7AXA*ASDxu6I0;YAU}^c$VP+xXbE_xnq6b|?I%2_o9qnopcF+`^ zXi;+Y{wqGGfwmTsbu%Nt{qf^K3;#EFrp~3ffjWTHT64;Umg2XX4++WY%`bCd-WL+) zl4!NX*f(fdGP&#t>@^^`zi4ds|4`$N!WNzk=_S5ej?~um0|#Bg>7yr+K>Ru1hSwjM zGD#TQdRQqYP_?+vrDJDg?^rp*MOms8xjhgY)+cP z2a%CX-~)*OS#vDK&38H`5FB=0yOq^Ydu^QO!9RQrwHaoOEh3_`%W6C1jh9Brerc`L ztP~?ztqA;|+TC@l;1;0?Wc{+K2xCJC*S0udpyKtlLTZCA2tp4?H1!yVzp(Cwb(IM8 zL!k*S54l((Bp0cJ(sUcaoJuAU+mjCZgx310W&!3|+$kskM^d=3r~;-|^Qq$_GZ%zy zU(YUWF~}8*rp@P5(*>rjt-u)YmE?YOjchi3A+=HO)$`TZMDu(dlem=G&Rb_+V$98@ z^4;vO+KbJ+Fhv=L51Qd)MM2FTP<}81V$77PWguS*;#=*R+y-6K{&=$ z1P+DEX{uUT0RmirYm3&i0D*O5x%Ub^aQY{}=p?W}-}-B8SE`ZvhMz*3`3OKWoo%-T zat)cj{^_P~sa%8INn1w0N4Twu_hc^Wz6$&SZo%;epVSnWGQ;M~y&jV^3HNUt*N*pP z65$hj<^5M@ufx=-C|i$bB`ivRAeK&xVZfH5yNQq_rf$#OEL?mqqugXcY--3vZVR6W zJ1baNM}t&9lUmt3??JGnWY53En9b-5`Lo?zanjh&z8nVZxi$21nw_IKj|)D*xuz{Z z&=53r{l=BYl}-5DBX1Wn@rUcL6_?oJ-Y$E5Z~2JqAfULmI9G8XS2%e4EO1gJH(m=z z2<8?r?OD@F-3-G-4f8s~6%>iKBld9rb6IXo(n<|Z+EOd||p$4H;*UFTm zTLtX(;^5$yuyIAk^XS3dM)vB9MRN1-5qX_=&uf4-IAmP0tw%88JJYf(BQk>lt3-Md z`uNZ1!fVSZ#f5y|Cxo;@i|`|7U!(#Cb%SQPw)5@y?9tj3*mWflhy$-*^@GmYH4}8R zF~-sHqVVmI$yeJqHX(`!b2fGe$maS3MH%2G6^(s!!$!OfRXNy(s^_oUP+0;N-Ntg@ z9&aJO^8YYz(QPx$e_?>&6re{%k@L%M`GLcqve}Z^WaisI*C5U873Ej4pkcZt_hFvf8SgIVuZ9MVTfe_h-w*r@>A=tM z$4h#uB;}+)>|y<$VccJFoVj6c8%GT-X1l>l1o6f*MskpSgh$}q z`qxyU;&e<)OZ{fC0DNLGBQSh(xb zr_+SXl`T&yOqzm(p7AhrUHW{>F9{ods>`7_i_VBs9x@pm1zfaSlb<#4Y=w4oVX zoE6rWmCGDkG;?p0*ns$#VP3FHl^yu~*IL{ETWMYfXM+G6mV_!PUktj)e-Z??L;o3N z*7hHfUq^w{1!hFHNcMQ+lF?UvDN44Nq_-gAN|WY&BJy+&HL*$ocBpMEVnWvaPN}Uw 
z^!3a1(hgDhrH+5zABAWZ$^)|MFlY9hQ+a`rbthozJ1Kz4(PqmvjyZrA3$-q;r zfw(hl5uCGARo*H&^}V5)_ui>Yv`m^L{U?CqtuBfM~&zDMlr8!4jj@n_bFVTSMh7sQ2T62XrH_I3PTD>?RuRuXx;&HT!6 zzyIkknrTY1KO#EfgSqpTvAb5vh1RTD<8@;fGLgh{5$YqBzsNNZ^NKG9`ui9W6oy>$bSd^OBh5T*ntfjrsKueduw;l1J zZd-8$()n1kk?mF|$rTCvqJfD6i*k)kXs%*&-I6@(61^PmM#g!XW7|Tn2EfZrm3^hV zz6+9VzUa*w*zIw%4gHpHkWi_#e)3D0Lb1>#rt9Sj+u%|YNGMO=1N zyg!!U>WL+O)km2AtaF4p3SQsL==ap?I?$r~B~Wt`?|5SlNy66<%ocRy)c(#WebgY7WA zlVWKO0?%#9bot-wQT!iQ`ZBon(W`YOh}2B4PmN6p(v{zwITyK4nev*hsB-q1zitm0 zxa9zRZQ@em7|$=GQ+xTwwxae#a24x|=Eg9TrM>q+flJCTd$m=n_sYFRH#G!aN>y{Q zC1dVJ@lNt{QI>6^q&bpIxOhJ;Wv4_4E$-!l%(K*XxexYk10xDxU%+UCE(m4ir;NX> zyQ3j-0I&XK-POR}QFAI<*S9bHf7N#8QB7U@9*-6$P@&p_hzg1_h(MKDg@_^`il9`e z5Qu^Z2mt{ZLP$cv0c!=6NkEB0mP#X0}!YE6ZKWsF1F5Ejebrdpq z?h}w1Sbpt3j@$ODHLI4l2i9brKA$yw(hVqc(bB_kD>ng2qA)U$?b-Rvctf*lMMz)) zAPrN5*$jdY594%@I(})&(XA5|%=t7*pf+(97@bcis*Vx<55sE&0t*Hr`FmDbO8{@s zQdw!Z&dgO|S}EJN!XJ#>kvU?RAz*0@95~VGRKz#pg9YF1)B5sfWDGn z8dw?!+G-?fDk&5$G>iP4?uxi*_dnt4RrQ#mzsAq`+dSWv=Xz4r!kaS*<(8YKR&vT6 zGf^*BxjAK*={@1ULDl)u|0YzuK*sN?C;4>1z8(%^{hi6Kb8Nr93uQ(*%K^KsqK@~u z@XnD)(@_1_+soB%NHd*bqBpGSlNQHH7d`;o#87SX`+GT`;Kfn{{IGj*rmT&VDuwb_ ztyDhz9jRUw!9=+phnD=C_P1MpK~16Ye<`G>;D!zSHDnGA1evgQm&Ji0fSU6I&(8)u)n-YjS}^;F7zJiXf2Xu9vfi&Tz~OIFx%y zq)6p8EbeIhJ#{z8b(olKa#JwET>o<7(Szk?b?Oz{Jk#z0;B%%$0&l@d6u z+tEG|cz#Q;XN{yzV(~3OAp$--9x7C>Y3dN#oRG9LYs*w07x@PUTxnD_dbVx-+pqjO z?{pQ60d(0qy5$qOenkfU4_2>V?@zRSFB>p-9JV@6ixlDaAIW%JOSPN}G{|`+5sw^k^?xqSQ(Z zaL;c#U$N$~=^ptC)8GBM)9K19AiHtfd`8A`7dw{yeSdO&*;u53=mICuKDyaFO}WyI zK9Y-B^f77q#8ctd`3U@|X)A>jb5h1d*VFfcNjwnqvVp+>YGfj43oscM4DsthH~n!mub=JT!+jd-M? z?zu})gY{?uVq`z7P?ZiJO#`{yI2!~MRoYG{4OD+Fp&(JD#3xVbrZ#4_%s4B&Ai;RI zz66STxceF{WV4a2Q)x1Hs(;skL)!D&vXo7jL(g_*O`fxoSX0aoW={ zij)5>6qmj8yF4L54y5>U*Ln7d+3&^1)gqWqxV%sQxbfuDv^L{lM`yNK+< z9Hj2j07-t6jbL|%s&$btmu9wtvz;i%*yaNq4OMm`d3NpB1MLX`b4bX!Ut^Y@HJI4S z#hsiDinshYVZYJahF6RPW_Va1E1`ke-4Q=;Buln9#y}2X>@4a}ztyu7yXK**BC=|H z-JxexPzPSpYi;HG<&d8mYjK9IQs-y@IvzviE32336MHe+!0V2fMSAnL5Epz4v!i~Q zXiMbvh-y0oSLyt{85Iez#A@IVH3N!^z~9iM2NL7I#J`YT1(F$p4g;JmAm+p!BrJo? zF$@4KI|JP7|DuZGKPGSXw-^UW^B{Y5K_up}m|~_hf0dn|om|O@!j3I?p)$0CRBOjL zeI1t;{$0t#vpA@BX99301Zu$_QPVh6j`wrBi?1KJUq6fSUd09Jf80~PYR zP_|zYS9O}&Z8_K`Qna`)Xu!4**ElXlsYlCxRf>zNFvB8yj$nV0x$q(C0K^Ti+0*BX zJL3A$qRUJpCYkzn%~*wyEIO4kR)O{}C5=v)8hit)vmuT?1DbxLNalL%IJaP#97Rc5 zb6bArMbDM|(2z9=nL=ZFgU&r2M!1Tu0_D)@FtMXwFm88WJ@2i6sG z$kKY>gsE)>NyDPOqdd$GmfWbXWYxpsKuI+YKsIj3MsmqXd$92%OnQ(2q&O1V;nDq` z=cws%F%3X{bRR@}2G@0pLi|YD(j9hRP1EPn-FHwD#(LxCmt_KKTdsu)nd&Q;9VHG! 
zgVA-M9uu@P2N0&*78pcDbTxHuKp;4OJ!HfyD8U~-k$ z2QKkn&1O(M^4Z-}g#g{$BSd@iIq@U<6gKwBDZin8$`K`6cK(iM6?=Sfw42bjaSzEo|@Zc_mYa z)(3?yTOytnCl>!w5xDr^#lTE@zorb>&oV3S4;{9>S4{F_P8)5r1Zv7ykG$9IT7zj@hPEQ?JgooPi z;!3Np)GGiiWAFy9DaE+1eg$ant#`NEjpy<~2|dph-Lq|j)rTkA$7F0~TV{`@^m~gl zXL&jc-f{Te>>`Kx*>EjZ5Jou7EK`PC`~3)-gXD4_joub)L1-6_B<~5zsdli~&!agg z#NLkP-KETtn{u`YFMmS&0VTKuO7Las#Rj5g9#T9|ua|g#PNzDkq`nCzNt?ge0dE{1 zH(0fN*@KDG8m6+b{kU6$c~s**qpZjbLQ~7PEmRX)eo22lsy;?bwm+ZkP9c~ZuTx$v zKthV`B)S7kT+~iCK}1G2k#Id41p7euA{C)kTM$k6YqLISag*$5*W*5!&@6ll<@>ud z(BroSI2V#nhl$V}ZI7MyO0__+r)u7xl@lKQ*D8yeuIg;ZA+I}66d9vlEa8n$cc9(1mHSSF2X2PO2c9pbUrw9)jcjehP$zg=7#K|Qu zWb=ie;2VFI%@UcbAXTj5E1sFnZA1#H3b*Fnia3AN_L#clD=b3K)@?>L^#WjJ9>W~r z`LmoRB29axcIt!!&}8y(w|^zQgAf;iv1O(Re-|DhojBjrrOWrv!gJ3`vNWYzcf6_P z?U$}SNcNi5@%$?ka_R0@)cz@*dE~|KlXsm}d!vdQ_!tsmnYjsa zlD97=WX)q+W=`})QZem*s9_#Eivh3sz!#^_5EuNPpbT3Rn@FZw$*qsf^?$BR(>21$}4%fFj@G^!b@Ieqwp%Xi?G_Wmr-q2K7VFaK6{t#Ecc;JYzva==Z| z+X!hLgtG^Oxw$P>t$S7{p$!VCv#)b*p@mFP40VGX6eh_z&a9+-CiCN$7kC=qd`1Lz z-um8(0=4i>L0kwU*iF`@w!6^dU?OzXqPV$k?8ka^I`0iNFM$NpC+h?Tfy3Llq8jo+75ssDRh<98_J!QUL_T{gv^lL*iZX{h&D(|7R`IA0PNiX( z^8*e5D|4cJ^U0?(Ebs`s*{t|E&zP6#Dfl`TBB*q>d6X?Dd(ABiH&b9?+Z+2kllAv&r~ur5u?wsYYm@2>hiCdD{;3b^A2nLE zKvB@^>e=tkIt^fGY6#_N9?qLr6b{rDwqts#aNSU*-xXb_gpzydWmulrTo-py&=Jak zG3G;?5a4t%u3(u{tjA~afC8CuB<-IldrPSVGW^-#`wCgYNC zK;hwf@vorw{8d}W!P9r2yPV!}oFs?fjLT0Y>ysqaY#Bf6Me$IBiW4S}Z64LkGYeEf znuGJ)tWm&M=OALg_xZ4Pe5L@uRDcD)z6LK}F`GHs@UQUN->XP>Qv?LgD*#O);|sp| z0)=O(ErJx<{Py|9Kj`I%S6nBN`~xV8AsKAs4lIm+oG@G}AWN;)HQ{@~n6q16huK-W z@GLgmTnUf#snrHsVn|H&uT2&3y7dlyft*Auh;!WPJHv@H;0{_*k-c8C0v z3P;w-FLy?xA^YQu0e;PQ+JaWG=7)_QA94Qp<+7J>yE;y4*z4=WF#rt<{;i2Px&DCH z%Nwr;0+gSu$GPW|649kygLPP((K^2l&2vXUJ>Z55^iYoQ#?>Q|4u0z3pr1oh_^?Pi z$0N6dCS6|}X9HPU+BkUb^()!_ZLMv)rthnH@mK7`j~+%V9Du%9u32}XU8qBM=|5z9 zj3$MTk=o#`K;;>z~npO~Ny>QHtR{ z|4?jf>m=?@-}L08Ls`I{zQe?9+@`=As~2^qSgqT1>W_x?w6fw9RR`a}YjxJC?v>Zy ze&|l96{fgKtJxYLMMZWOEi+vsybcmlvXA>r{eBf74G^3U6662R#G?P7$HSC7MIxcQ WK&lra-Z@K8eCG6dtJ0HKV*d+p2)I)K literal 0 HcmV?d00001 diff --git a/Project2-Character-Recognition/img/XorStats.PNG b/Project2-Character-Recognition/img/XorStats.PNG new file mode 100644 index 0000000000000000000000000000000000000000..29560650636b6560f9f3ab5dc5e74a912037cd19 GIT binary patch literal 38219 zcmc$_2UJt(yElp*1OdwkNQsOBB27@Kl8iVCNK>$Yl!%Ckv_L3INP^OI1fo=t5*Y+U zfd~j8v}jNwprC*RNFYW!frJ(U2}!;kXXby-|E_z_cfNJcx_7VT+QePn=Y8L&{eI80 zADnZv-m!h(b_ofI9kw>VJ4;AxDDduSEUV&4KF8NUgmzo=HL<&{&f`pkpSx^ z{c||CTh;q{|8taGSEtpy{^z(*{T?P-LNg4E(G!zG=Ae*|6-j@luUVi=ho%4H=zNu> z$P#@=3!%s8A^|jd_2CwD*%})KUOj;RG5Hj-NJjVbyHPy34BBtoudOw!e z`#FS<5nfor30R=xkTvXj`vhLpB6_(75|N=dQJl*t!Wsm+iC(x@NBKH z@&&a&f6}F^wbsWQ>+Hw2#-s&gkGVeVDxO<<|E!|A^P^dC!}_hIQIIAlcZxZe-Q1P_ z*IdVUX9FwrsT-ITdX0`MTO*57AU*55iK8RyE}{|8E#Wklx9Kxv34v-@Tf>UzPoaWw z5kj7Yp1}z4i7-*?L=xmvf|Ar-Ybojg-UN@*XH0$ zRxIO>Ro`&OPt6T?V^9Twd9~!@x-HEYARmmW0fUSaZmvHMa3rvnMBdl#RhXz-Q~>$9 zo({-yQ}~#Qrmbh8C%1hLS$oG0Y!yKLQ42d~ffW?VfmrD7Zo%LLSFi>N+RlaWz8?6_ zJT|&%5hJ@NJgo4|ITO{F$U!~U)9frOPY>lA>=T$iWSCn`g}V_|Zb%1TZprG~#TtOH zH-2h3!T*;eca-A!^yPHOw{=etHjMCwO&V82EBhEMh0eK~AB@iph`V)JO@X%P)dBPu+KUIw+>*6H-W3(E@^xOz6g zaw_7WZKy~QJ&4MWl0Puc3(D39#I|j1juw19Q>fZ19EJY+3AwwjlLB%3@kB>YAdta` zUgS1S@h;w5eHof1B=XM>-TT`feFu#sxoA3jeeQDz?_EfM@SSKL*!@nnKp_`B#udTA zG|?Z?6T%R|XClL`6}hjn;fRN%Iap?#b?xPm3~dGO9MnZY(r290vJLV|qW~janJ^a? 
z@x(k;E;UT+a=%mG2fewfz{#|AmF`nnZ^{o}4r~nnN)EH@ITK{r2#=Y!Pzv^&$^FXH zvMUib`!26*rix6HPLGxkM-J@$dwF#`vrg8FK8u9JArBOnNLz0eO%uPY7eNHw#-agG z0}m$ZL~BTKiR+!8MN6V}&IC?4gd1#a-h6^Pa9YM^nrXf~cF*;Suqrw6#p`$r!kl@) z)Z#;-n*m`dv=i3orP`L-`wANU^~lBS*Q=weamgyJ1~X5(Wx3OJIfD4pA(LJ7fg4#c zIu&|D$&&bZ6yN!0=%qmEy?-59zIF;MQ6JV{NP@1@bJ25hqVT13E}g!<0^wneMS%4q za?yN@fHlz#c%CoPfS{L!XO}1_0nV7!`D&IiB2Z+VjfyhsrSyvAP6n>tzbAW~Bu@q*XTj@lk^$HO(8*<;*0Zk;;a z8Hr1Pcc|i_mGsEzKQR?`0q8DnP<0DlRLr?-XCZ88IFUsTZE#XNoR38~V0-5km&k61 zO#bt1WVq;cC!zfkUbG_OO3o37)@zCT);Yu#ikk4&5(O=wJS8qDi$VbJ-4?QHu|gCw z0ka*#30|7$(D;wCwpyu%hsVySU6wp)n|((YY-&pwt2O?(rQOhO{3t&%${k7Y+GoF) zXmAl54k&KW%JZr*c!|F8mF&po<&w`V!mr>oMs=zGNnid7P1eYB25TKf93X{{&9O|$gZRf3iAdeWUc_lguA zjvQV6Mkbl*}Wvoe+5rF4QWQf zzXD=$+Bj1S^+RhPkU2=OF)%scbm88=o&Ho{IhESM`7Ot@d&@2#I<3?(=Cw^i;w^Ix zhHe?j<#SXv(gZ_!pbe}resFNM{>Sr)M#VAwuYIaj}f= zSzq8HOh!Ns>!M5uks+0nVXz_@-!R-J35oBVAq(#0#0=|WNt7Se`pEPBzK2{60~ca_ zzULr1%JqN#IR(iY-AH@C)R$3c&x!!v*5qYC1FlHihQ=d+)3aUwGpA-4+T8Ogm0Ll` zb8NOMtLV6D$&&`&uMJ{#QxI@s!Ksgp(A=d1W1hkE&vQKq&3Ya2@@vB}cw9onSIp9TUW&b0?x%D-{F*su5WnoEwB+nwV1Ct)6EErvz zbpxdyIIZdtu`TAIk^1jot7<;`z>}m!{bM0YtnA@CY8DgW z^&s$-V=X)e z*){P$%NX-IuOXnRDOs*P=>GD%MV4B-S?xIIbgVj!EU!{7Rou%!t>>MN;tp%icEeC5 zf)j+l)+L%D9PELCHU>*(JZu$^8kf!76A)i57k72xxlbAE?*~wTK(T$DBxDFu`D(y+ zZYrPodvK!}V?=un&|C?dxqU38jj9xvwH6|RZ((=|_dR|!ng@f_fq<*~Bfi5#l5eh5 z&j}e`-^SJZFK@ifh^PoJC-64_mz^6E#VRJzeX3skuEQt)+o!M6T!vjn8}UJ=QAT^S zEFdCfi7|qNm-F{t0DT_{xf?;3Ks1J*Of|~T&IE>fYq$zy$5FXaOGx5zJhfG@AezzF z`rty@LjW@wB%oA7tK%uZhFiA^`{+W?*AV--KN6O1f&TaZ?|&%6TE@;l+9*O zpe+5M_@P@6U^v&dB!2JO5RJ(G%!%pPYyPJm38|f2fa8*RD1^E>SGkOHW$S;ZJs^$EYS!*5q7IHPyh4hEv&KC1~5VJX(1K4QM|akRlt1n zbD4rF9KY6#D+*9> zt(4-q-bdkI_bY&$!TnnMwo=SATbm(gxV-OQ)lZ?PDQ635pw2H{0oF$R)B>w zex=sAa92<2A#bwS_v~GB-$BepHg5AweGLhTn3X?^Rb}yUZoJoh;PgV;0PtuLx<5z; zSd`Ee3vLeidedWAmOK7U@(tj*dY^SuQ9!-RiAWQ-sr|VYNyiJr&1o3u30K}L9Yiv$ zJ0H;Z*a&+DcO`{^Vf&HcLcy6;tay>wZxncfs#>f!#hbu!^ zrGN4@U>>h?6?|OuNnI%z)M{9rMQ@9#l?k}v+p!ThI)gg0u2=(k^{i=TE6Ap zhL=mLj(A=%Y0K{3Jmn!T>uX^if1|r_xI67_gC@-)OV~5HXWD3{X}O_dWj7UhU95M9 zmKbWcYkNG=1RF*w7*P9TBum-CJwIBDyvD8DfE&K3(?*XhV#bppsLjCSWq%sSi_QNd zr+7JTX>TzBi8i5^0eXpbtO8=(DW#5ciAF$_8||_L#y=-T`hR-@5B(E)CYYQJfMp+` zjTY?$RQ@-I5ikv_kDuW48wt?vK}v1sT;p1cXEjdBPN|RK+1d;$!h{+uuVVQvw-ss& z6eJ|vA27T$KTn-z7{^zzCnBelD$WtTC12^$D7UrN!W)j6n3S+66EWK-BgFPLoKou! 
ze;knrrxZrcxeou?+z6kZT4*d@;s{O8P=d|ad+(h>lDpgQ;FmT^Tnz!!n6Kqq2c$Q$ zXfTKZ1BLD>^%rGQQm~DzTv|e_N&vNS*^59ciJNi8sbIUW?Ev_YtF-L+@{>=zl%lf^ z_b0(bbtJRlr_9k`omuq_(jC+fTSi$PdXojr+~YnzK?puix9PR(ufoh&abjcCayI=X z?QzDD39Dp28gzq}f7YNTpAL1taYM2y5rNF>@sW??nGV8CYm%dfhrll7-tcqS<;My> z%8|@8!B`iS-Pn9;_;78Mtv(U+MbFer(9V`FQfDrSFw3 z9GxmH2qQQ{cFYuYOb)r>YEn~94yVHnzO~v?BNpy_4)l|d7#YsS%6!{A^-Tb>cgEw{SI}g2klY85xRbG?x(P5-BW!L2{PQj+t~R zV|e!~>5HEEVL`^4dFeXzL|&b+aAX*zH(A-O9>s#Yrn>E_)7$2-v+Ybte!8$K6~siG zt;07NJje7_H{`>ax<0K&mjuy7TySGr)zVOP{sxKLmzPL_kJoXX(#+?2XYXpy`YDpm zrX&54#I7u(x}~SLK(C6uT|jpD>=7KNDQpgmPep;|6yc+Ds3^=>-qyL0K^&x;Y^*(u zhU*4}Xa!oJ)}57*9?aDTcO$NOWVY2ERn*C;x?0xVQ}x)=;5h5-A8c+4RY@W?>K797 zhV!@dslMlj#v|spd&#q;`P1J9o!f4Exy<<#x7s+$+-?B3SA}0)63`ICkzV~@j%)7V zGS*4LaJpL;(l$4>N~PxcF_d$$fCd`kbEjKtf< z!=kt=kip3G@xJG#do?Fs_iI%eIIJXavdj(4aAhl1hZ8bZ8{#!Mlq_0ROzv73ONGaO zQ;>Z;JY{<3D?KUiVW0O5Eng$Zye;fgnlp5{BcrXxyfR`vP=N*F+f0RJ@$6Gl27MwT z>gkm~7gktwL@<_RUv6}_OK=o9jgx@y(ej6^$(FW=Y zRz0&7=ST9qnaikpK0CK^+tNj@^N!moiXuAiG0b2z@Lz))7S}mJw)av9T;aZz!ie>; zy3g8)SCw|>aP7`wQed)zQOA@N56_1_weu|$O*&h-_9&3{A4Wa6nAhYUz4*rQ5A{9E z+~a;f-XtPC>9neYk{<)~f9LT&>yFyzXV)T2KCa27X2y@6%pnJ4+vls6MQI;%$ft-C zr4P1so;@W#B*W|^gf&K8i!Ze8RBSsNJba-Zq@o3HQhZJBn1V+)mCzNks;aJ+|MBi~ z!dR`%oZqX+trFioKfo~NnfH$012)P+dw4BN-m5_paJ}riFsR{1>KlwSH^Fcx>nYmN z0`QJHBxFJlSy^&r6s$+HSR;q^TSf!iXX@R511|p#WB!kSk$~J}x`)<6%w_ih_{OQ! z^^s9Vp^Vs9OT4WLofPLw{{tc6?cvg;S^}D$%UsJq0vX(F5=`#uFJkvku0qrkX)U>I zss?|^VxA*n(QuQ2ge!Ua_>r{{{mrJ!Vs~zuiW7?&18Y2Alg{RPeLP5$tEk|g-`EMjri{Ry zEG|!QG%1fFa0F7@CX6C0M1e~WHqxMR^BpHA8p6`FXhmn-;ik09F%W2c6d8_-N1g(N3m!?mH$+F-TBM zOPe3Wb;i*{hGg3VESE^+3slbx>iUd@>xKDmb>%2dX!^JMQ`0r{GBt<`&Fcx%jsz0vix3+B++NLTUq;Jt-Hr@zZ0~O0XkDqq=_P!F zJr@o+_i2Zczhx5hTZ@S_xA(3_KhyXGIn8hRbV&11ojatIm`EKJjE@*x>Zd~xJ?w`x z{$^kXGYJSh$>}BLfxUPR!PMUdgQ-~{8gePz)x%^h+san|BTI>uf36OiwEDEKm}2ZX zTsQM@glC`ooVhJxfj4N+h)mCwgDGa)ibKK zzOINqN~c$2g&%wa+I-A}mXE2{4;Qvem^PfOkD?vzV--#Jv|of=R@Hi!dDq#?Nz0+L zX30Gsp_ONi$2#Aq>(H2Ale0 z=n&jQ)L&`nBP7eZ-+&+AO(nUYWEe(IUTj~o{|~8H?8Vy?Y7()d090~K z0N(gE^ph-i@w6R#dCTwCWi`TGI)a4rWl@2C*68VP=b55*?4DzODp!8KGZlBlFlsmQ zT)_(|E_1vk(?FnjpHPU#>aSWBGhP1Hr=!`W}Ew zJdnsAp%97Qv1l##_?N)1s0QVhpn5b$df%K4Luf-pub;kUWK24(vjHSAuU5?%xl`Ks znqAWN7aYomi5ye+vGNZHgxcSBO2i&GoQP=lVwF6IS#yVT(ie+90Tw%IEU>-8woB4g zXrg&G3bAcTyOEL-<3=tKd*cN44O;xFem|JkEl3s3=Ec*OOY?aaVWl+#r6~Fm5W@H} zT?9Xrsq(DB?zeh#Dh9@BVbl3nrMS&-on^|CbJ>e`&O(=mbo+y1npWxzTZrX8-`Mb9 zY3@8y_EH4Zn3mm7Bt4$lDDgHUtUg3bcuc;SX_sClr#dZqwWw|%;e8;AY_c^y(z8bg3AQxm?FXO#>#MBIjqk$U+2vh z%s7yxwYD+9_Oa98-o;(=Tlp7G9#^p9m`h!3P%|5iP=hFNYl^{U#YV-v=OrEX3HfZu#6Or9y_=U-h_%I+#@T;5Ymmr{i{lyn!q{ikNK6Os~5$WDuP<)>U|c{DXh( z26U_~?qallGbd=coFQ~0nGsunNaLCV>U_YoySqQ=k~u^%;LR=<@S+Xs0r0;e@Ssx^-Ik6&wakhwMbG!Edx9Y|AO+0D#(iL;$+`Y z^>Ha@j68n!gxOoRRNX}MAkfU>M~8TZo{$$0@h1+Ccbn5dF_Zx)I+<7x;L#ClJcx&grGJS&yCc3A;mi3)OwH-~s1mW()r_n4tW+(Rot9xF_>Aj= zRs3D1v&)vYdIdHTcA_n{|Vq%So@Z;-gk$#j1Z zy&7Bhy=rq)sfNO#fC|8?OI!uQ@E^rdx8b-Vu$b}$D>GWGrbqW|=673(9a)VyLy(9) zD|R~nb}AJ}v;kuAUY^C}&T?fL!rIZ(T}%|3{?A==KZMjRyGTg- zba-&3j^-T_vKcS~wkk=$Y%9HOfQ!`Bo6rsn$zy^Oo$I1 z_j!;#x**CCJg5KpRN9KMx)?ohW4^)Sj5}Jp^who6M146F(cj`Ajx(?+@A&(v(abI_ zcbtdvXT2h?D8VnA9Cb|~^;b51e+Kr4ev-WBGC2-~nxV?ss(yjV4^HOi=Lp7SQfr?P zCtj}vb>)GFU2e9137%$~0c%*Pz!LmUO_WtV7q;7@!BNv6bfrh+v;xA1;@bZs56RP) z_&X1AW(2&l;0nHIB&uu9WrsJXTR06v#ON+`09xqPkCxuAyWvZ=ZK@u5U!OJSbK26- zwe?TY?6H~jJn|($F{XK&9X;z45}NV8Dy^ukbB|!8R{)S1k2$T)a?;*bo_kr;AgYwb zEIj&8dyK~2(@EBz*m*d55vy#}6D>=5h4j1Er}Dwax2qL*dCs`Dg`!YmR2Nw=6ZMLK zJg{69j+hp(%uzDz{A)xl@=!IA)ZL1-?CoiZ_+u`88rwW&h`-*nNn-wRGmL#{)s|!U zc>2qu-0#th13Pru<14sWs+0L+yR~=2CQeCDSd$uO3lA#P$?gWAoYKmN-<5nUoH&ga 
z%PL%|QX-IPKGTnw>>FG!dg%HK@FG8^Ph%MGn;bLzVQ}2LpA)c zqTOV0_BoQLU|FGr8DBN-o-5+aN^3|+^zVyDkcW3ffY{lIAVE_|-e!%M=iVAQuQ|d9 z*K|s+?NRP}Ro!V*IO$F}Upry72)La>*r8kH1>m}2U0>a~a{j9~!5tSQKGlZ&CtQbV zZo_bXiowa&G9I%1XSSO&u0z@b$~*|+_l6JC zEYxGhF$co#2FeE9SL!?ce6o^9+Jb*$Q0|*o7TL8WSHs)73k@qDtDJfEX|&d^q${=0 z!X&bXf=Cmb(4j@mjnKJ$id1pF1UWijF^zz5d5TQ_kIcVTdt3*<%hVinEnRw3Q`b{5 z)T%pOPo`=Q&wU8^CsP_Z!}}=pP6#AWe|eYxTa5Jf@Cmp$UjiN~|LLYoWtCX13IpL# z(vmDTd65MTFCt#|XBvQ`$>4+?0P7Yzy1IQg95N6w;WXeLvsC&Kx#`8l>0B#(Idmq! z6$!YZT6+HwDlWtb6(*=FaSF8uGEe5}pq=PiwzL93eoYM%=-1Q0uJuGZySed8Jio9u z0;Ax33zSWLoiD}ZxbYD_U8{p}!~utyUHT}gRf$;cIb=Q2lZI~O$+P%BRC|Ak;7+25 z#(t-Acd?i?E8Mocg{AF`b;TOs)+0K!Qo8(%#_q-MwecvF#pc=P_TJa%nlx<8?MKKd z7*=|Gy5Wl^e&X=tiL%NNL90wy@1Pyle=}qYt4SI!b0A5xaZI``igW}(tccG4D&etP!0|L z?$%++R9ifU9LOwV0tNrMJ0&9A%-$ibL4COOiOnDEl{I}E-VSo&M!HBes1BRfy#wrn zyrrb{QN{hx6e;@MTSixgdz9>CtD<9e>r?kHZ5`E1n*gq^1O|N?3m?zk{HMp; zNMmU(xIQ6zhBrt;HU{;`HUtv{&&g@?PeeLrb{Pq}Uphz&oQ+-)W%sb);f;waN_5oA zPfabaDIAy2s@O8Zs9%WD*3|WE0J%FK1qdIc=8Ngr=O2!&4OD9F*2iz>3jHqQ$cEv9 z0+85|EygXagg?{8V?UG&&Ck{2Q+Jq^G!;mD`S_M24QoN0ix+MfMy=0kHz|MwE%VLd zq-8dG5}atl@musb64`d|`<~{Y)wT1u;Z~$+7RRGbn%nB*%MS>uH(2(!EmNIH-Jw2d zzPBMlDFAvi%Ln-t^d$hTLr`)jZ&y7FFlTesR6vSq>&ko^@Yr;f3}<*@Yl=1nmU%>b0r-L30|ku0?@OxO^qM=b z&&W7zuC%qT*uIQL^C=XmwKoSF9!IV0qRYM1e;1%{EFtlvNP)HCYNehy)DLX^S7s3R zFFxYGQR-T#h8fV$-pMf7VtxgAi^CGIh{*xt3Me4`IALc%>%`x$l=A8iaUIqfQ>_5k zb|7;DSjuNG{atY8&w85mUcj1pTm-xsOzHw~hyvL17r*`^Q!j zUJo~y82n1Yd*0~h&++K@%D9=r58khM_umQopeD}+G1L!X9O#8`pKI$L3V66PI5`E# zDvT=SexUed?2JJGa;wiLaSB+>%SHGpidefB`VR+*0sXKuk4J)Q9W@%xpI^CxegMMDMBM}o@pPwjUaBJAjQ zzNEh%Bj^!b=D%6ae&PZmvk^altpGGmoz^Iyxfx3vhJ(Uo3PTK;d4u6Cy%Ur?ywjyWH(f=z=+`YrFR#TCA$+fOs5hu!Ta7g)@V}x=mX|VU2lTpJiAh1c zh-nU@XTqQOod0I!vIlgAONth2m}xc6dG(vREvg_ov1w1`o2o|?gka?bbh;9`t9neI zy`L6x4g{J?Gwx6o4U+^%ZQUojn)|u{icyfUb@>gH1J)C0X!4s?5uu<2ppYP8dJpF; z@pZUqmh>2{>&F9~0*aIS|67$)Vr{gKTOsUKoa8*9}xd zeqsT+c>WILt=LAI+B+dCs4FwyL|m_G%5`H1W11lw&o17#()_Zc@3`8PG}UkIBK5D&LI7{cc^WSsO+cyB69MU8CO7drD{G{ z_NYJ_UM#H`@^i)3*&9V7mA;Wt!|zvtMp&}ncJ7BpTGsk(*ml8U0+vcv=1J>ozr~d7><~txYG+d2Fo?EN8cC3Ff zjO{uE%)~OG<>NPPGr3b}w=PcvN3mw*L*Kx_OXPBuZ}~JlJ17JL-WTxmb!jm6y|_PF zZHchQ=L+}7PotDn4+ANuuJ=t?6z@D#AW5RZj4y^DQT{n6`44CdrCuiGR2P=P2LDI3 z%h^XGSbR*C5VhPgvHW2`Ji=BSBt!qubFhm-5#T+Lt1!7|vc=L5RX{Ai6ZgctJJ2_D z;r%{V1?CX|nYlnpci89`R?W7RI`8#K)ED-h6}JR7k%u8`?P2)VZQiT~bt(rog0q%P19rE6PHJf3|p zu*tcC0e%Bl7_h4MQd10&`B^O{n0tbPiZ{-W)Xf+`3ew?b`CXqmBcM_6Z_3@5d2^L3 z4Cv9mJ6(5DRDu2^6*&ezw<9{~Mijuke8_+gB?I;w9EM3>KM=m9HOR`w2JjYkGAC>a#{z90P0e-QzI@br zW$ER!sj+)+XojkY^=qHJ7$ZHUaYk8HkF^hs+6##aUeuKy!3*~97Og&8`VC+Eq7!(Q zbphR{QI5k25;lQUR?Q)?0s-lC-r?fQ}~1<9D+=&qX{ zpEW;l8f_7DyZm)Z3-zYPNp>;AYs(i{Dh6wy?y6VjuK(h*_uxohsgRa<($e?v0nFGN zPJ7TOvsc-vHS|fMGqgmN4j;PB6#g+fqFfi3{51odID4v}Z>}*JR93@jDoGV9uYZ3& z!je!*Z679{+<$&UPtubI78wMeW<>q{!k0VFhv^qb*4&P)$FKOC;uqjYvTfrK6RTCK z;{!SHFFvRBB_=#nf(N114s3&6S#>sbq04u6boHL|W8QK2xfi`#8Q{&YJU%Y^?xk(` zlzE2>Q)g)JC>iE?H@cM%_U8XU8K)C{?!5QyxFonZ)ZK@0gzi~gz~*5lRzk{Zij&7(r|W=SFx{KP_pp&H zd`5Lz>3XHUI({!EI6pJKnXn-wpis^8kbZ*StFlLyAF7*+osJ`;EUmM(Q{H(fw+}Q9 zt5IJ()8h~SiJM%$e6y5cMqcTtWqdq+4;<;pvh*%nN(-%@5CGP*a+D``E9qTxi0;_$ zIlt}q{#DP!G;1i>p7=)?)I#8h}9sw$m@?W^D zKVQPo%TCK{%T6k3n6hN-)z(r2#+G&d5v8OUAeubWG^BNAi<*|6XD3dcd^CHfi)@{U zxvGmE`;{@mX%7XbIis)NU{Qm)dx>T%`$xxYID-wuv(VdHCEi*riQ6o~t7V`$A}{O? 
zukGCkq}mAviuRawH?`N)taRcC4?JdPxZL5AU z4E%J**v3k)NmS%+0u;nOp7;LLaIcYEj=a%cmfqy?i<9rnw0eH~mAc&VY6a+6TGD5X z9btfvs*%hoTHQoR6?g{BKz1h!+b8b7{4J%WJc`(`f7qcUnkwJch#iG)Nt3YnC1bSP z_Sqv_(;Xdz=*gA2G1AGH&(U7HBO*yH9YOwPI5SCZXFO_#XR+KlI%y}Fj0RJB1EUs4LkW;3%LkD3lZyE*#8vqP^SE@EmdBx?` zm&$G1xk08GD+VbH%bl#Gf^UC`_6KHs>Y4d39(Eh9I6cdXwYXSvtStLpSXr>J_Q6t^ zZbW3sitn&fg%w0)KMgbXdo5zq~v>cfbKr1e-E8o{sQMWHmJ%{z9th$F2xo+g?lykLWP zev{wd`^~Z|A8PMO>OUkklPEv?{<}`NMAqv^O)M?e^E{2sOhJ8PbTp)@9D*Nbj5z!4 zw6Y<3E$(9oO&KF!k(`dmzK922%h~Zo-^hUS0bEV&z7H_D6V$IHOMpQJ^yvV}GV)YC z`LQ^{9siZ)V-jag+zU?KTwVxLJt$TpF>Uj|X?_g74}f^JFGb3Dbpxf5kbVssguW9p2}UuePwF3g(Os{{Y@HO3)0e=%UY%hZ zg2O#ENZ{+awptdjZ5Uo{tAgkqK7tR7OkOhL4Dvcwtac7SrAw-Cb;P4P9uYWd!x%|1 zQa6$>2$|RJxjyHxdsY6eg0YU;Zmr9LrZBdfiRMe zJn_-z_(C^VX}ZXy-)F~4sqiIZ=*t`h@2hJfQ{5V^C)Zewi;~i^j?tYw#JG448!D#I z+fpD|>6H)F>l<@HBwvO8sbFa|HT)jLYV%KdB#KvJ$kiU`V zE*A3j@%7wTpq`WGpYmYJS+KU_UeE&W_S{COk+Z%9$ukX}@au3=00ujNa5sqtb zfoOc+%_=>Q4R!A?T`mCX3i&~MS^2c3C5rjIoeX{`ZA2t{|qbS_BECa}RbVke*^nFZ1T$QtKf@8W*i+|pS zVTR>dp$C6@A4%@O=kJDxIf}bqZxh^C6>s})G2;BMyXqcfCn5x8zwo5tZjN2S}ZwzV&GpU|RFd|!1FvP0FgP@N%=3&Xk0JFW(* zO`Lq343y2t3f#z0Yl)z?X>CTZMtNypNys4DRNW z3jH=O_|5L74*p6@n(t50TMKwglb<~0W^~EQhM8mvDM*vwXAtX`T2(OahVD`g5)!< zUffqCbUiwv;|{b_VJk44jL1tHXrXL(3adC*X0KC4%|7L>y%GIlr=S-T5%r9he$Da8 zA<*uebLwP=+rbd9{$yxEGLKRIHMU(nfXJ-$~5G(ajtoP|8vNQ zhD^CE7lnw}R=dN?{76@(G^`3jMP*p>K= z4Ax7>jYEsARvk9h+xOYN#cXgRSu;r9x6%}R(mh$Jc@R{!lc#2shh4tj?)q_8cYZyL z-5RLSXGOO}wG6b)Klzf?DzC^N$%S$&m?th_D^fY-k^Owtrr_Jy%%*FBrU5hM?JfO% zzeq>`HO@>a=C}Blngkk99&LF8w#-sFfR1XGKAmOGZ%2-umtj_chi@=ib_7=8cKF=3 z9|$QC4GB7;A^JDe{5s8@zWD*|2SxdM>@^3Hz|rSGi@0e<kcBx^7lmX+pfoFtONhhD=I^16!HJ3ydsJ zSxK)BRHj=WVArOV_7RR4I|G7eqUakWj*u_Q?C^;yp*}bYs&0JHyDKCA8>8hlAbiB1 zZ~6vBC!!|{YzKc$&i;kb66A+eh?p_Z(J++1(dIqvsy_c&lB+D~n2UHuNFhGez5|rRpe<~BTrv#gqelA^lD(l&7`3|n*zP&$$eP*i< zkiXt{o6+6fEGw(sSyi_;bq(x&HK;y{WEQ26(PQ$#(0K}9Hs7C8_UQ>$X)pdoO)VIn zdi=(#$jI^tSLF=uK4r1Oe!rP!P<2bIEWY6Sb*tpQ_z9><`NP8JN7Q9g)6EJhakS~{azgHE-};$~S>n%T{-L{UOm*iPh zx$lY`q(Pw@ZtVi?yd|!n{w#0&TmAIktEvCL{-O{3)94HKg^CF&KXn?>@TbFbPLknf zQ-SXX8m~i7{lww189?`OXT_QjwfWXK(1~vnU3VZ60WBRcUi+`QE_BXa$sVH2^@MPjIAC6bk5ZDrNDjyx!4c!jp?PHun>}ncO2ESZLaq``Jzq)9 za^|*3tNH0|`Ck%;8|3r&-d~(=r0#1KOo#giS2|Lx-OJ83;465{pWLi8d7S$MwA8-V zQPuM$NGT~!@UhBz#EEx8mB?1W1FhOmFfd}Qtk_GxDiB zuo0;t%Q^gG^6$?X6sp6sk7P`fYyeQZnPB%MxO4~RmpUbB8=r$yZgImIuZjA}(=eqO z(dt@Hf`OgVAn~I1o-1y3CY&+|K1?#g-goSAo>$`vet|QcQpzxu5v2DGvi-c2L%uTv*gjq|z_@ zkvE^zA9!MMDR@}N=A6C3v1{^yaoE12LA!y$v6;bCLP?#$H-`|dAfrgx2_t`4w+f`L zNW}s!8(nU5y8s?qe-RyVvQZ>ljQHfa{0BKeA>wgb+DhN#K~6f*<&D8wQ$2&yUCaUQ z#58LM2`PDr@MygAP2p)$g2wa74%a$_v6N|CLP$qn=AQm zbeLTR9~0h|88u*PqTXCQ*X^mB*w{6EI}25_HLUa%vq9KVn+XjWz(6pco*1OI+9S+b zJhevXb_NXqNnJIInjB&KoxFcL;`M!R1D9=5F!S<~yY0@Jo*{-NH*dL6-T4>N=fXBX zr;K{QyVm(UpBm+;weKn4S^(D1))8$IgiC;FC z@-AT0i%zkC9vD&9z1Ooil@dOAac?px0R6%Eu*xa6UlTGBc}q2EFzlscobZ#!g&iHb z>&#t1!6#deySI2gf#Rmj3OL3w?R@$Wpub~_4v2k;;2c1BTa3N|BIXNahA?F(YYF%` zLIwmBsw7^dm;s48u+PT-MUM60B}woV@^!Ko5^4i3F+$zjfo+Lbm}j?MS6Lk1CK3De zJ{eX6_IGDVk&x8Ty=nnaQ?9>6`_Fn+9rH)-SvNfuGqe2QK~S z9L9Zi)8uCrD2`$m66hs5>C*Ml`TRkLF%*(Uf$fruaOym9-Rh%jJwA_eA%ELsp3SBQ zfxSAoAwSu{?#B4^)_DG>+mcKV>7;~IgJtCO77jBX>6LduTVzY7f!nst4)?F)2#<4?_Gx=0}y7ab%;{dhPHz>tFj4vK*v%_rvcDLJ zX|W?M)s|BKfmh>T26qjo`k7MwI6JEvnr-jyV%h$zXD0yEt36ob_ zAp-#FZB#o%!!yne5sxKuf3F8cU;0b5!{DrmuQ?X?a8!qps$yvVo3tA%EHVg6UA8b^AOYU(D)V%ofI7_jUA#UbrXQ$Ur2M?uWP4|ctMXt zw{zWJCn8SJ0TxM70r(PCHsUeyaMR(p9S9bxIgvX8d_$@A<}Y*{SRb`KCbW;|cf&IN z>J~*RUTF1XQ2M%?w|RU_R_Hg7;bMEly=K3qXp)d7kNH?Z{(59s(DVbtKuU8#Zu~ai z3uc07|HVxCw5-hEc8OxJj}!MYtG4o8(BEJ|Q!7lWQI_)7Lht6RB#lAJPn$qMF~Wpb 
zw44#x!*=WxfcET9&I|d1< ze5n|77TH{?*UEGT*1(4d6kBdz{y+|UQ}#$jYq;E)ra*d^1_Mad8i#r|v~QL3@;_s2 z22D<$!UgQFU(hQDNP^0se>Bg1CWC3uH}bz!Cifc+jkCOIQ1y zaiQM3KVi_@8SQ`gv9hIuD4uz4XebhnN%0Yf=I=pj>JJG>e zGk#j;qP3KYe~!nw{)n@ku9)M8vJuE@o|QpaTM^bP*7eXcT(S<;(8Rb&VMH6(j1;HL z`_^D`vy7VzHLRF76{pmVz~^%|KQHYRXVX3xW(+m2%JFtu03qU;rkE`G-+ZENu!GBU z(OFh7PcZ1JMt^l-J$w#R0c_yN`%)mKtJ@<^Cxa`w0?nNHl=M;T9mjfH=OrxMVAM)? zEsMTxQZSOsBtY?~`F$=#)C6=JxwXFvDKo8URA=|diMH0TWNmvjuGUJN%@Y`8OG>MAp z^_g#6sM0qxFzbJMRgC+@Z?rsb>AtSBfgvo4mw59J_-5VV*Ty>r*~U_9y4+Qnz0BF? z{9fYav!ic80Uy{PU|Ft8LyZcjYz?*SGcXSvU)WL|?u8m9k2gFxPII$^+GTcTYBtUa z`^L-<6?tJ_);wwl4`&D0l=%cX@J2mJZ)ad^Je&0?lIelqQk)io@YeP8s=tSd=U!5L zwFZOG{BDbIigaN4-)?QWzA^9Vxt|z2e)#&2)D47uNjnAEQ0jwu3**7!bX<9F`fL4k zi^}4z>frL0ET14fm8Cc!yLa+@u^q`>UAv~-s_oy{*SF%Gd*@_mw|PiJxf8BN*gVS3 zY{J0Vb1bWCA6C`YkU((H)TU*3#B*+0UGz4_2LnV@g%d`D{E_a-*1T$$kNi!kk$)5L zb5va#{;u9it+$Mm6klznki$u{WHx=P0ylT@1Xz)3zt{XjGCS!Z@sU`2y#mhEqwVv8 z?qitWu3wkx=$e5+U%;cw-;Gg^8qg*?o|lt(;f3@|@z^>#^1fZ$HUF5di!**w&wYHJ zUH?nmO8690sF)uul1A^o`c84DcwzRoT>DZGqhGfNfYnD$q$c|3;FBZ9=daAnqScz= z-OmGewsrC>a(S$^KIkRgvpzA*5-A|Q_E)|_*!pqern&(D1Tcz@ zNJnY*Vn{OSUR=1ojF=8ErdjxZ|6lEWcT|(9bi6d4pmDFTWV>7%GL1!>ZR z$RN^tO|VeLk)qO(BBMx`PC^NxD3M}Nq$iX}2@nV%gkVD8zF%}^&+M~j&OZ0-bl1t@=THFeJt8YE*M zKS1yXV{k(^WLJxp(_t~$I3XrOGk&~CNP=~^06cT@K76|P6+Vk=dpUpeTb;A9J9~-I zW;3s#zt*On{vx^tCx2JM_f!ff&P)U@?BA(<_!Wk&d)``QQz=#+eu^J9OL1Sg=XGO4 zdG4x{y;@%aCtbKzz8<_5ni$Nc0qOk5e;}ivi zAF$od6|sT7-uF)_A)uw>zsPO=U)dj~>~FsF2FQvJ^wME4a9V6^FMt?P7P&CR@Na8~ z{ObG#c@IX(R?AE#drvzv80Ws?stXyc)GCc$B5H+v=A9&ARf+@=>ZeB9W_$Hu`%7ld zs$bm--Ug>y3-TeHZBv!hdFe{Ho}kIq<#!QB+1R3z!x>gSSG7Kw0xQO`-J=UF^a#bd zW~1YH9)l!#pB{cz=7x`7t@_XjUUyohX&B_anr-cT8`2r~Wg>K3u}!HW;lqmZMBgN? zBZ~1WSoS!o)xP714Q8dv_o*jM;XeXqmEQof^}R_ZwI~MidlgxObTYCfG&t`4|6-^p0TLU{#=i z5QZpVo%=l3-O<}0c4oZ3@{-H7O#hI~km`dO|Di<-`=|MLu6?i7b!hH1oaVDv1qtN=w)0T2{z7eyA`+jiywN|8$c@{yjK^?txS}-ab6R*NxcDjrg z)AeQp>MF<84^^KJ^*CUQOq#J9SYh-B+cM$X^;(Yu7}Q^+N>h0RvB4TbY`%9Y3Hv&Q zP9oi9=>v2Uf6CCEqa`_R4w7Pwit_MfITL*H>`NpUNfGf!kbHl`dE09G!MJGvC!K+_Y!o0P7SYcx5LjU=D z{|Pj{@BfaF*srA)P=;#DU|96~F%w-*V$=tXDCdMmg}s+tkH^0<%5kfls<)*I11F__ z-DnMs5%gS@wt)f(N;Qupw-`qC+UCh8TR%`ar+kW6b);HkluD-lTA+U(842=BX%``5j- z)RDN$5H>b{QmChNfiKXfY2Yg|zG?J#$oOLG$o%TSg&oF&3kTt9Uy-pg_va0#-pD1A zwJP>u%Z$8x#>=>QDS@cq!$2yb|9tbAKR99J5C}Mtx&-2`k7O`27sh8+!kaFA! zPprA&{SxO=n)R3q@OGYp>IAEVWrV*tqxK*!bVr_~l)z1k-E+9> z2h`?nT@E140(@~)lQQAE5e!1NFm8M#US_40i^&(oaAFDH@;>~4TDTXnIXK$pA<+*C zM=k!t*! 
z(RxbC`Y0N8+78v8@fj_1%S5-(XcpA0AbOtv?1qJ*qGy1;&ZuRN26i!HPB+=hn~x7S zFyWFhp@YudHaSqHeWNSgeE%XW&x0(+IBa`SzyHn8o*4bgPdK8nlL^ci5 zLj}Cxe&M$A=E}R~8j&Letok&~<=b}N(5i^&>L_Wl!YX42fWKA$fTNCDR?`HW+I3_d zwM5HG6`YPSQo4ENV0Dqvj2bYUr~-(gT0@$tI-Nk<&~lwxgTe*nv8cACI7`t zE73;`Q5w}CyouezW-T7Gvy(7&ZqeeBmY1EZjVfJ}}`I znECm$zf3wdMGdd)G!tR>Gq_9x&B^-5gN-{mcqd`M@ZRqBkH3+=8&>1)z|DJue!al@ z)#*V;TCO-zOMpthmJy-9I@93$)Btda-68-GnME;H4R0TMc9b2*ZKkJ9`uwccAS2^+ z(d_$F)g-$GYz}(ZZW!lXWJyG{sHno`i{6ZKpB!EIiW-Y}o!K!qnm_Sgm-oWnAsBk+ zaP1=BRQgLOoo{o63xd+}8oMv!bK1xC+dFw4SoK+AL{Uxoi1cG zyyZu%?d>@lP#T*77tRUZzCsRDgL^f8E-fIDC0I)N!UiRn&sWNC)gCrMc}}&+aa^g0(8~5T z#K2pn?4#7}73nW*I}R}LwzutHv9;FQnN&WCkE3xP3|{-*1Eg@^7w9t109l{u?ZWj$ zWLBGI=mgFqGjbYYODeL{15H)+O0!JNmkRtx3Je6lRypKu#SN#rOW;D^QpBqXM^mhj zfF1A4#g-c;0Lnc}cY&ZFX%)Uy{yhOXU$iib<0dHg$)z5ag;f_+Xi#7p$KI$Cxmu@A z4p}0wozD24!l9tqfEGbLI*3e`4n6QAG1iiPHO0Q0jKCRm*NnwmApzW;d2xuAi)%8P z0HO{URcKUga- zTC&qTqQPl_N`qXYvWH4Pu_#R@ETdnwz1F%xZFxwgIJE z&vPu=x^rTa6O`~-Gr3KpB=YL57MbHwV+5XCUvapOCcI~ceydRX$Z&@lv#NLPfg>JK zR|hw&H~LhjFxk2~K6?KWLl4fr287p#i6f23u8Z#z*mCY$pq2%OdLhzLlvq^YDS}ra zL?BF42<4b+evO*)B-(?9TR)L?S4}uSH>mFlw%f{fM-+hXEk}0V#l9BVjpVp{R5G69 z%zmkEZ|Bm-r62J97nRb!XILIOFiG$!QLcF;wFQJ8{70@jwX)bz@U2U?ZLNsU7XQGa zMwOk_b0yNPaYa0&?}iMJaR*18QsrTcwq9(>+4W2?LQ2liJK=}IL)kpZ_`Q#~_GIgy z{eqRh#2?+YBQE?2v8-7Uw#7MmJNg*W7Dx2|(4`3F;0KLoEWOC z8{l8i0VCjg1%*I9^4MJ>9o-7{)Xf!#A0aYt%zu7o8ZgV2^G}>(j zt{9-9J*$Ki0yK96L`5IrXv+R3*q(tU_E}yUusWpQePk*Omu{|f0SFzUr5&f(@yR-? z2ReY7q4)A##hi|W)*G*49NyQfRbSm=wpw?dJ>#QYcE)E?eec?b%L^PG)%p3fq&bfw z5E!Tw`629z>TU`gCMt1=L&UkJsjSD}WXsLYAfTi8#5N&dz&TMlKIA#td(8N5 zP=OIuS-BSevOR&0^pz83bWnAi&&djC^lkj?c1}RecD&(Q@S~D34jLw`;Q~#b%-bLmm7@M)blWqU-KQ7z0f&tmKz@N&tVgHtG|68{G zzf89ML7ikvx2~^Uh%*zBK%AV@l6hTS#FGUK{&`nH_$N@^pwSJt8xByAH@Inm4Pp_D zQ!(BuFA2MWIwSiqHhV!>!%q9zNS#`oSyloSUY!gfSx8E4 z^WCOaaps~g+YGB_wZ5h zi&F4wvt%a-=UPS1NV{78$|Zk-DGyz{7#r_yc9_Ct?|2D?>=_&sC(vYXuLqls?57Ah zZ*x8jm%CQsN!i6tE90RvuYcRl65K7YWZu4m9?b-n?J*lSVG35f-8dCo>30<1Zd@?p zSR;l49vk1#H}B4LdEXZcPN&E1;VE>4H2It89OMERQi6h|xhK(Kd!H(AfI}SW#tkA=9t9+3Rj%GzA4lzkc*qPx|}sC+?58EIWg z#UFLlPQhLAOJGY#YTMyxlUo^0wYcwH(=X9CIrO7*X~4}eK=}KH)z=EeUCSa^P$6T( zx8loV9$KSt>_N)X;ICx#;byhia0pRDEVaP73pv8Dt*g&O%2+7;pD;Bnb$ga*RB4B0 zo2-43V(_U=HeBSyHswyT>4XOJu-(v;BV+-dCV1523CF0C@qD#Z>cwNcKW85BY5w6$%5-=d zfpC;xqQ&CXQ;U|2s^7%nvS&Hs>eWr1MC0MLvco@`V29FHumA-Cy)rsJZA3X)QK2A^CM3tDpZzmc5ih65x3ZOn z|Hfj+d%I_xSxXYH>!z<*ct|!M&KI{GKNk?n#d!3N7oRt zD?UoA>vdH?a{pmm{;YqUkBb_P^oz{7+__;~(sEk4lPXvs9n2YZVPor*6C(tOs=qRi zzv^y`E4C7%aQPX=^%^uP55I8D_xVjc9u#sB!`Pb~{ib_ONP$8Eid$ zNdfz$VJn*^mq^5nL=JZ1410~uUM?2b{fr^ct2GG3Swfgm|L#BX-DS`0mHYad6cxJO26yB_+eDz}g1)C#&&wLry7+@o=R}0zMhXjvxCI;+qrxj26G@^p*E^H6>kh zPhXAu7}0fX^%g0e5M(*ui^CC?(^!K}G%{ptu$q&m83VmcKsI0*c$r**Y{9N~f&FB+ zu-o;u=^a<%*M7PJ8F(x}ZU$Jt{)>Fi|F?eWKl#7SRzs4D&t}8f{$)vq3OG!4E|#nl z;P6K(H-AeVJ*2`a54;FPY1p{GqtV2!oTQPJ>poNn$q^VTIrBc?En}i_jqKl9m6a3S zJLDPG^KhFp**pf*bCBuRFcQM>bWaX_N*mhyHIg1<5$^*NMSXp{D|@FVqunOKt_j}G%$|q+T7Zbqmel;Bn&zhYL|$bjqAoMrM%uBPdeAg7 zyWtr4NQQxPB+_D;ujP=vlI0NcrY#xq4JMLRf z6KyI^)_hd6jlbRZu5{9+d(p?Fb_3~nDyPP={VPRHG7^1wa#Z(wgv}TiHEca}rWuIW z=elmkc>*XN=HBuwQ$BIYjnFHBj43hlILD2(KjaPiz*GeY<4(mtdz67ZZj+!r>qcs7Q>b_>pXl7H9n5)D@i2oT6O6T?0$hZWo>)1P3I< z`^0FKawCkpDh;F@30*$Z)fq0g8OFma{zO7+SCh3kHzjo(>z@E~!fC~Q$I;~@vnT6= zo%X6Ib>^P`rrLCu*hk?UFaG{xq*r17Qj7e-=;%|C!{)MA2y&a|SYC05LDeu;+ zo6X|f83_Cfu(z8Ty=iV4`SW@uWcB&>+GQ?pt>A)a%D6S2{4BH1f-r%D7m@v$iI>S4 zE=Ce+NzQ^lj$o4%V+rZ+gPS2~5;g@PP~cz<{}-KD9lBBmg#yt!TKhk9h7Ffkcjv)xmU4q@O_+J8Gb*-LlX_ zGt&GCq&{(oTKh8EMuN_4$5=n=Dz11^diLy?@{*u8DKWg>Gg@YG>Rwq+r=UH<`)#Jf 
zAQ&*$C5vAAraGWHD<4Or@$+RdoI~ZKIc{lb5JGS5vV4(^t~5mYtOq#=-loG90yJ3r z>X_EZ6WU2@D0{@~sC_s5MI~X(;&vSYR4<4lGh45IZ^EY>`xe6Kx1X|5x~K;WrK?OB zCQ4F0Tp7)BKK8YzGn2pS#M=d7?-8jF`E^C~vYz@0zVx2IJ!td_Q~#KY0~Be3|BemU zUK_9p07W|xY&yzMMC5@K{ft{fpcvZ|!!le4fq3T6an0Zz(*rWAz@raU$^Kji`-3TQ zV>)Ykc_vHgsECPxMpLAbT{G7j&0c=b7qBYQG;l!0*(q$c{u4^^AODmMKmTtw{NHT& z|4KIen^OD{R~E}c@yQ1~k!NO`;qpNpFOdD_?~&pVMOxp0;=GQ{L%mN|1z>79nD1l8-v|6^qbME^metJz1`wAy zh&--4v2p*p8gT})Qc&^|H+(a94t3w0jnhydFmAZ4o!!!>X`_-v5bZ26zpImR-Ruhqq>o0=qRxsk|p!Z zKgor69rX)wOfeM7R_~fE`R~Zbg`h|S;2iyyLSWE>7Q0&^F&MC$>-Sb}$9qN{6Zgtr znOtuTF4C$Yka@}8ZWX5oRLeD*new$7wu7-CnF2j6%K;=46@FhmCZCDI11*0MK!DBb zf$292uZa0WL&*S|#Mv~D{nZcKLlkPR*|e-Lr(L?^<}bMfzDqWeY_5`# zUq>v%K%R&f$7(F6d)g%zgKAc5L<&P1L$`VoB0t(#5Qm(AmsR8P^Xb#2s1++mnkxyB z0s0l4!!+Q?LqS0qJF@Bk7k7_`=C$9fY0W!igl~_U*iXEaIJbwON0m9>i;0pgF52n5 zvhMLqA8i(ep96RA8clP5x(~^*%t@8O1!pebE3J1@^)MwJ_#F?Pa~H-*+!z3f5CUga ziYzxcPoLk_f^wf^I6$`J+?9sU*fcNU5M+gTcBtXo;ff5Oq#jF_7#G6jlN7U*{fyzf+5Zb?r2~7iu1LYeY|esOKy{BqY}2as1j$k>h~(clVNp`B`~+cV>;#Ga0rT!> z%t<`UF(!XiVvEMkn;ZutxOOcc)sP>Yq`dtyF2U$=^u|{8SoX#A_Inov&?ttzhD ztg=|&j+1CR%&U1|WSnQ*vSlI*mec94m#zoG+cW3Wd0VNzH9aSXl&fCgh6WZ?+1QHo zi4E7pi_CcoI4f%W`o^r%<}Q;z!rJ6^@*Py)?#o-1gF}2$Jhw*C+pjH>RkEo>>{CwJZ}Q~fyqTKNp|n@9XlS@1`OOh-rkd=JOysm((Q z8d9%{LDYB-EJ9wPnHf7TH4)I+vk?9SC?Q^1Svncu?gHo3oiok4_6I5IIS?)<9X$fb zuRU*afLZ_@9~ptt@Y{oM$YMVO>LW*ANvtOT7d;m?H0x9;;L3^Vb&rn0O$5cnn2OcK zVKzHd$f(}x0+8`tOVgTyt0*Ka4B_U;Q zBCBw*qy%z=ZC%x-s9iuk^9_UVrQDTgeZ-Q`-1^vzJrAsfy5ZNsE93ufHoX1Uz%i1^ zD%8-h=f0<%G6V!0NlL%N<5Mg)Jao^0)T48=^ymiJ?vNfGX(~`yMcl~a+l7cOa(!Ct zI%sDC(~nNX-;Ta?c-N%djbhODC9vTrVKl8{hJOp?7Ra^$U7SSjyxPD_YWD7Uw)ba7 zwsRp%hprQLZq^rG*$cAE?3CP*v$7rhyxkHsMk&Dz*12o-=BQ{R4cnBi0x~h-dnw%3 zaQ9@B-3s?L{XkfT?)&b$GTVQ7|H>*6h>VN4NiN8+of@|AJRT;IE^ba41jKZ{oOm~E zme!_#O|fQs;pPS%+CZE#q3*%Cot$SMhkT16qZD0kyx5UrucNI89NU!l?TEg}Mh8^a z_8S~J-bkN9`SOty$S&m~AVP-&5jp~h(52cpXp{*$o+uFij)z6`(+emFe$akxbx!2+@v#6l?K%yl{S~9s_BHkC*?4?wf9~0-NZ1+fwub1@u6jkp<*=A~ zjfpd$9g?jv&NwT_Z_j+MTk@eo=u{!W`Ijq)t4|IOek8t+3V%?ySOc5vn|G5ZAh%qy zHNxL41pNBR;SC=HH@kwTBQojuHRY6!7fotD>jSagbDhf5OGo+Gd}NMfb=!%70F@xm zJ&|;&e4l}rgm0&I-%~`}L`L(AJa52Z9~Pjtk|Q9Sj&mmr(<|Y9fAC(Tzk09V#cLF5AnM8lsy|S|gf=I?s#$#UZJ8q|i9y};5t z=i!CwiU{AUSqog&1&6eUfy??Hfw1^8CL#rZ=Faw@msmo2q*y9qPKV6F|(?05KPF^ao~ufDu+A8QNR4V#)zP zxBWoEX!4Uk}j1_l!5=6Q0x{?Qe!=PSJ+g_m_Y&5QJC>EUWNG z*7cO+XB|E)bF&rm3>}y*m=D&Y--5O1u@( zQ>vF;%cXNYYSPP(I7DH@5pP=)KjCzr@41hb8+08$~OGHdtem%6jHH+9i z1tv{Z2cA*4-{wG>I9`qqmOP$3J7xD&Nfrwtn{M0nF?&v|v;aRS;3H&aZSQ(&to)ki zc8oV2ar&kuO2jZBcx&}`XoOy>Igo(WW+NgrvPJfQ=4r5z&EDH@Fn0%<_ci;@vQvZ1 z!V;m3mqpyVuKiTza3(0yi(LEYCZF~CxyUw7{7Q#~cIjqK#=y82r|3;Lh(MII8DsAV zdiR)&6ayJ^0or&!m_xzBsy3=CTX(Ng^)C1oYlsHl|u%?cOe%wE1*mvaK(imD*q{CX%ssxN7kFu?$nv} zp)XDY%NAKb8+A18TXa?iAp~DMOC(f1uw&FtPY>pgHqFdsp?M!lPS=}#baez5SxY9A z^WV9d)X8zG^4IYr1=VtVeLI2Qf z9d!zrt@r%VY`y2J*;*`ore`0NU3iGW1Ra6upeO=^hzb6!#8s-jjw-Tfe*?dn<64yf00VG z6r_yK;n3oYjf6WQI2uUsn|B$e^kH_Y#rMo7{tDSIv$D$ueA%A{em?XWPO(o(gVu`ZU9Bs_E$=0(=fL$o1JPi3mo)hWMnha-rJn zx!mX-^XbQX&Wm`wThLbz5MgP&$F~>WTbf+0mlgaRPioa#7ht^huV$x->Vp6^= zL#NKZ{!-S>l{)`s{_R>B2ve@W>ETXB`3sl$SR;wa<L)_)|?)4u&1nAoVg@pf5Q(&!_#!K z2pJhdJ9A2g)SqNMgca|lznrrn)sPZ(RE_#A5gu>5tROooM3Im6gzs3X z-pWfJlo2dq@zixim-=fuyH;v0r2soIp@aYBM}v-=akP+IK(tkozJTUy14UVXFK7 z>Yl-<=`t*PG_XTIjbJF2zG+a5wl}DLtiBAl9l)k>RiVk}NcER*k(N{fnlKtk@S5-9 zW+5fU-$dYVjofcL{DF)9lNd9nOU~%7#fuUl1iI5jtMsO>||^hT%8*S)$?_ z>P`K^aq6sOfnN^CwUdUwL{MkSioWvDA5>}j?Fa4a2XtPF&r1bddjqW2mEB+w?VLKh zDfjDkP38p=VUr!uk(PX?&iT>ka?K;WK_lXmf%t^AW`8!S5?IXxx{vJrCj8EBHFl~& zeTWZg2la;tSc4J2#bHW@C(h>137bHb1`!2jUrcgmpqZAZ>!w*qFSytS*%?@J(fv>< 
zg(X$D-B4vFVnVN|`SmC9bJ-FCse7M4xMF?%?3sOg{-Nrdqx4f+*ajv&Fp3~| zH9HMZn~+Nagq0}-Ryq^Mzt=SQB&H3uYq-G;LB+q$H~jB+6#f_N;lbdyuVV%_p-Wr!q#muhM}omkkTFFX5`ve5>p8ge*#X2$=RdZNny9Km&u*UwzkV(*` zxCo~uBFi_l%aL>Pa-`q=*zAu8#V)jom$^%gYZ~ajU-PcKPisSDxEL4)L)A- z&d7bikim?2n2Sttxk)Eg60b2{^igy-t#3mR;9UP|0Oc%u!iJwnun4}|_e#1-(j2DF z;u3%tYOM`KM197%9iMN8Q#H{HZa>Hq0G<%GSYjzr{g~)E?llu^`5flj%WEV_0Bm-z z&I&M&xheIk=T|W}ptjxiGv;6)u4J`l_>>$^$h4q_EXLsQ%aDL~3VxZ!bUn?`;X{;o zSd`mms&j)*b#Ev2OPgAJ9&0_B!*PTht{ z*cSPMA2JM++M;GYZVTWLmzhU9H1TDYD> zJai*kU8m~mP0?G7TOCSZju>v_6OVlE`4-(UKz{c>B&vsgoJQB!ifB!Ojtbio-?brY zMoNIzz#7G^RmS)%@#RV0=_^+47IQ&TB9!>$#OgfzB4~bUyy{CF*>OOmQhU04ROx4` zb)(<>q5H+0e{&4Z@DM)$bAVkrsZ3CaA_ONczJiL`W5*p(0Yp7*Ynh*v_pf&Ap7dRO zdLkZ=w9S=0eN%&ePb|EELM7OYmC@hYOs^xNvQ30@EklSVyJ-#T%<;=YIqJXhS7I+f8!|jLnvx;n7R=%{mq$-0?t5`LVk!G5CZMRwX7nCLe(m z?vOVVLIVp_W8s>o277-_UCN1`TY4S4V?EgQ%3@EP!WCES30CJfbgeK06^`s6sru{f zZ)@ei#!_b=4v1TVzV>l3#j%b8g{)u)!8iRy&tNqtLF5j@+v4XYTOmE@c(5W}$v$p! zvXwFbfy@WPb(PV)Zj$k@WCxcm3}20!co*ekcBZs2FEWd;?Y+uK$fCy<`>F7N{=}=| z?MJ>??m{PkMii0sUx;M}hf+F;h|-wrPI+-+43L%1xY8v3-K@G0XYNH5h!zlhUf1}Z zc|%(41>DE2lv>t0p_Dyj z(2Aq2`a@?K(Si=2wdnI)n1;vLfY5Z%oM-)brxB@Tuv6Cn{CY=Bl%Vm}W+W$I0(!2* z)<6p@^~+kNpc2to_b+Qv4|en~XmtOlTX~=Z7@o<#(6BIFWQU{`B~b>cJM0b`Hyn>| zKtql1i>rsgje{fw8mx8-J7&egXQ5`wW_@DejaEL&ZXq&0EpW-`%n}-w9^#`GK>f&f zmaeohFhl0?(V;-Oo3}+W5b$cK0?igEEJ8W<1CP$cYIb03S~L3j{rc^5r=La2+rWL` z&*L)iT|nqT+7S#6c&;TXMC9N?|I~W(kt`D{R9u+Ql3T_`*x?EMqgP$#Q43nAWca(H zI98-d4^eMx<>V_5RN*@NV~4%^9`8)sQT?CBj{#z*sYj?|R}ZQrbS;yU28%1%J)8EP zq*9&bkg8oHxqqxhoqo60SM(RgEg;#gl}584;-Vo&8?x}2Bb7V(j?}`RlsyhnGdJ%T z7`Uxr=o~HOrVDPjIpayG@^`niugq>W-jrF%=PIUc}w?4PFjw<^KrqAu;V2<7BUBtw+2c!&XgYRcUn~d z8T1UvUeN&IgG--?Wky01P7xs-uf#}BiL1k7XE2)^F0je-`EYTanxQF6#s`h$R`7Of zR80l3@Afj>7S7;(q7puN&s;{6PK{ubAh!rPx*4LfMvU^|m$Q8?qhRYw{e^}7Yr{9? 
zdLt>MGuf3rn2j?nZ*&&!9ZPi1GR{(|QIxJ)9%6bzO}~4lPwuSf6oOslM73yQ#0f@G zL-|zYBu|a%EaOz;X+g31?z(MR;j<2^44>)t8JN}9wUo+*oh1fyb*B-R=N0;aJ4mmWJdJ^ypDSE>$vGVsq$X*6i|$VxPV_-C65H zykG|lCxBm3@s_yn+R_$D>Gt=op5-+o5G+7&Vs9oUZo5Gu-5{ zCpbu_cfYSWQq-k^SDzgy7qRIBO^5?tU|<;{d?)#O)sxiUWhC1odl3$GadY7QjSau{ z#3WlUZe6`rOU|-Qcu5AYjgcKhVUb*D*7p1y)&!0ta94Op=nF7wf82}&roOSa{ybpD z1L359EtTBdzQKF6jJx>@HESZurp;M4-v8W+*FWT;@&y*Nj7PTxf$wO_p z9ZY@APB})Q&&d~2d^xh$p#|mThERSX4jOEWZ+=<}(~R#2Rx!2nJZkhkq&o~?EuU@2 zPjX|3E?-*u7xb#EKZsW9vaEinJ9+hsoKSpAQ{w=Ph4j$fUJF-(p(sJ^cZ)Aw}e&1tjMprwC|(0IijdDX&zjYJHeq5byU&gw(fCb2za=?h^x+xxhRIr0K;!4 z1F^Z;aw;w90r|pKSY2y~<|^PR+N( zHLaSNgc~;ULKHW@>H!6UN3S~Acpla~Si)lKbDr0<5LvM8kleNPTF$zhE7B_plP>x~ z9oL;6sx_l492wxt&x9|bjP8Y6Cx_eWfO4UQ;emVZ?0GgdZAPVD2e-RBf$2xZD3z0T zfBa^wljOMTAZrgT=I+P9Ki{FTy1}uXfU5O>-DjX+k@wvuPcCt?KrT*U4t$yI(s{!l KUTNRB_kRFRpT64w literal 0 HcmV?d00001 diff --git a/Project2-Character-Recognition/src/main.cpp b/Project2-Character-Recognition/src/main.cpp index 685e5a4..0badaaa 100644 --- a/Project2-Character-Recognition/src/main.cpp +++ b/Project2-Character-Recognition/src/main.cpp @@ -1,8 +1,8 @@ /** * @file main.cpp * @brief Stream compaction test program - * @authors Kai Ninomiya - * @date 2015 + * @authors Chhavi Sharma + * @date 2019 * @copyright University of Pennsylvania */ @@ -18,152 +18,219 @@ #include unsigned long seed = time(0); +#define XOR 1 +#define CR 1 - //====CONFIG Neural Network for XOR ================ - //================================================== - -/* - // XOR - const int N = 4; // Number of examples - const int D = 2; //Feature length per example - const int H = 4; // Number of Hidden unit - const int C = 2; // NN number of classes - const double LR = 0.5; - const int epochs = 1000; - */ - - - // Char Recognition -const int N = 52; // Number of examples -const int D = 10201; // Feature length per example -const int H = 10; // Number of Hidden unit -const int C = 52; // NN number of classes -const double LR = 0.5; -const int epochs = 5000; - - - -double *losses = new double[epochs]; -double *idata = new double[N*D]; -int * preds = new int[N]; -int * gtruth = new int[N]; - +int N ; +int D ; +int H ; +int C ; +double LR; +int epochs; int main(int argc, char* argv[]) { - // Scan tests - printf("\n"); printf("****************\n"); printf("***MLP TESTS***\n"); printf("****************\n"); - /* - printf("Launch XOR Training\n"); - - // XOR input data set 2*4 - idata[0] = 0.0; - idata[1] = 0.0; - idata[2] = 0.0; - idata[3] = 1.0; - idata[4] = 1.0; - idata[5] = 0.0; - idata[6] = 1.0; - idata[7] = 1.0; - - // XOR ground truth data set 4 - gtruth[0] = 0; - gtruth[1] = 1; - gtruth[2] = 1; - gtruth[3] = 0; - - CharacterRecognition::trainMLP(N, D, H, C, idata, preds, gtruth, epochs, losses, LR, seed); - printf("\nCompleted XOR Training\n"); - // STORE LOSSES - std::ofstream myfile("xor_losses.txt"); - if (myfile.is_open()) - { - for (int i = 0; i < epochs; i++) { - myfile << std::fixed << std::setprecision(8) << losses[i]<<'\n'; - } - myfile.close(); - } - */ //========================================================================================== + //================================= XOR GATE NEURAL NET ==================================== //========================================================================================== - - printf("Launch CharRec Training\n"); - // Data loading - printf("Loading data...\n"); - int data_sz = 0; - int x = 0; - - std::string line; - int *id = new int[N*D]; - for (int i = 1; i <= 52; i++) { - std::string fname; - if (i < 10) 
-		{
-			fname = "C:\\Users\\chhavis\\cis565\\Project2-Number-Algorithms\\Project2-Character-Recognition\\data-set\\0" + std::to_string(i) + "info.txt";
-		}
-		else {
-			fname = "C:\\Users\\chhavis\\cis565\\Project2-Number-Algorithms\\Project2-Character-Recognition\\data-set\\" + std::to_string(i) + "info.txt";
-		}
-		std::ifstream myfile(fname);
-		std::stringstream sstream;
-		std::stringstream sstream2;
-		std::stringstream sstream3;
-
-		//std::cout<