-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Michael Beyeler
committed
Oct 14, 2014
1 parent
42ffcf7
commit afc4b40
Showing
2 changed files
with
151 additions
and
89 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,75 +1,54 @@ | ||
/* | ||
* Copyright (c) 2013 Regents of the University of California. All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions | ||
* are met: | ||
* | ||
* 1. Redistributions of source code must retain the above copyright | ||
* notice, this list of conditions and the following disclaimer. | ||
* | ||
* 2. Redistributions in binary form must reproduce the above copyright | ||
* notice, this list of conditions and the following disclaimer in the | ||
* documentation and/or other materials provided with the distribution. | ||
* | ||
* 3. The names of its contributors may not be used to endorse or promote | ||
* products derived from this software without specific prior written | ||
* permission. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
// | ||
// Copyright (c) 2011 Regents of the University of California. All rights reserved. | ||
// | ||
// Redistribution and use in source and binary forms, with or without | ||
// modification, are permitted provided that the following conditions | ||
// are met: | ||
// | ||
// 1. Redistributions of source code must retain the above copyright | ||
// notice, this list of conditions and the following disclaimer. | ||
// | ||
// 2. Redistributions in binary form must reproduce the above copyright | ||
// notice, this list of conditions and the following disclaimer in the | ||
// documentation and/or other materials provided with the distribution. | ||
// | ||
// 3. The names of its contributors may not be used to endorse or promote | ||
// products derived from this software without specific prior written | ||
// permission. | ||
// | ||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
|
||
|
||
/* | ||
* Large-scale simulation of cortical visual processing. | ||
* This code implements both (i) the color opponency model (De Valoisetal.,1958; LivingstoneandHubel,1984) as well as | ||
* (ii) the motion energy model (Simoncelli & Heeger, 1998). The original version of this code has been released as | ||
* part of the publication | ||
* Richert, M., Nageswaran, J.M., Dutt, N., and Krichmar, J.L. (2011). "An efficient simulation environment for modeling | ||
* large-scale cortical processing". Frontiers in Neuroinformatics 5, 1-15. | ||
* | ||
* For usage example, see CARLsim 2.0: | ||
* http://www.socsci.uci.edu/~jkrichma/CARLsim/index.html | ||
* (ii) the motion energy model (Simoncelli & Heeger, 1998). | ||
* | ||
* Creator: Micah Richert | ||
* Curator: Michael Beyeler | ||
* Created by: Micah Richert | ||
* Maintained by: Michael Beyeler <[email protected]> | ||
* | ||
* Ver 02/15/2013 | ||
* Ver 07/19/2013 | ||
*/ | ||
|
||
#include <cutil_inline.h> | ||
|
||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
#include <cufft.h> | ||
|
||
#include <cutil_inline.h> | ||
|
||
#define IMUL(a, b) __mul24(a, b) | ||
|
||
// In order to run the color opponency / motion energy model within your simulation, you need to declare the function | ||
// in your "main_[].cpp" à la: | ||
// void calcColorME(int nrX, int nrY, unsigned char* stim, float* red_green, float* green_red, float* blue_yellow, float* yellow_blue, float* ME, bool GPUpointers); | ||
// input arguments: | ||
// nrX: input dimension, x-coordinate | ||
// nrY: input dimension, y-coordinate | ||
// stim: RGB input stimulus, must be a nrX*nrY*3 array of unsigned chars, where the R, G, and B values of each | ||
// pixel is ordered as R1 G1 B1 R2 G2 B2 ... | ||
// output arguments (pre-allocated): | ||
// red_green: red-green channel, pre-allocated array of floats with dimensionality nrX*nrY | ||
// green_red: green-red channel: pre-allocated array of floats with dimensionality nrX*nrY | ||
// blue_yellow: blue-yellow channel: pre-allocated array of floats with dimensionality nrX*nrY | ||
// yellow_blue: yellow-blue channel: pre-allocated array of floats with dimensionality nrX*nrY | ||
// ME: motion energy, V1 complex cells: pre-allocated array of floats with dimensionality nrX*nrY*28*3 | ||
|
||
#define IMUL(a, b) __mul24(a, b) | ||
|
||
// 28 space-time orientations of V1 simple cells | ||
#define nrDirs 28 | ||
|
@@ -83,9 +62,8 @@ __constant__ float d_v1Dirs[3][nrDirs] = {{-0.6559, -0.1019, 0.6240, -0.7797, 0 | |
|
||
// this filter is used for the 3 different scales in space/time: | ||
// first scale == original image resolution | ||
// second scale == first scale blurred with Gaussian (width 1), so resolution should scale down sqrt(2) | ||
// third scale == second scale blurred with Gaussian (width 1) | ||
// FIXME: not sure why this guy is not normalized | ||
// second scale == first scale blurred with Gaussian (width 1) | ||
// third scale == second scale blurred with Gaussian (width 1) once again | ||
#define scalingFiltSize 5 | ||
__constant__ float d_scalingFilt[scalingFiltSize] = {0.0884, 0.3536, 0.5303, 0.3536, 0.0884}; | ||
float* scalingFilt; | ||
|
@@ -134,7 +112,7 @@ float* diff3filt; | |
|
||
// number of time steps to be considered in computation | ||
// nrT = 5*(3-1)-1 = 9 | ||
#define nrT (scalingFiltSize*(nrScales-1)-1)//(scalingFiltSize*nrScales-2) | ||
#define nrT 9 // (scalingFiltSize*(nrScales-1)-1) | ||
|
||
int stimBufX, stimBufY; | ||
float* d_resp; // will be the returned ME responses | ||
|
@@ -222,7 +200,6 @@ __global__ void dev_convn(float* idata, float* odata, int nrX, int nrN, int stri | |
// conv2D is only used in the color model | ||
// odata must be pre-allocated | ||
// the result will end up in idata... | ||
// FIXME: in conv1D the logic was: perform operation on idata and output to odata | ||
// filtlen can not be greater than CONVN_THREAD_SIZE2 | ||
void conv2D(float* idata, float* odata, dim3 _sizes, const float* filt, int filtlen) { | ||
unsigned int* sizes = (unsigned int*)&_sizes; | ||
|
@@ -243,8 +220,6 @@ void conv2D(float* idata, float* odata, dim3 _sizes, const float* filt, int filt | |
dim3 threads2(CONVN_THREAD_SIZE1, CONVN_THREAD_SIZE2, 1); | ||
dev_convn<<<grid2, threads2>>>(idata, odata, sizes[0], sizes[1], sizes[0], sizes[0]*sizes[1], sizes[2], filt, filtlen); | ||
cutilCheckMsg("dev_convn() execution failed\n"); | ||
|
||
// FIXME: shouldn't there be a tmp=idata; idata=odata; odata=tmp; here??? | ||
} | ||
|
||
// conv3D is only used in the motion model in freq space (\omega_x,\omega_y,\omega_t) | ||
|
@@ -378,19 +353,49 @@ void accumDiffStims(float *d_resp, float* diffV1GausBuf, uint3 _sizes, int order | |
// the scaling factor for this directial derivative; similar to the binomial coefficients | ||
int c = 6/factorials[orderX]/factorials[orderY]/factorials[orderT]; | ||
|
||
dev_accumDiffStims<<<iDivUp(_sizes.x*_sizes.y, 128), 128>>>(d_resp, diffV1GausBuf, _sizes.x*_sizes.y, c, orderX, orderY, orderT); | ||
cutilCheckMsg("dev_accumDiffStims() execution failed\n"); | ||
dev_accumDiffStims<<<iDivUp(_sizes.x*_sizes.y, 128), 128>>>(d_resp, diffV1GausBuf, _sizes.x*_sizes.y, c, orderX, orderY, orderT); | ||
cutilCheckMsg("dev_accumDiffStims() execution failed\n"); | ||
} | ||
|
||
|
||
// consider edge effects | ||
__global__ void dev_edges(float *data, int len, int nrX, int nrY) { | ||
const int tid = IMUL(blockDim.x, blockIdx.x) + threadIdx.x; | ||
const int threadN = IMUL(blockDim.x, gridDim.x); | ||
|
||
float edgeFactor; | ||
|
||
for(int i = tid; i < len; i += threadN) { | ||
int X = i%nrX; | ||
int Y = (i/nrX)%nrY; | ||
int scale = i/(nrX*nrY*28); | ||
|
||
// we decrease filter responses near edges (use a different scaling factor per spatiotemporal scale level) | ||
// scaling factors are chosen such that no more edge effects are visible in the direction tuning task, whatever | ||
// works... | ||
float edgedist = (float)min(min(X,nrX-1-X),min(Y,nrY-1-Y)); // box-shaped instead of round | ||
if (scale==0) | ||
edgeFactor = fmin(125.0f,edgedist*edgedist*edgedist)/125.0f; // 5px^3 | ||
else if (scale==1) | ||
edgeFactor = fmin(1296.0f,edgedist*edgedist*edgedist*edgedist)/1296.0f; // 6px^4 | ||
else | ||
edgeFactor = fmin(7776.0f,edgedist*edgedist*edgedist*edgedist*edgedist)/7776.0f; // 6px^5 | ||
|
||
float d = data[i]*edgeFactor; | ||
data[i] = d; | ||
} | ||
} | ||
|
||
|
||
// parallel half-wave rectification and squaring | ||
__global__ void dev_halfRect2(float *data, int len) { | ||
const int tid = IMUL(blockDim.x, blockIdx.x) + threadIdx.x; | ||
const int threadN = IMUL(blockDim.x, gridDim.x); | ||
|
||
for(int i = tid; i < len; i += threadN) { | ||
float d = data[i]; | ||
d = (d>0)?d:0; | ||
data[i] = d*d; | ||
float d = data[i]*6.6084f; // scaleFactors.v1Linear | ||
// d = (d>0)?d:0; // Matlab code and Rust et al, 2006 do not half-rectify anymore | ||
data[i] = d*d*1.9263; // scaleFactors.v1FullWaveRectified | ||
} | ||
} | ||
|
||
|
@@ -410,8 +415,8 @@ __global__ void dev_mean3(float *idata, float *odata, int nrXnrY, int nrZ) { | |
} | ||
} | ||
|
||
|
||
// population normalization of complex cell responses | ||
// note the 0.1, probably to avoid division by zero | ||
__global__ void dev_normalize(float *resp, float *pop, int nrXnrY, int nrZ) { | ||
const int tid = IMUL(blockDim.x, blockIdx.x) + threadIdx.x; | ||
const int threadN = IMUL(blockDim.x, gridDim.x); | ||
|
@@ -420,7 +425,7 @@ __global__ void dev_normalize(float *resp, float *pop, int nrXnrY, int nrZ) { | |
for(int i = tid; i < nrXnrY; i += threadN) { | ||
float norm = pop[i+blockIdx.y*nrXnrY]; | ||
int ind = i + blockIdx.y*blockSize; | ||
for (int j=0; j < nrZ; j++) resp[ind+j*nrXnrY] /= (norm + 0.1); | ||
for (int j=0; j < nrZ; j++) resp[ind+j*nrXnrY] /= (norm + 0.1*0.1); // scaleFactors.v1sigma | ||
} | ||
} | ||
|
||
|
@@ -559,7 +564,14 @@ void calcColorME(int nrX, int nrY, unsigned char* stim, float* red_green, float* | |
float* center_filt = v1Gaus; | ||
float* surround_filt = complexV1Filt; | ||
const int center_filtSize = v1GausSize; | ||
const int surround_filtSize = complexV1FiltSize; | ||
const int surround_filtSize = complexV1FiltSize; | ||
|
||
/* size_t avail, total, previous; | ||
float toGB = 1024.0*1024.0*1024.0; | ||
cudaMemGetInfo(&avail,&total); | ||
printf("after ME:\t\t\t%2.3f GB\t%2.3f GB\t%2.3f GB\n",(float)(avail)/toGB,(float)((total-avail)/toGB),(float)(avail/toGB)); | ||
previous=avail; */ | ||
|
||
|
||
cutilSafeCall(cudaMemcpy(d_stim,stim,3*nrX*nrY,cudaMemcpyHostToDevice)); | ||
dev_split<<<iDivUp(nrX*nrY,128), 128>>>(d_stim, d_red, d_green, d_blue, &d_stimBuf[nrX*nrY*(nrT-1)], nrX*nrY); | ||
|
@@ -687,51 +699,97 @@ void calcColorME(int nrX, int nrY, unsigned char* stim, float* red_green, float* | |
} | ||
} | ||
} | ||
|
||
// perform half-rectification on V1 simple cell responses | ||
// eq.4 in S&H: halfrec[L(t)]^2 = max[0,L(t)]^2 | ||
|
||
// consider edge effects | ||
dev_edges<<<iDivUp(nrX*nrY*nrDirs*nrScales,128), 128>>>(d_resp, nrX*nrY*nrDirs*nrScales, nrX, nrY); | ||
cutilCheckMsg("dev_edges() execution failed\n"); | ||
|
||
|
||
// half-square the linear responses | ||
// contains scaleFactor.v1Linear and scaleFactor.v1FullWaveRectified | ||
dev_halfRect2<<<iDivUp(nrX*nrY*nrDirs*nrScales,128), 128>>>(d_resp, nrX*nrY*nrDirs*nrScales); | ||
cutilCheckMsg("dev_halfRect2() execution failed\n"); | ||
|
||
float* tmp; | ||
|
||
// The following two steps have reversed order... S&H first compute the normalized simple cell response, S_n(t), | ||
// and then compute the local average. We do it the other way around. We might be doing that because we need a | ||
// normalized responses at the complex cell level (this is our output), but S&H does only normalize at the simple | ||
// cell and at the MT level. Also, the order of normalizing / local averaging should not matter (interchangeable) | ||
|
||
// complex: convolve by d_complexV1Filt in 2D | ||
cutilSafeCall(cudaMalloc((void**)&tmp, sizeof(float)*nrX*nrY*nrDirs*nrScales)); | ||
uint3 sizes = make_uint3(nrX,nrY,nrDirs*nrScales); | ||
conv2D(d_resp, tmp, sizes, complexV1Filt, complexV1FiltSize); | ||
cutilSafeCall(cudaFree(tmp)); | ||
|
||
// scale with scaleFactors.v1Blur | ||
// NOTE: scaling with 1.0205..? Skip to save computation time | ||
// dev_scale<<<iDivUp(nrX*nrY*nrDirs*nrScales,128), 128>>>(d_resp, 1.0205, nrX*nrY*nrDirs*nrScales); | ||
// cutilCheckMsg("dev_scale() execution failed\n"); | ||
|
||
|
||
// we need to associate each filter at pixel position (x,y) with a power/intensity, but there are 28 filter | ||
// responses at each location... so we need to (i) average over the 28 filters (3rd dimension in d_resp) and put it | ||
// into d_pop ... | ||
dim3 gridm(iDivUp(nrX*nrY,128), nrScales); | ||
dev_mean3<<<gridm, 128>>>(d_resp, d_pop, nrX*nrY, nrDirs); | ||
cutilCheckMsg("dev_mean3() execution failed\n"); | ||
|
||
// ... and (ii) sum over some spatial neighborhood | ||
// ... (ii) scale with scaleFactors.v1Complex | ||
// NOTE: Scale with 0.99..? Skip to save computation time | ||
// dev_scale<<<iDivUp(nrX*nrY*nrDirs*nrScales,128), 128>>>(d_resp, 0.99, nrX*nrY*nrDirs*nrScales); | ||
// cutilCheckMsg("dev_scale() execution failed\n"); | ||
|
||
|
||
// ... and (iii) sum over some spatial neighborhood | ||
// population normalization: convolve by d_normV1filtSize in 2D | ||
uint3 nsizes = make_uint3(nrX,nrY,nrScales); | ||
cutilSafeCall(cudaMalloc((void**)&tmp, sizeof(float)*nrX*nrY*nrScales)); | ||
conv2D(d_pop, tmp, nsizes, normV1filt, normV1filtSize); | ||
cutilSafeCall(cudaFree(tmp)); | ||
|
||
// don't scale with scaleFactors.v1NormalizationStrength * scaleFactors.v1NormalizationPopulationK | ||
// since we don't normalize over the WHOLE population, these factors are off | ||
// the purpose of this normalization is to get a htan()-like response normalization: a scaling factor of 1.0 turns | ||
// out to be good enough | ||
dev_scale<<<iDivUp(nrX*nrY*nrScales,128), 128>>>(d_pop, 1.0, nrX*nrY*nrScales); | ||
cutilCheckMsg("dev_scale() execution failed\n"); | ||
|
||
// d_resp is the numerator, d_pop the denominator sum term | ||
dev_normalize<<<gridm, 128>>>(d_resp, d_pop, nrX*nrY, nrDirs); | ||
cutilCheckMsg("dev_normalize() execution failed\n"); | ||
|
||
// Scaling factors were chosen such that in a RDK task all 3 scales have similar mean responses | ||
// We believe this to be a reasonable assumption considering that dots do not suffer from the aperture problem; | ||
// thus the model should have similar response strengths at all 3 scales | ||
for (int scale=0; scale<nrScales; scale++) | ||
dev_scale<<<iDivUp(nrX*nrY*nrDirs,128), 128>>>(&d_resp[scale*nrX*nrY*nrDirs], (scale==0?1000000:(scale==1?500000:100000))/255.0/255*50, nrX*nrY*nrDirs); | ||
dev_scale<<<iDivUp(nrX*nrY*nrDirs,128), 128>>>(&d_resp[scale*nrX*nrY*nrDirs], (scale==0?15.0f:(scale==1?17.0f:11.0f)), nrX*nrY*nrDirs); // 15 17.5 17 | ||
|
||
// copy response to the Host side | ||
// copy response to device or host (depending on whether we run in CPU_MODE or GPU_MODE) | ||
cutilSafeCall(cudaMemcpy(ME,d_resp,sizeof(float)*nrX*nrY*nrDirs*nrScales,GPUpointers?cudaMemcpyDeviceToDevice:cudaMemcpyDeviceToHost)); | ||
|
||
/* | ||
unsigned int free, total; | ||
CUresult res = cuMemGetInfo(&free, &total); | ||
if(res != CUDA_SUCCESS) printf("!!!! cuMemGetInfo failed! (status = %x)", res); | ||
printf("used GPU memory %ld\n", total - free); | ||
size_t avail, total, used; | ||
cudaMemGetInfo( &avail, &total ); | ||
used = total - avail; | ||
printf("used GPU memory %f MB\n",(float)(used/1024.0/1024.0)); | ||
*/ | ||
} | ||
|
||
// free all allocated blocks | ||
void freeAllCUDA() { | ||
cutilSafeCall(cudaFree(d_stimBuf)); | ||
cutilSafeCall(cudaFree(diffV1GausBufT)); | ||
cutilSafeCall(cudaFree(d_stim)); | ||
cutilSafeCall(cudaFree(d_scalingStimBuf)); | ||
cutilSafeCall(cudaFree(d_v1GausBuf)); | ||
cutilSafeCall(cudaFree(d_diffV1GausBuf)); | ||
cutilSafeCall(cudaFree(d_pop)); | ||
cutilSafeCall(cudaFree(d_red)); | ||
cutilSafeCall(cudaFree(d_green)); | ||
cutilSafeCall(cudaFree(d_blue)); | ||
cutilSafeCall(cudaFree(d_center)); | ||
cutilSafeCall(cudaFree(d_surround)); | ||
cutilSafeCall(cudaFree(d_color_tmp)); | ||
cutilSafeCall(cudaFree(d_color_tmp_green)); | ||
cutilSafeCall(cudaFree(d_color_tmp_yellow)); | ||
|
||
stimBufX = 0; | ||
stimBufY = 0; | ||
} |