@@ -18,8 +18,8 @@ ball tree.

Technically, this project is a library which exports the two functions
defined in `kmcuda.h`: `kmeans_cuda` and `knn_cuda`.
- It has the built-in Python3 native extension support, so you can
- `from libKMCUDA import kmeans_cuda`.
+ It has built-in Python3 and R native extension support, so you can
+ `from libKMCUDA import kmeans_cuda` or `dyn.load("libKMCUDA.so")`.

[![source{d}](img/sourced.png)](http://sourced.tech)
<p align="right"><a href="img/kmeans_image.ipynb">How was this created?</a></p>
@@ -33,16 +33,23 @@ Table of contents
* [macOS](#macos)
* [Testing](#testing)
* [Benchmarks](#benchmarks)
- * [100000x256@1024](#100000x2561024)
+ * [100,000x256@1024](#100000x2561024)
* [Configuration](#configuration)
* [Contestants](#contestants)
* [Data](#data)
* [Notes](#notes-1)
+ * [8,000,000x256@1024](#8000000x2561024)
+ * [Data](#data-1)
+ * [Notes](#notes-2)
* [Python examples](#python-examples)
* [K-means, L2 (Euclidean) distance](#k-means-l2-euclidean-distance)
- * [K-means, angular (cosine) distance average](#k-means-angular-cosine-distance--average)
+ * [K-means, angular (cosine) distance + average](#k-means-angular-cosine-distance--average)
* [K-nn](#k-nn-1)
* [Python API](#python-api)
+ * [R examples](#r-examples)
+ * [K-means](#k-means-1)
+ * [K-nn](#k-nn-2)
+ * [R API](#r-api)
* [C examples](#c-examples)
* [C API](#c-api)
* [License](#license)
@@ -123,6 +130,7 @@ It requires cudart 8.0 / Pascal and OpenMP 4.0 capable compiler. The build has
been tested primarily on Linux, but it works on macOS too with some extra effort
(see the "macOS" subsection).
If you do not want to build the Python native module, add `-D DISABLE_PYTHON=y`.
+ If you do not want to build the R native module, add `-D DISABLE_R=y`.
If CUDA is not automatically found, add `-D CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-8.0`
(change the path to the actual one). By default, CUDA kernels are compiled for
architecture 60 (Pascal). It is possible to override it via `-D CUDA_ARCH=52`,
@@ -167,8 +175,6 @@ Benchmarks
----------

### 100000x256@1024
- Comparison of some KMeans implementations:
-
| | sklearn KMeans | KMeansRex | KMeansRex OpenMP | Serban | kmcuda | kmcuda 2 GPU |
|------------|----------------|-----------|------------------|--------|--------|--------------|
| time, s | 164 | 36 | 20 | 10.6 | 9.2 | 5.5 |
@@ -193,6 +199,21 @@ Comparison of some KMeans implementations:
#### Notes
100000 is the maximum size Serban KMeans can handle.

+ ### 8000000x256@1024
+ | | sklearn KMeans | KMeansRex | KMeansRex OpenMP | Serban | kmcuda 2 GPU | kmcuda Yinyang 2 GPU |
+ |------------|----------------|-----------|------------------|--------|--------------|----------------------|
+ | time | please no | - | 6h 34m | fail | 44m | 36m |
+ | memory, GB | - | - | 205 | fail | 8.7 | 10.4 |
+
+ kmeans++ initialization, 93 iterations (equivalent to a 1% reassignments tolerance).
+
+ #### Data
+ 8,000,000 secret production samples.
+
+ #### Notes
+ KMeansRex ate 205 GB of RAM at peak; it uses dynamic memory, so it constantly
+ bounced between 100 GB and 200 GB.
+

Python examples
---------------

@@ -276,7 +297,7 @@ calculated 0.276552 of all the distances
Python API
----------
```python
- def kmeans_cuda(samples, clusters, tolerance=0.0, init="k-means++",
+ def kmeans_cuda(samples, clusters, tolerance=0.01, init="k-means++",
                 yinyang_t=0.1, metric="L2", average_distance=False,
                 seed=time(), device=0, verbosity=0)
```
@@ -289,18 +310,20 @@ def kmeans_cuda(samples, clusters, tolerance=0.0, init="k-means++",

**clusters** integer, the number of clusters.

- **tolerance** float, if the relative number of reassignments drops below this value, stop.
+ **tolerance** float, if the relative number of reassignments drops below this value,
+ the algorithm stops.

**init** string or numpy array, sets the method for centroids initialization,
- may be "k-means++"/"kmeans++", "random" or numpy array of shape
+ may be "k-means++", "afk-mc2", "random" or numpy array of shape
\[**clusters**, number of features\]. dtype must be float32.

**yinyang_t** float, the relative number of cluster groups, usually 0.1.
+ 0 disables Yinyang refinement.

**metric** str, the name of the distance metric to use. The default is Euclidean (L2),
- can be changed to "cos" to behave as Spherical K-means with the
- angular distance. Please note that samples *must* be normalized in that
- case.
+ it can be changed to "cos" to switch to Spherical K-means with the
+ angular distance. Please note that samples *must* be normalized
+ in the latter case.

**average_distance** boolean, the value indicating whether to calculate
the average distance between cluster elements and
@@ -309,17 +332,18 @@ def kmeans_cuda(samples, clusters, tolerance=0.0, init="k-means++",

**seed** integer, random generator seed for reproducible results.

- **device** integer, bitwise OR-ed CUDA device indices, e.g. 1 means first device, 2 means second device,
- 3 means using first and second device. Special value 0 enables all available devices.
- The default is 0.
+ **device** integer, bitwise OR-ed CUDA device indices, e.g. 1 means the first device,
+ 2 means the second device, 3 means using the first and second devices. The special
+ value 0 enables all available devices. The default is 0.

**verbosity** integer, 0 means complete silence, 1 means mere progress logging,
2 means lots of output.

- **return** tuple(centroids, assignments). If **samples** was a numpy array or
- a host pointer tuple, the types are numpy arrays, otherwise, raw pointers
- (integers) allocated on the same device. If **samples** are float16,
- the returned centroids are float16 too.
+ **return** tuple(centroids, assignments\[, average_distance\]).
+ If **samples** was a numpy array or a host pointer tuple, the types
+ are numpy arrays, otherwise, raw pointers (integers) allocated on the
+ same device. If **samples** are float16, the returned centroids are
+ float16 too.
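
For illustration, a minimal sketch of a call that requests the optional average
distance, assuming synthetic random data and that `libKMCUDA` is importable:

```python
import numpy
from libKMCUDA import kmeans_cuda

# synthetic float32 samples: 10000 points with 32 features each
numpy.random.seed(0)
samples = numpy.random.rand(10000, 32).astype(numpy.float32)

# average_distance=True asks for the optional third element of the returned tuple
centroids, assignments, avg_distance = kmeans_cuda(
    samples, 50, tolerance=0.01, init="k-means++", yinyang_t=0.1,
    metric="L2", average_distance=True, seed=777, verbosity=1)

print(centroids.shape)    # (50, 32)
print(assignments.shape)  # (10000,)
print(avg_distance)       # average distance between samples and their centroids
```

Dropping `average_distance=True` makes the call return just the
(centroids, assignments) pair.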

```python
def knn_cuda(k, samples, centroids, assignments, metric="L2", device=0, verbosity=0)
@@ -342,6 +366,108 @@ def knn_cuda(k, samples, centroids, assignments, metric="L2", device=0, verbosit
to be compatible with uint32. If **samples** is a tuple then
**assignments** is a pointer. The shape is (number of samples,).

+ **metric** str, the name of the distance metric to use. The default is Euclidean (L2),
+ it can be changed to "cos" to switch to Spherical K-means with the
+ angular distance. Please note that samples *must* be normalized
+ in the latter case.
+
+ **device** integer, bitwise OR-ed CUDA device indices, e.g. 1 means the first device,
+ 2 means the second device, 3 means using the first and second devices. The special
+ value 0 enables all available devices. The default is 0.
+
+ **verbosity** integer, 0 means complete silence, 1 means mere progress logging,
+ 2 means lots of output.
+
+ **return** neighbor indices. If **samples** was a numpy array or
+ a host pointer tuple, the return type is numpy array, otherwise, a
+ raw pointer (integer) allocated on the same device. The shape is
+ (number of samples, k).
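
For illustration, a sketch of the usual pipeline, clustering first and then
querying neighbors, assuming synthetic random data and that `libKMCUDA` is
importable:

```python
import numpy
from libKMCUDA import kmeans_cuda, knn_cuda

# synthetic float32 samples: 10000 points with 32 features each
numpy.random.seed(0)
samples = numpy.random.rand(10000, 32).astype(numpy.float32)

# knn_cuda reuses the centroids and assignments produced by kmeans_cuda
centroids, assignments = kmeans_cuda(samples, 50, seed=777, verbosity=1)
neighbors = knn_cuda(10, samples, centroids, assignments,
                     metric="L2", verbosity=1)

print(neighbors.shape)  # (10000, 10): 10 nearest neighbor indices per sample
```

Each row of the returned matrix holds the k neighbor indices of the
corresponding sample, in the same order as **samples**.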
+
+ R examples
+ ----------
+ #### K-means
+ ```R
+ dyn.load("libKMCUDA.so")
+ samples = replicate(4, runif(16000))
+ result = .External("kmeans_cuda", samples, 50, tolerance=0.01,
+                    seed=777, verbosity=1, average_distance=TRUE)
+ print(result$average_distance)
+ print(result$centroids[1:10,])
+ print(result$assignments[1:10])
+ ```
+
+ #### K-nn
+ ```R
+ dyn.load("libKMCUDA.so")
+ samples = replicate(4, runif(16000))
+ cls = .External("kmeans_cuda", samples, 50, tolerance=0.01,
+                 seed=777, verbosity=1)
+ result = .External("knn_cuda", 20, samples, cls$centroids, cls$assignments,
+                    verbosity=1)
+ print(result[1:10,])
+ ```
+
+ R API
+ -----
+ ```R
+ function kmeans_cuda(
+     samples, clusters, tolerance=0.01, init="k-means++", yinyang_t=0.1,
+     metric="L2", average_distance=FALSE, seed=Sys.time(), device=0, verbosity=0)
+ ```
+ **samples** real matrix of shape \[number of samples, number of features\]
+ or a list of real matrices which are rbind()-ed internally. No more
+ than INT32_MAX samples and UINT16_MAX features are supported.
+
+ **clusters** integer, the number of clusters.
+
+ **tolerance** real, if the relative number of reassignments drops below this value,
+ the algorithm stops.
+
+ **init** character vector or real matrix, sets the method for centroids initialization,
+ may be "k-means++", "afk-mc2", "random" or a real matrix of shape
+ \[**clusters**, number of features\].
+
+ **yinyang_t** real, the relative number of cluster groups, usually 0.1.
+ 0 disables Yinyang refinement.
+
+ **metric** character vector, the name of the distance metric to use. The default
+ is Euclidean (L2), it can be changed to "cos" to switch to Spherical
+ K-means with the angular distance. Please note that
+ samples *must* be normalized in the latter case.
+
+ **average_distance** logical, the value indicating whether to calculate
+ the average distance between cluster elements and
+ the corresponding centroids. Useful for finding
+ the best K. Returned as the third list element.
+
+ **seed** integer, random generator seed for reproducible results.
+
+ **device** integer, bitwise OR-ed CUDA device indices, e.g. 1 means the first device,
+ 2 means the second device, 3 means using the first and second devices. The special
+ value 0 enables all available devices. The default is 0.
+
+ **verbosity** integer, 0 means complete silence, 1 means mere progress logging,
+ 2 means lots of output.
+
+ **return** list(centroids, assignments\[, average_distance\]). Indices in
+ assignments start from 1.
+
+ ```R
+ function knn_cuda(k, samples, centroids, assignments, metric="L2", device=0, verbosity=0)
+ ```
+ **k** integer, the number of neighbors to search for each sample. Must be ≤ 2<sup>16</sup>.
+
+ **samples** real matrix of shape \[number of samples, number of features\]
+ or a list of real matrices which are rbind()-ed internally.
+ In the latter case, it is possible to pass in more than INT32_MAX
+ samples.
+
+ **centroids** real matrix with precalculated clusters' centroids (e.g., using
+ kmeans() or kmeans_cuda()).
+
+ **assignments** integer vector with sample-cluster associations. Indices start
+ from 1.
+
**metric** str, the name of the distance metric to use. The default is Euclidean (L2),
can be changed to "cos" to behave as Spherical K-means with the
angular distance. Please note that samples *must* be normalized in that
@@ -354,10 +480,8 @@ def knn_cuda(k, samples, centroids, assignments, metric="L2", device=0, verbosit
**verbosity** integer, 0 means complete silence, 1 means mere progress logging,
2 means lots of output.

- **return** neighbor indices. If **samples** was a numpy array or
- a host pointer tuple, the return type is numpy array, otherwise, a
- raw pointer (integer) allocated on the same device. The shape is
- (number of samples, k).
+ **return** integer matrix with neighbor indices. The shape is (number of samples, k).
+ Indices start from 1.

C examples
----------