bnosac · seonghobae · Jan 31, 2020 · Jan 31, 2020 · Jan 31, 2020 · Jan 31, 2020
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -12,6 +12,6 @@ License: MPL-2.0
 URL: https://github.com/bnosac/textrank
 Encoding: UTF-8
 Imports: utils, data.table (>= 1.9.6), igraph, digest
-Suggests: textreuse, knitr, udpipe (>= 0.2)
-RoxygenNote: 6.0.1
+Suggests: textreuse, knitr, udpipe (>= 0.2), future.apply
+RoxygenNote: 7.0.2
 VignetteBuilder: knitr
diff --git a/R/textrank.R b/R/textrank.R
@@ -49,6 +49,7 @@ textrank_jaccard <- function(termsa, termsb){
 #'                          textrank_candidates = candidates)
 #' summary(tr, n = 2)
 textrank_candidates_lsh <- function(x, sentence_id, minhashFUN, bands){
+
   textrank_id_1 <- textrank_id_2 <- sentence_id.left <- sentence_id.right <- hash <- band <- NULL
 
   stopifnot(length(x) == length(sentence_id))
@@ -59,17 +60,34 @@ textrank_candidates_lsh <- function(x, sentence_id, minhashFUN, bands){
   if(length(examplehash) %% rows != 0) {
     stop(sprintf("the number of hashes (%s) should be a multiple of bands (%s)", length(examplehash), bands))
   }
-  hash_bands <- unlist(lapply(seq_len(bands), FUN=function(i) rep(i, times = rows)))
+
+  if(requireNamespace('future.apply', quietly = T)){
+    hash_bands <- unlist(future.apply::future_lapply(seq_len(bands), FUN=function(i) rep(i, times = rows)))
+  } else {
+    hash_bands <- unlist(lapply(seq_len(bands), FUN=function(i) rep(i, times = rows)))
+  }
+
 
   sentence_to_bucket <- split(x, sentence_id)
-  sentence_to_bucket <- mapply(sentence_id = names(sentence_to_bucket), sentence_to_bucket, FUN=function(sentence_id, words){
-    buckets <- data.table(sentence_id = sentence_id,
-                          hash = minhashFUN(words),
-                          band = hash_bands)
-    buckets <- buckets[, list(bucket = digest::digest(object = list(hashes = hash, b = band[1]))), by = list(sentence_id, band)]
-    buckets <- buckets[, c("sentence_id", "bucket"), with = FALSE]
-    buckets
-  }, SIMPLIFY = FALSE)
+  if(requireNamespace('future.apply', quietly = T)){
+    sentence_to_bucket <- future.apply::future_mapply(sentence_id = names(sentence_to_bucket), sentence_to_bucket, FUN=function(sentence_id, words){
+      buckets <- data.table(sentence_id = sentence_id,
+                            hash = minhashFUN(words),
+                            band = hash_bands)
+      buckets <- buckets[, list(bucket = digest::digest(object = list(hashes = hash, b = band[1]))), by = list(sentence_id, band)]
+      buckets <- buckets[, c("sentence_id", "bucket"), with = FALSE]
+      buckets
+    }, SIMPLIFY = FALSE)
+  } else {
+    sentence_to_bucket <- mapply(sentence_id = names(sentence_to_bucket), sentence_to_bucket, FUN=function(sentence_id, words){
+      buckets <- data.table(sentence_id = sentence_id,
+                            hash = minhashFUN(words),
+                            band = hash_bands)
+      buckets <- buckets[, list(bucket = digest::digest(object = list(hashes = hash, b = band[1]))), by = list(sentence_id, band)]
+      buckets <- buckets[, c("sentence_id", "bucket"), with = FALSE]
+      buckets
+    }, SIMPLIFY = FALSE)
+  }
   sentence_to_bucket <- data.table::rbindlist(sentence_to_bucket)
 
   candidates <- merge(sentence_to_bucket, sentence_to_bucket, by = "bucket", suffixes = c(".left", ".right"), all.x=TRUE, all.y=FALSE)
@@ -103,14 +121,28 @@ textrank_candidates_all <- function(x){
   stopifnot(x_length > 1)
   if(x_length < 200){
     candidates <- utils::combn(x = x, m = 2, simplify = FALSE)
-    candidates <- lapply(candidates, FUN=function(x){
-      list(textrank_id_1 = x[1],
-           textrank_id_2 = x[2])
-    })
-  }else{
-    candidates <- lapply(seq(x)[-x_length], function(i){
-      data.table::data.table(textrank_id_1 = x[i], textrank_id_2 = x[(i+1L):x_length])
-    })
+    if(requireNamespace('future.apply', quietly = T)){
+      candidates <- future.apply::future_lapply(candidates, FUN=function(x){
+        list(textrank_id_1 = x[1],
+             textrank_id_2 = x[2])
+      })
+    } else {
+      candidates <- lapply(candidates, FUN=function(x){
+        list(textrank_id_1 = x[1],
+             textrank_id_2 = x[2])
+      })
+    }
+  } else {
+    if(requireNamespace('future.apply', quietly = T)){
+      candidates <- future.apply::future_lapply(seq(x)[-x_length], function(i){
+        data.table::data.table(textrank_id_1 = x[i], textrank_id_2 = x[(i+1L):x_length])
+      })
+    } else {
+      candidates <- lapply(seq(x)[-x_length], function(i){
+        data.table::data.table(textrank_id_1 = x[i], textrank_id_2 = x[(i+1L):x_length])
+      })
+    }
+
   }
   candidates <- data.table::rbindlist(candidates)
   candidates <- setDF(candidates)
@@ -195,6 +227,8 @@ textrank_sentences <- function(data, terminology,
                      max = 1000,
                      options_pagerank = list(directed = FALSE),
                      ...){
+
+
   textrank_id <- NULL
 
   stopifnot(sum(duplicated(data[, 1])) == 0)
@@ -226,9 +260,17 @@ textrank_sentences <- function(data, terminology,
     max <- min(nrow(sent2sent_distance), max)
     sent2sent_distance <- sent2sent_distance[sample.int(n = nrow(sent2sent_distance), size = max), ]
   }
-  sent2sent_distance <- mapply(id1 = sent2sent_distance$textrank_id_1,
-                               id2 = sent2sent_distance$textrank_id_2, FUN = sentence_dist, MoreArgs = list(distFUN = textrank_dist, ...),
-                               SIMPLIFY = FALSE)
+
+  if(requireNamespace('future.apply', quietly = T)){
+    sent2sent_distance <- future.apply::future_mapply(id1 = sent2sent_distance$textrank_id_1,
+                                                      id2 = sent2sent_distance$textrank_id_2, FUN = sentence_dist, MoreArgs = list(distFUN = textrank_dist, ...),
+                                                      SIMPLIFY = FALSE)
+  } else {
+    sent2sent_distance <- mapply(id1 = sent2sent_distance$textrank_id_1,
+                                 id2 = sent2sent_distance$textrank_id_2, FUN = sentence_dist, MoreArgs = list(distFUN = textrank_dist, ...),
+                                 SIMPLIFY = FALSE)
+  }
+
   sent2sent_distance <- data.table::rbindlist(sent2sent_distance)
 
   ## Calculate pagerank
@@ -389,11 +431,22 @@ textrank_keywords <- function(x, relevant=rep(TRUE, length(x)), p = 1/3, ngram_m
     keywordcombinations$ngram <- ifelse(stop_already, keywordcombinations$ngram, keywordcombinations$ngram + 1L)
     output_per_ngram[[i]] <- keywordcombinations[keywordcombinations$ngram == i, ]
   }
-  output_per_ngram <- lapply(output_per_ngram, FUN=function(x){
-    x <- x[!is.na(keyword), list(freq = .N), by = list(keyword, ngram)]
-    x <- x[order(freq, decreasing=TRUE), ]
-    x
-  })
+
+
+  if(requireNamespace('future.apply', quietly = T)){
+    output_per_ngram <- future.apply::future_lapply(output_per_ngram, FUN=function(x){
+      x <- x[!is.na(keyword), list(freq = .N), by = list(keyword, ngram)]
+      x <- x[order(freq, decreasing=TRUE), ]
+      x
+    })
+  } else {
+    output_per_ngram <- lapply(output_per_ngram, FUN=function(x){
+      x <- x[!is.na(keyword), list(freq = .N), by = list(keyword, ngram)]
+      x <- x[order(freq, decreasing=TRUE), ]
+      x
+    })
+  }
+
   output_per_ngram <- data.table::rbindlist(output_per_ngram)
   output_per_ngram <- setDF(output_per_ngram)
 

diff --git a/man/summary.textrank_sentences.Rd b/man/summary.textrank_sentences.Rd
diff --git a/man/textrank_keywords.Rd b/man/textrank_keywords.Rd
diff --git a/man/textrank_sentences.Rd b/man/textrank_sentences.Rd