Merge pull request #477 from massimoaria/develop

massimoaria · web-flow · commit 0a8ac2dd9615 · 2024-07-01T16:34:40.000+02:00
CRAN submission 4.3.0
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: bibliometrix
 Type: Package
 Title: Comprehensive Science Mapping Analysis
-Version: 4.2.4
+Version: 4.3.0
 Authors@R: c(
     person(given = "Massimo",
            family = "Aria",
diff --git a/NEWS b/NEWS
@@ -1,4 +1,4 @@
-bibliometrix v4.2.4 (Release date: 2024-06-20)
+bibliometrix v4.3.0 (Release date: 2024-06-30)
 
 Features:
 * Improved the function mergeDbSources()
diff --git a/R/biblioNetwork.R b/R/biblioNetwork.R
@@ -45,7 +45,7 @@
 #' @param shortlabel is logical. IF TRUE, reference labels are stored in a short format. Default is \code{shortlabel=TRUE}.
 #' @param remove.terms is a character vector. It contains a list of additional terms to delete from the documents before term extraction. The default is \code{remove.terms = NULL}.
 #' @param synonyms is a character vector. Each element contains a list of synonyms, separated by ";",  that will be merged into a single term (the first word contained in the vector element). The default is \code{synonyms = NULL}.
-#' @return It is a squared network matrix. It is an object of class \code{dgMatrix} of the package \code{\link{Matrix}}.
+#' @return It is a squared network matrix. It is an object of class \code{dgMatrix} of the package \code{Matrix}.
 #' @examples
 #' # EXAMPLE 1: Authors collaboration network
 #'
diff --git a/R/cocMatrix.R b/R/cocMatrix.R
@@ -29,7 +29,7 @@ utils::globalVariables(c("item","SR"))
 #' @param type indicates the output format of co-occurrences: \tabular{lll}{
 #'   \code{type = "matrix"} \tab   \tab produces an object of class
 #'   \code{matrix}\cr \code{type = "sparse"} \tab   \tab produces an object of
-#'   class \code{dgMatrix} of the package \code{\link{Matrix}}. "sparse"
+#'   class \code{dgMatrix} of the package \code{Matrix}. "sparse"
 #'   argument generates a compact representation of the matrix.}
 #' @param n is an integer. It indicates the number of items to select. If \code{N = NULL}, all items are selected.
 #' @param sep is the field separator character. This character separates strings in each 
diff --git a/R/collabByRegionPlot.R b/R/collabByRegionPlot.R
@@ -44,7 +44,7 @@ utils::globalVariables(c("countries","continent"))
 #' @return It is a list containing the following elements:
 #' \tabular{lll}{
 #' \code{graph} \tab  \tab a network object of the class \code{igraph}\cr
-#' \code{cluster_obj} \tab  \tab a \code{\link{communities}} object of the package \code{igraph}\cr
+#' \code{cluster_obj} \tab  \tab a \code{communities} object of the package \code{igraph}\cr
 #' \code{cluster_res} \tab  \tab a data frame with main results of clustering procedure.\cr}
 #' 
 #' 
diff --git a/R/csvOA2df.R b/R/csvOA2df.R
@@ -1,4 +1,4 @@
-utils::globalVariables(c("all_of", "corr", "DI", "C1","id_oa","RP","UN","AU_ID","corresponding_author_ids"))
+utils::globalVariables(c("all_of", "corr", "DI", "C1","id_oa","RP","UN","AU_ID","corresponding_author_ids", "References"))
 
 csvOA2df <- function(file){
   options(readr.num_columns = 0)
@@ -28,7 +28,7 @@ csvOA2df <- function(file){
   # recode as numeric
   DATA$TC <- as.numeric(DATA$TC)
   DATA$PY <- as.numeric(DATA$PY)
-  DATA$relevance_score <- as.numeric(DATA$relevance_score)
+  #DATA$relevance_score <- as.numeric(DATA$relevance_score)
   
   # replace | with ;
   DATA <- DATA %>% 
@@ -48,21 +48,35 @@ csvOA2df <- function(file){
   AFF <- DATA %>% 
     select(id_oa, starts_with("authorships_raw_affiliation_strings_")) 
   
-  colId <- c(-1,parse_number(colnames(AFF)[-1]))
-  
-  DATA <- AFF[order(colId)] %>% 
-    unite(., C1, starts_with("authorships_raw_affiliation_strings_"), sep=";") %>% 
-    mutate(C1 = gsub("NA","",C1),
-           C1 = TrimMult(C1,char=";")) %>% 
-    bind_cols(DATA %>% 
-                select(-"id_oa", -starts_with("authorships_raw_affiliation_strings_")))
-  
+  if(ncol(AFF)>1){
+    colId <- c(-1,parse_number(colnames(AFF)[-1]))
+    
+    DATA <- AFF[order(colId)] %>% 
+      unite(., C1, starts_with("authorships_raw_affiliation_strings_"), sep=";") %>% 
+      mutate(C1 = gsub("NA","",C1),
+             C1 = TrimMult(C1,char=";")) %>% 
+      bind_cols(DATA %>% 
+                  select(-"id_oa", -starts_with("authorships_raw_affiliation_strings_")))
+  } else {
+    AFF <- lapply(stri_extract_all_regex(DATA$authorships.raw_affiliation_strings, "\\[([^\\]]+)\\]"), function(l){
+      gsub("\\['|'\\]","",l)
+    })
+    
+    AFF <- data.frame(id_oa=rep(DATA$id_oa, lengths(AFF)), C1 = unlist(AFF)) %>% 
+      group_by(id_oa) %>% 
+      summarize(C1 = paste(C1,collapse=";")) 
+    DATA <- DATA %>% 
+      left_join(AFF, by = "id_oa")
+    DATA$C1[is.na(DATA$C1)] <- ""
+  }
+
   DATA$C1 <- gsub("https://", "", DATA$C1)
   
   # country string
   CO <- DATA %>% 
     select(id_oa, starts_with("authorships_countries_")) 
   
+  if(ncol(CO)>1){
   colId <- c(-1,parse_number(colnames(CO)[-1]))
   
   DATA <- CO[order(colId)] %>% 
@@ -71,12 +85,32 @@ csvOA2df <- function(file){
            AU_CO = TrimMult(AU_CO,char=";")) %>% 
     bind_cols(DATA %>% 
                 select(-"id_oa", -starts_with("authorships_countries_")))
+  } else {
+    CO <- lapply(stri_extract_all_regex(DATA$authorships.countries, "\\[([^\\]]+)\\]"), function(l){
+      gsub("\\['|'\\]","",l)
+    })
+    
+    CO <- data.frame(id_oa=rep(DATA$id_oa, lengths(CO)), AU_CO = unlist(CO)) %>% 
+      group_by(id_oa) %>% 
+      summarize(AU_CO = gsub("'","",paste(AU_CO,collapse=";")))
+    DATA <- DATA %>% 
+      left_join(CO, by = "id_oa")
+    DATA$AU_CO[is.na(DATA$AU_CO)] <- ""
+  }
+  
   
   ## corresponding author
   DATA <- DATA %>% 
     mutate(AU1_ID = gsub(";.*", "", corresponding_author_ids))
   UN <- strsplit(DATA$C1,";")
-  corresp <- strsplit(DATA$authorships_is_corresponding,";")
+  if ("authorships_is_corresponding" %in% names(DATA)){
+    corresp <- strsplit(tolower(DATA$authorships_is_corresponding),";")
+  } else {
+    corresp <- strsplit(tolower(DATA$authorships.is_corresponding),";")
+  }
+  
+  
+  
   df_UN <- data.frame(UN=unlist(UN), id_oa=rep(DATA$id_oa,lengths(UN))) %>% 
     group_by(id_oa) %>% 
     mutate(n=row_number())
@@ -120,6 +154,7 @@ csvOA2df <- function(file){
     mutate(across(all_of(label), toupper),
            DI = gsub("https://doi.org/","",DI),
            DI = ifelse(DI == "null",NA,DI)) 
+  DATA$SO <- toupper(DATA$SO)
   
   return(DATA)
 }
@@ -130,14 +165,18 @@ relabelling_OA <- function(DATA){
   label[label %in% "id"] <- "id_oa"
   label[label %in% "display_name"] <- "TI"
   label[label %in% "primary_location_display_name"] <- "SO"
+  label[label %in% "locations.source.display_name"] <- "SO"
   label[label %in% "primary_location_id"] <- "SO_ID"
+  label[label %in% "locations.source.id"] <- "SO_ID"
   label[label %in% "primary_location_host_organization"] <- "PU"
   label[label %in% "primary_location_issns"] <- "ISSN"
   label[label %in% "primary_location_issn_l"] <- "ISSN_I"
   label[label %in% "primary_location_landing_page_url"] <- "URL"
   label[label %in% "primary_location_pdf_url"] <- "URL_PDF"
   label[label %in% "author_ids"] <- "AU_ID"
+  label[label %in% "authorships.author.id"] <- "AU_ID"
   label[label %in% "author_names"] <- "AU"
+  label[label %in% "authorships.author.display_name"] <- "AU"
   label[label %in% "author_orcids"] <- "OI"
   label[label %in% "author_institution_names"] <- "C3"
   label[label %in% "cited_by_count"] <- "TC"
@@ -147,6 +186,7 @@ relabelling_OA <- function(DATA){
   label[label %in% "biblio_volume"] <- "VL"
   label[label %in% "referenced_works" ] <- "CR"
   label[label %in% "keywords_display_name"] <- "DE"
+  label[label %in% "keywords.display_name"] <- "DE"
   label[label %in% "abstract"] <- "AB"
   label[label %in% "concepts_display_name"] <- "CONCEPTS"
   label[label %in% "topics_display_name"] <- "TOPICS"
@@ -165,4 +205,4 @@ relabelling_OA <- function(DATA){
 TrimMult <- function(x, char=" ") {
   return(gsub(paste0("^", char, "*|(?<=", char, ")", char, "|", char, "*$"),
               "", x, perl=T))
-}
+}
diff --git a/R/networkPlot.R b/R/networkPlot.R
@@ -47,7 +47,7 @@ utils::globalVariables(c("degree"))
 #' @return It is a list containing the following elements:
 #' \tabular{lll}{
 #' \code{graph} \tab  \tab a network object of the class \code{igraph}\cr
-#' \code{cluster_obj} \tab  \tab a \code{\link{communities}} object of the package \code{igraph}\cr
+#' \code{cluster_obj} \tab  \tab a \code{communities} object of the package \code{igraph}\cr
 #' \code{cluster_res} \tab  \tab a data frame with main results of clustering procedure.\cr}
 #' 
 #' 
diff --git a/R/networkStat.R b/R/networkStat.R
@@ -10,7 +10,7 @@
 #' @return It is a list containing the following elements:
 #' \tabular{lll}{
 #' \code{graph} \tab  \tab a network object of the class \code{igraph}\cr
-#' \code{network} \tab  \tab a \code{\link{communities}} a list with the main statistics of the network\cr
+#' \code{network} \tab  \tab a \code{communities} a list with the main statistics of the network\cr
 #' \code{vertex} \tab  \tab a data frame with the main measures of centrality and prestige of vertices.\cr}
 #' 
 #' 
diff --git a/R/rpys.R b/R/rpys.R
@@ -108,12 +108,16 @@ logo <- grid::rasterGrob(logo,interpolate = TRUE)
 x <- c(min(RPYS$Year),min(RPYS$Year)+diff(range(RPYS$Year))*0.125)+1
 y <- c(min(c(RPYS$Citations,RPYS$diffMedian)),min(c(RPYS$Citations,RPYS$diffMedian))+diff(range(c(RPYS$Citations,RPYS$diffMedian)))*0.125)*1.05
 
+RPYS <- RPYS %>% 
+  left_join(CR %>% 
+              group_by(citedYears) %>% 
+              slice_max(order_by = Freq, n=3, with_ties = FALSE) %>% 
+              summarize(References = paste(firstup(Reference),collapse="\n")), 
+            by=c("Year" = "citedYears"))
 
 
-g=ggplot(RPYS, aes(x=Year ,y=Citations,text=paste("Year: ",Year,"\nN. of References: ",Citations)))+
+g=ggplot(RPYS, aes(x=Year ,y=Citations,text=paste("Year: ",Year," - Total Citations: ",Citations,"\nTop 3 References:\n",References)))+
   geom_line(aes(group="NA")) +
-  #geom_area(aes(group="NA"),fill = 'grey90', alpha = .5) +
-  #geom_hline(aes(yintercept=0, color = 'grey'))+
   geom_line(aes(x=Year,y=diffMedian, color="firebrick", group="NA"))+
   labs(x = 'Year'
        , y = 'Cited References'
@@ -131,8 +135,8 @@ g=ggplot(RPYS, aes(x=Year ,y=Citations,text=paste("Year: ",Year,"\nN. of Referen
         ,axis.title.y = element_text(vjust = 1, angle = 90)
         ,axis.title.x = element_text(hjust = 0.95, angle = 0)
         ,axis.text.x = element_text(size=8,angle = 90)
-        ,axis.line.x = element_line(color="black", size=0.5)
-        ,axis.line.y = element_line(color="black", size=0.5)
+        ,axis.line.x = element_line(color="black", linewidth=0.5)
+        ,axis.line.y = element_line(color="black", linewidth=0.5)
   ) + annotation_custom(logo, xmin = x[1], xmax = x[2], ymin = y[1], ymax = y[2]) 
 
     if (isTRUE(graph)){plot(g)}
@@ -141,7 +145,7 @@ g=ggplot(RPYS, aes(x=Year ,y=Citations,text=paste("Year: ",Year,"\nN. of Referen
       rename(Year = citedYears) %>% 
       ungroup()
     result=list(spectroscopy=g, 
-                rpysTable=RPYS, 
+                rpysTable=RPYS %>% select(-References), 
                 CR=CR %>% mutate(Year = as.character(Year)), 
                 df=df)
     return(result)
diff --git a/R/zzz.R b/R/zzz.R
@@ -737,3 +737,10 @@ colorlist <- function(){
              ,"#B3B3B3","#A6CEE3","#1F78B4","#B2DF8A","#33A02C","#FB9A99","#E31A1C","#FDBF6F","#FF7F00","#CAB2D6","#6A3D9A","#B15928","#8DD3C7","#BEBADA"
              ,"#FB8072","#80B1D3","#FDB462","#B3DE69","#D9D9D9","#BC80BD","#CCEBC5")
 }
+
+#Initial to upper case
+firstup <- function(x) {
+  x <- tolower(x)
+  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
+  x
+}
diff --git a/inst/biblioshiny/libraries.R b/inst/biblioshiny/libraries.R
@@ -24,15 +24,17 @@ libraries <- function(){
     ## Currently "webshot2" 0.1.1 generates empty screenshots on windows 10 for graphics created with visnetwork.
     ## This workaround installs the previous version 0.1.0 to temporarily fix the problem.
     if (!require(webshot2,quietly=TRUE)){
-      install.packages("https://cran.r-project.org/src/contrib/Archive/webshot2/webshot2_0.1.0.tar.gz", 
-                       repos = NULL, type = "source", c("Depends", "Imports"))
+      install.packages("webshot2")
+      detach("package:webshot2", unload = TRUE, force=TRUE)
+      install.packages("https://cran.r-project.org/src/contrib/Archive/webshot2/webshot2_0.1.0.tar.gz",
+                       repos = NULL, type = "source", dependencies=c("Depends", "Imports"))
     }else{
       pkgs <- installed.packages()[, "Version"]
       vers <- pkgs["webshot2"]
       if (vers!="0.1.0"){
         detach("package:webshot2", unload = TRUE, force=TRUE)
-        install.packages("https://cran.r-project.org/src/contrib/Archive/webshot2/webshot2_0.1.0.tar.gz", 
-                         repos = NULL, type = "source", c("Depends", "Imports"))
+        install.packages("https://cran.r-project.org/src/contrib/Archive/webshot2/webshot2_0.1.0.tar.gz",
+                         repos = NULL, type = "source", dependencies=c("Depends", "Imports"))
       }
     }
     ##
diff --git a/inst/biblioshiny/utils.R b/inst/biblioshiny/utils.R
@@ -297,7 +297,7 @@ getFileNameExtension <- function (fn) {
   ext
 }
 
-#Initial to upper case
+# Initial to upper case
 firstup <- function(x) {
   x <- tolower(x)
   substr(x, 1, 1) <- toupper(substr(x, 1, 1))
@@ -2232,6 +2232,7 @@ plot2png <- function(p, filename, zoom = 2, type="vis", tmpdir){
            htmlwidgets::saveWidget(p, file=html_name)
          })
   webshot2::webshot(url = html_name, zoom = zoom, file = filename)#, verbose=FALSE)
+  
   popUpGeneric(title=NULL, type="success", color=c("#1d8fe1"),
                subtitle=paste0("Plot was saved in the following path: ",filename),
                btn_labels="OK", size="40%")
diff --git a/man/biblioNetwork.Rd b/man/biblioNetwork.Rd
diff --git a/man/cocMatrix.Rd b/man/cocMatrix.Rd
diff --git a/man/collabByRegionPlot.Rd b/man/collabByRegionPlot.Rd
diff --git a/man/networkPlot.Rd b/man/networkPlot.Rd
diff --git a/man/networkStat.Rd b/man/networkStat.Rd

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-bibliometrix v4.2.4 (Release date: 2024-06-20)`
	`1`	`+bibliometrix v4.3.0 (Release date: 2024-06-30)`
`2`	`2`
`3`	`3`	`Features:`
`4`	`4`	`* Improved the function mergeDbSources()`