Skip to content

Commit

Permalink
Prepare for intermediate release
Browse files Browse the repository at this point in the history
  • Loading branch information
jeroen committed Mar 27, 2018
1 parent 1f03be7 commit 0a2745a
Show file tree
Hide file tree
Showing 8 changed files with 6 additions and 76 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
Package: pdftools
Type: Package
Title: Text Extraction, Rendering and Converting of PDF Documents
Version: 1.5.9000
Author: Jeroen Ooms
Maintainer: Jeroen Ooms <[email protected]>
Version: 1.6
Authors@R: person("Jeroen", "Ooms", role = c("aut", "cre"), email = "[email protected]",
comment = c(ORCID = "0000-0002-4035-0289"))
Description: Utilities based on 'libpoppler' for extracting text, fonts, attachments and
metadata from a PDF file. Also supports high quality rendering of PDF documents into
PNG, JPEG, TIFF format, or into raw bitmap vectors for further processing in R.
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

export(pdf_attachments)
export(pdf_convert)
export(pdf_data)
export(pdf_fonts)
export(pdf_info)
export(pdf_render_page)
Expand Down
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
1.6
- pdf_render_page() and pdf_convert() gain argument to control 'antialias'
- Small tweaks in pdf_text() for dealing with malformed pdf files

1.5
Expand Down
4 changes: 0 additions & 4 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ poppler_pdf_info <- function(x, opw, upw) {
.Call('_pdftools_poppler_pdf_info', PACKAGE = 'pdftools', x, opw, upw)
}

# R-level binding for the native routine '_pdftools_poppler_pdf_data' in the
# 'pdftools' package; x, opw and upw are passed through unchanged.
poppler_pdf_data <- function(x, opw, upw) {
    .Call(
        '_pdftools_poppler_pdf_data',
        x, opw, upw,
        PACKAGE = 'pdftools'
    )
}

# R-level binding for the native routine '_pdftools_poppler_pdf_text' in the
# 'pdftools' package; x, opw and upw are passed through unchanged.
poppler_pdf_text <- function(x, opw, upw) {
    .Call(
        '_pdftools_poppler_pdf_text',
        x, opw, upw,
        PACKAGE = 'pdftools'
    )
}
Expand Down
8 changes: 1 addition & 7 deletions R/tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#'
#' Poppler is pretty verbose when encountering minor errors in PDF files,
#' especially in \code{\link{pdf_text}}. These messages are usually safe
#' to ignore, use \code{\link{suppressMessages}} to hide them alltogether.
#' to ignore, use \code{\link{suppressMessages}} to hide them altogether.
#'
#' @export
#' @param pdf file path or raw vector with pdf data
Expand All @@ -32,12 +32,6 @@ pdf_text <- function(pdf, opw = "", upw = "") {
poppler_pdf_text(loadfile(pdf), opw, upw)
}

#' @rdname pdftools
#' @export
pdf_data <- function(pdf, opw = "", upw = "") {
  # Run `pdf` through loadfile() first, then hand the result together with
  # opw and upw to the native poppler binding.
  pdf_input <- loadfile(pdf)
  poppler_pdf_data(pdf_input, opw, upw)
}

#' @rdname pdftools
#' @export
pdf_fonts<- function(pdf, opw = "", upw = "") {
Expand Down
5 changes: 1 addition & 4 deletions man/pdftools.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 0 additions & 14 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,6 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// poppler_pdf_data
// Rcpp glue for poppler_pdf_data(): converts the three SEXP arguments to the
// C++ parameter types, calls the implementation and returns the wrapped
// result. The body is bracketed by the BEGIN_RCPP / END_RCPP macros.
// This symbol is the one listed in CallEntries as "_pdftools_poppler_pdf_data".
List poppler_pdf_data(RawVector x, std::string opw, std::string upw);
RcppExport SEXP _pdftools_poppler_pdf_data(SEXP xSEXP, SEXP opwSEXP, SEXP upwSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
// Convert each incoming SEXP to the type expected by poppler_pdf_data().
Rcpp::traits::input_parameter< RawVector >::type x(xSEXP);
Rcpp::traits::input_parameter< std::string >::type opw(opwSEXP);
Rcpp::traits::input_parameter< std::string >::type upw(upwSEXP);
// Call the implementation and wrap its List result for return.
rcpp_result_gen = Rcpp::wrap(poppler_pdf_data(x, opw, upw));
return rcpp_result_gen;
END_RCPP
}
// poppler_pdf_text
CharacterVector poppler_pdf_text(RawVector x, std::string opw, std::string upw);
RcppExport SEXP _pdftools_poppler_pdf_text(SEXP xSEXP, SEXP opwSEXP, SEXP upwSEXP) {
Expand Down Expand Up @@ -154,7 +141,6 @@ static const R_CallMethodDef CallEntries[] = {
{"_pdftools_set_poppler_data", (DL_FUNC) &_pdftools_set_poppler_data, 1},
{"_pdftools_get_poppler_config", (DL_FUNC) &_pdftools_get_poppler_config, 0},
{"_pdftools_poppler_pdf_info", (DL_FUNC) &_pdftools_poppler_pdf_info, 3},
{"_pdftools_poppler_pdf_data", (DL_FUNC) &_pdftools_poppler_pdf_data, 3},
{"_pdftools_poppler_pdf_text", (DL_FUNC) &_pdftools_poppler_pdf_text, 3},
{"_pdftools_poppler_pdf_fonts", (DL_FUNC) &_pdftools_poppler_pdf_fonts, 3},
{"_pdftools_poppler_pdf_files", (DL_FUNC) &_pdftools_poppler_pdf_files, 3},
Expand Down
43 changes: 0 additions & 43 deletions src/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@
#include <Rcpp.h>
#include <cstring>
#include <memory>

// Feature switch: defined when the poppler headers report minor version >= 63
// or any major version above 0 (i.e. poppler >= 0.63). poppler_pdf_data()
// checks it before calling page::text_list() and raises an error otherwise.
#if defined(POPPLER_VERSION_MINOR) && (POPPLER_VERSION_MINOR >= 63 || POPPLER_VERSION_MAJOR > 0)
#define POPPLER_HAS_PAGE_TEXT_LIST
#endif

using namespace Rcpp;
using namespace poppler;

Expand Down Expand Up @@ -164,45 +160,6 @@ List poppler_pdf_info (RawVector x, std::string opw, std::string upw) {
);
}

// Extract the text boxes of every page of a PDF document.
//
// x:        raw PDF data, handed to read_raw_pdf() together with opw and upw
// opw, upw: forwarded unchanged to read_raw_pdf()
//
// Returns a list with one element per page. Each element is a data frame with
// the columns text, width, height, x, y and space, one row per text box. The
// element for a page that could not be created keeps the list's default value.
// Throws std::runtime_error when compiled without POPPLER_HAS_PAGE_TEXT_LIST.
// [[Rcpp::export]]
List poppler_pdf_data (RawVector x, std::string opw, std::string upw) {
#ifdef POPPLER_HAS_PAGE_TEXT_LIST
  // NOTE(review): doc is never released in this function. If read_raw_pdf()
  // hands ownership to the caller it should be held in a std::unique_ptr as
  // well -- confirm against its definition before changing.
  document *doc = read_raw_pdf(x, opw, upw);
  const int npages = doc->pages();
  Rcpp::List out(npages);
  for(int i = 0; i < npages; i++){
    // create_page() allocates a new page object; keep it in a unique_ptr so
    // it is freed at the end of every iteration, including when an exception
    // is thrown while building the vectors below.
    std::unique_ptr<page> p(doc->create_page(i));
    if(!p) continue; //missing page
    std::vector<text_box> boxes = p->text_list();
    const size_t nboxes = boxes.size();
    CharacterVector text(nboxes);
    IntegerVector width(nboxes);
    IntegerVector height(nboxes);
    // Named xpos/ypos so they do not shadow the function parameter x.
    IntegerVector xpos(nboxes);
    IntegerVector ypos(nboxes);
    Rcpp::LogicalVector space(nboxes);
    for(size_t j = 0; j < nboxes; j++){
      const text_box &box = boxes[j];
      // Fetch the bounding box once instead of once per coordinate.
      const rectf rect = box.bbox();
      text[j] = ustring_to_utf8(box.text());
      width[j] = rect.width();
      height[j] = rect.height();
      xpos[j] = rect.x();
      ypos[j] = rect.y();
      space[j] = box.has_space_after();
    }
    out[i] = DataFrame::create(
      _["text"] = text,
      _["width"] = width,
      _["height"] = height,
      _["x"] = xpos,
      _["y"] = ypos,
      _["space"] = space,
      _["stringsAsFactors"] = false
    );
  }
  return out;
#else //POPPLER_HAS_PAGE_TEXT_LIST
  throw std::runtime_error(std::string("This feature requires poppler >= 0.63. You have ") + POPPLER_VERSION);
#endif
}

// [[Rcpp::export]]
CharacterVector poppler_pdf_text (RawVector x, std::string opw, std::string upw) {
document *doc = read_raw_pdf(x, opw, upw);
Expand Down

0 comments on commit 0a2745a

Please sign in to comment.