47c773be44cb411fb220a18c02e55c951ecde887 — boB Rudis a month ago 92e55c3
Addresses #4
60 files changed, 38 insertions(+), 13 deletions(-)

M NEWS.md
M R/get-content-type.R
M R/guess-content-type.R
M R/sysdata.rda
A inst/extdat/no-guess/csv.docx
R inst/extdat/{actions.csv => pass-through/actions.csv}
R inst/extdat/{actions.txt => pass-through/actions.txt}
R inst/extdat/{actions.xlsx => pass-through/actions.xlsx}
R inst/extdat/{test.au => pass-through/test.au}
R inst/extdat/{test.bin => pass-through/test.bin}
R inst/extdat/{test.bmp => pass-through/test.bmp}
R inst/extdat/{test.dtd => pass-through/test.dtd}
R inst/extdat/{test.emf => pass-through/test.emf}
R inst/extdat/{test.eps => pass-through/test.eps}
R inst/extdat/{test.fli => pass-through/test.fli}
R inst/extdat/{test.gif => pass-through/test.gif}
R inst/extdat/{test.ico => pass-through/test.ico}
R inst/extdat/{test.jpg => pass-through/test.jpg}
R inst/extdat/{test.mp3 => pass-through/test.mp3}
R inst/extdat/{test.odt => pass-through/test.odt}
R inst/extdat/{test.ogg => pass-through/test.ogg}
R inst/extdat/{test.pcx => pass-through/test.pcx}
R inst/extdat/{test.pdf => pass-through/test.pdf}
R inst/extdat/{test.pl => pass-through/test.pl}
R inst/extdat/{test.png => pass-through/test.png}
R inst/extdat/{test.pnm => pass-through/test.pnm}
R inst/extdat/{test.ppm => pass-through/test.ppm}
R inst/extdat/{test.ppt => pass-through/test.ppt}
R inst/extdat/{test.ps => pass-through/test.ps}
R inst/extdat/{test.psd => pass-through/test.psd}
R inst/extdat/{test.py => pass-through/test.py}
R inst/extdat/{test.rtf => pass-through/test.rtf}
R inst/extdat/{test.sh => pass-through/test.sh}
R inst/extdat/{test.tar => pass-through/test.tar}
R inst/extdat/{test.tar.gz => pass-through/test.tar.gz}
R inst/extdat/{test.tga => pass-through/test.tga}
R inst/extdat/{test.txt => pass-through/test.txt}
R inst/extdat/{test.txt.gz => pass-through/test.txt.gz}
R inst/extdat/{test.wav => pass-through/test.wav}
R inst/extdat/{test.wmf => pass-through/test.wmf}
R inst/extdat/{test.xcf => pass-through/test.xcf}
R inst/extdat/{test.xml => pass-through/test.xml}
R inst/extdat/{test.xpm => pass-through/test.xpm}
R inst/extdat/{test.zip => pass-through/test.zip}
R inst/extdat/{test_128_44_jstereo.mp3 => pass-through/test_128_44_jstereo.mp3}
R inst/extdat/{test_excel.xlsm => pass-through/test_excel.xlsm}
R inst/extdat/{test_excel.xlsx => pass-through/test_excel.xlsx}
R inst/extdat/{test_excel_2000.xls => pass-through/test_excel_2000.xls}
R inst/extdat/{test_excel_spreadsheet.xml => pass-through/test_excel_spreadsheet.xml}
R inst/extdat/{test_excel_web_archive.mht => pass-through/test_excel_web_archive.mht}
R inst/extdat/{test_nocompress.tif => pass-through/test_nocompress.tif}
R inst/extdat/{test_powerpoint.pptm => pass-through/test_powerpoint.pptm}
R inst/extdat/{test_powerpoint.pptx => pass-through/test_powerpoint.pptx}
R inst/extdat/{test_word.docm => pass-through/test_word.docm}
R inst/extdat/{test_word.docx => pass-through/test_word.docx}
R inst/extdat/{test_word_2000.doc => pass-through/test_word_2000.doc}
R inst/extdat/{test_word_6.0_95.doc => pass-through/test_word_6.0_95.doc}
M inst/tinytest/test_wand.R
M man/get_content_type.Rd
M man/guess_content_type.Rd
M NEWS.md => NEWS.md +5 -0
@@ 1,3 1,8 @@
+# 0.5.1
+- new `guess` logical parameter to `get_content_type()` to control
+  whether `guess_content_type()` is used as a last resort passthrough (addresses #4)
+- updated external guess database
+
 # 0.5.0
 - {tinytest}
 - CRAN release

M R/get-content-type.R => R/get-content-type.R +17 -7
@@ 10,12 10,14 @@
 #'
 #' @md
 #' @param path path to a file
-#' @param ... passed on to [guess_content_type()]
+#' @param guess if `TRUE` (the default), calls [guess_content_type()] if
+#'        no internal rules match the magic header
+#' @param ... passed on to [guess_content_type()] if `guess` is `TRUE`
 #' @return character vector
 #' @export
 #' @examples
-#' get_content_type(system.file("extdat", "test.pdf", package="wand"))
-get_content_type <- function(path, ...) {
+#' get_content_type(system.file("extdat", "pass-through", "test.pdf", package="wand"))
+get_content_type <- function(path, guess = TRUE, ...) {
 
   path <- path.expand(path[1])
   if (!file.exists(path)) stop("File not found.", call.=FALSE)


@@ 28,9 30,14 @@ get_content_type <- function(path, ...) {
   if (all(c(0xCA,0xFE,0xBA,0xBE) == hdr[1:4])) return("application/java-vm")
 
   if (all(c(0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1) == hdr[1:8])) {
-    guessed_name <- guess_content_type(path)
-    if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name)
+
+    if (guess) {
+      guessed_name <- guess_content_type(path)
+      if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name)
+    }
+
     return("application/msword")
+
   }
 
   if (all(c(0x25,0x50,0x44,0x46,0x2d,0x31,0x2e) == hdr[1:7])) return("application/pdf")


@@ 107,8 114,10 @@ get_content_type <- function(path, ...) {
     office_type <- check_office(hdr, path)
     if (length(office_type) > 0) return(office_type)
 
-    guessed_name <- guess_content_type(path)
-    if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name)
+    if (guess) {
+      guessed_name <- guess_content_type(path)
+      if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name)
+    }
 
     return("application/zip")
 


@@ 131,6 140,7 @@ get_content_type <- function(path, ...) {
   if (all(c(0x00,0x00,0x01,0xBA) == hdr[1:4])) return("video/mpeg")
   if (all(c(0x00,0x00,0x01,0xB3) == hdr[1:4])) return("video/mpeg")
 
+  if (!guess) return("???")
 
   return(guess_content_type(path, ...))
 

M R/guess-content-type.R => R/guess-content-type.R +1 -1
@@ 16,7 16,7 @@
 #' @return character vector
 #' @export
 #' @examples
-#' guess_content_type(system.file("extdat", "test.pdf", package="wand"))
+#' guess_content_type(system.file("extdat", "pass-through", "test.pdf", package="wand"))
 guess_content_type <- function(path, not_found = "???", custom_db = NULL) {
 
   path <- path.expand(path[1])

M R/sysdata.rda => R/sysdata.rda +0 -0

A inst/extdat/no-guess/csv.docx => inst/extdat/no-guess/csv.docx +3 -0
@@ 0,0 1,3 @@
+Kid Name,Weight,Age
+Nakshatra,12,1.5
+Titas,16,6

R inst/extdat/actions.csv => inst/extdat/pass-through/actions.csv +0 -0

R inst/extdat/actions.txt => inst/extdat/pass-through/actions.txt +0 -0

R inst/extdat/actions.xlsx => inst/extdat/pass-through/actions.xlsx +0 -0

R inst/extdat/test.au => inst/extdat/pass-through/test.au +0 -0

R inst/extdat/test.bin => inst/extdat/pass-through/test.bin +0 -0

R inst/extdat/test.bmp => inst/extdat/pass-through/test.bmp +0 -0

R inst/extdat/test.dtd => inst/extdat/pass-through/test.dtd +0 -0

R inst/extdat/test.emf => inst/extdat/pass-through/test.emf +0 -0

R inst/extdat/test.eps => inst/extdat/pass-through/test.eps +0 -0

R inst/extdat/test.fli => inst/extdat/pass-through/test.fli +0 -0

R inst/extdat/test.gif => inst/extdat/pass-through/test.gif +0 -0

R inst/extdat/test.ico => inst/extdat/pass-through/test.ico +0 -0

R inst/extdat/test.jpg => inst/extdat/pass-through/test.jpg +0 -0

R inst/extdat/test.mp3 => inst/extdat/pass-through/test.mp3 +0 -0

R inst/extdat/test.odt => inst/extdat/pass-through/test.odt +0 -0

R inst/extdat/test.ogg => inst/extdat/pass-through/test.ogg +0 -0

R inst/extdat/test.pcx => inst/extdat/pass-through/test.pcx +0 -0

R inst/extdat/test.pdf => inst/extdat/pass-through/test.pdf +0 -0

R inst/extdat/test.pl => inst/extdat/pass-through/test.pl +0 -0

R inst/extdat/test.png => inst/extdat/pass-through/test.png +0 -0

R inst/extdat/test.pnm => inst/extdat/pass-through/test.pnm +0 -0

R inst/extdat/test.ppm => inst/extdat/pass-through/test.ppm +0 -0

R inst/extdat/test.ppt => inst/extdat/pass-through/test.ppt +0 -0

R inst/extdat/test.ps => inst/extdat/pass-through/test.ps +0 -0

R inst/extdat/test.psd => inst/extdat/pass-through/test.psd +0 -0

R inst/extdat/test.py => inst/extdat/pass-through/test.py +0 -0

R inst/extdat/test.rtf => inst/extdat/pass-through/test.rtf +0 -0

R inst/extdat/test.sh => inst/extdat/pass-through/test.sh +0 -0

R inst/extdat/test.tar => inst/extdat/pass-through/test.tar +0 -0

R inst/extdat/test.tar.gz => inst/extdat/pass-through/test.tar.gz +0 -0

R inst/extdat/test.tga => inst/extdat/pass-through/test.tga +0 -0

R inst/extdat/test.txt => inst/extdat/pass-through/test.txt +0 -0

R inst/extdat/test.txt.gz => inst/extdat/pass-through/test.txt.gz +0 -0

R inst/extdat/test.wav => inst/extdat/pass-through/test.wav +0 -0

R inst/extdat/test.wmf => inst/extdat/pass-through/test.wmf +0 -0

R inst/extdat/test.xcf => inst/extdat/pass-through/test.xcf +0 -0

R inst/extdat/test.xml => inst/extdat/pass-through/test.xml +0 -0

R inst/extdat/test.xpm => inst/extdat/pass-through/test.xpm +0 -0

R inst/extdat/test.zip => inst/extdat/pass-through/test.zip +0 -0

R inst/extdat/test_128_44_jstereo.mp3 => inst/extdat/pass-through/test_128_44_jstereo.mp3 +0 -0

R inst/extdat/test_excel.xlsm => inst/extdat/pass-through/test_excel.xlsm +0 -0

R inst/extdat/test_excel.xlsx => inst/extdat/pass-through/test_excel.xlsx +0 -0

R inst/extdat/test_excel_2000.xls => inst/extdat/pass-through/test_excel_2000.xls +0 -0

R inst/extdat/test_excel_spreadsheet.xml => inst/extdat/pass-through/test_excel_spreadsheet.xml +0 -0

R inst/extdat/test_excel_web_archive.mht => inst/extdat/pass-through/test_excel_web_archive.mht +0 -0

R inst/extdat/test_nocompress.tif => inst/extdat/pass-through/test_nocompress.tif +0 -0

R inst/extdat/test_powerpoint.pptm => inst/extdat/pass-through/test_powerpoint.pptm +0 -0

R inst/extdat/test_powerpoint.pptx => inst/extdat/pass-through/test_powerpoint.pptx +0 -0

R inst/extdat/test_word.docm => inst/extdat/pass-through/test_word.docm +0 -0

R inst/extdat/test_word.docx => inst/extdat/pass-through/test_word.docx +0 -0

R inst/extdat/test_word_2000.doc => inst/extdat/pass-through/test_word_2000.doc +0 -0

R inst/extdat/test_word_6.0_95.doc => inst/extdat/pass-through/test_word_6.0_95.doc +0 -0

M inst/tinytest/test_wand.R => inst/tinytest/test_wand.R +5 -1
@@ 58,8 58,12 @@ list(
   ), test.zip = "application/zip"
 ) -> results
 
-fils <- list.files(system.file("extdat", package="wand"), full.names=TRUE)
+fils <- list.files(system.file("extdat", "pass-through", package="wand"), full.names=TRUE)
 tst <- lapply(fils, get_content_type)
 names(tst) <- basename(fils)
 
 for(n in names(tst)) expect_identical(results[[n]], tst[[n]])
+
+no_guess <- system.file("extdat", "no-guess", "csv.docx", package = "wand")
+expect_equal(get_content_type(no_guess, guess = FALSE), "???")
+

M man/get_content_type.Rd => man/get_content_type.Rd +6 -3
@@ 4,12 4,15 @@
 \alias{get_content_type}
 \title{Discover MIME type of a file based on contents}
 \usage{
-get_content_type(path, ...)
+get_content_type(path, guess = TRUE, ...)
 }
 \arguments{
 \item{path}{path to a file}
 
-\item{...}{passed on to \code{\link[=guess_content_type]{guess_content_type()}}}
+\item{guess}{if \code{TRUE} (the default), calls \code{\link[=guess_content_type]{guess_content_type()}} if
+no internal rules match the magic header}
+
+\item{...}{passed on to \code{\link[=guess_content_type]{guess_content_type()}} if \code{guess} is \code{TRUE}}
 }
 \value{
 character vector


@@ 25,5 28,5 @@ comparisons are required/desired. If no match is found, \code{???} is returned
 (see \code{\link[=guess_content_type]{guess_content_type()}} for how to override this behaviour).
 }
 \examples{
-get_content_type(system.file("extdat", "test.pdf", package="wand"))
+get_content_type(system.file("extdat", "pass-through", "test.pdf", package="wand"))
 }

M man/guess_content_type.Rd => man/guess_content_type.Rd +1 -1
@@ 27,5 27,5 @@ return one or more associated types for a given input path. If no match is
 found, \code{???} is returned.
 }
 \examples{
-guess_content_type(system.file("extdat", "test.pdf", package="wand"))
+guess_content_type(system.file("extdat", "pass-through", "test.pdf", package="wand"))
 }