~hrbrmstr/wand

95e723b82401eeaca22f52f50c6e7fe705daedff — boB Rudis 1 year, 10 months ago 0385108
Added better jpeg + new lzip, 7-zip, wasm, pcap, avro, parquet detections
A .vscode/c_cpp_properties.json => .vscode/c_cpp_properties.json +80 -0
@@ 0,0 1,80 @@
{
    "configurations": [
        {
            "name": "Mac",
            "includePath": [
                "${workspaceFolder}",
                "/Library/Developer/CommandLineTools/usr/include/c++/v1",
                "/usr/local/include",
                "/Library/Developer/CommandLineTools/usr/lib/clang/9.0.0/include",
                "/Library/Developer/CommandLineTools/usr/include",
                "/usr/include",
                "/Library/Frameworks/R.framework/Versions/3.5/Resources/library/Rcpp/include",
                "/Library/Frameworks/R.framework/Versions/3.5/PrivateHeaders",
                "/Library/Frameworks/R.framework/Versions/3.5/Resources/include"
            ],
            "defines": [],
            "intelliSenseMode": "clang-x64",
            "browse": {
                "path": [
                    "${workspaceFolder}",
                    "/Library/Developer/CommandLineTools/usr/include/c++/v1",
                    "/usr/local/include",
                    "/Library/Developer/CommandLineTools/usr/lib/clang/9.0.0/include",
                    "/Library/Developer/CommandLineTools/usr/include",
                    "/usr/include"
                ],
                "limitSymbolsToIncludedHeaders": true,
                "databaseFilename": ""
            },
            "macFrameworkPath": [
                "/System/Library/Frameworks",
                "/Library/Frameworks"
            ],
            "compilerPath": "/usr/bin/clang",
            "cStandard": "c11",
            "cppStandard": "c++17"
        },
        {
            "name": "Linux",
            "includePath": [
                "/usr/include",
                "/usr/local/include",
                "${workspaceFolder}"
            ],
            "defines": [],
            "intelliSenseMode": "clang-x64",
            "browse": {
                "path": [
                    "/usr/include",
                    "/usr/local/include",
                    "${workspaceFolder}"
                ],
                "limitSymbolsToIncludedHeaders": true,
                "databaseFilename": ""
            }
        },
        {
            "name": "Win32",
            "includePath": [
                "C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/include",
                "${workspaceFolder}"
            ],
            "defines": [
                "_DEBUG",
                "UNICODE",
                "_UNICODE"
            ],
            "intelliSenseMode": "msvc-x64",
            "browse": {
                "path": [
                    "C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/include/*",
                    "${workspaceFolder}"
                ],
                "limitSymbolsToIncludedHeaders": true,
                "databaseFilename": ""
            }
        }
    ],
    "version": 3
}
\ No newline at end of file

M DESCRIPTION => DESCRIPTION +1 -1
@@ 1,7 1,7 @@
Package: wand
Type: Package
Title: Retrieve 'Magic' Attributes from Files and Directories
Version: 0.3.0
Version: 0.4.0
Date: 2018-09-16
Authors@R: c(
    person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), 

M NEWS.md => NEWS.md +13 -0
@@ 1,3 1,16 @@
# 0.4.0
- Enhanced jpeg detection
- Added distinction between pax and tar
- Added lzip detection
- Added 7-zip detection
- Added wasm file detection
- Added pcap and pcapng detection
- Added avro detection
- Added parquet detection

# 0.3.0
* Dropped libmagic

# 0.2.0
* Works on Windows


M R/get-content-type.R => R/get-content-type.R +22 -1
@@ 22,6 22,9 @@ get_content_type <- function(path) {

  hdr <- readBin(path, "raw", n=1024)

  if (all(c(0x4F,0x62,0x6A,0x01) == hdr[1:4])) return("application/vnd.apache.avro+binary")
  if (all(c(0x50,0x41,0x52,0x31) == hdr[1:4])) return("application/x-parquet")

  if (all(c(0xCA,0xFE,0xBA,0xBE) == hdr[1:4])) return("application/java-vm")

  if (all(c(0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1) == hdr[1:8])) {


@@ 43,6 46,8 @@ get_content_type <- function(path) {
  if (all(c(0x49,0x44,0x33) == hdr[1:3])) return("audio/mp3")
  if (all(c(0xAC,0xED) == hdr[1:2])) return("application/x-java-serialized-object")

  if (all(c(0x4c,0x5a,0x49,0x50) == hdr[1:4])) return("application/x-lzip")

  if (hdr[1] == 0x3c) { # "<"
    if (all(c(0x68,0x74,0x6d,0x6c) == hdr[2:5])) return("text/html") # "html"
    if (all(c(0x48,0x54,0x4d,0x4c) == hdr[2:5])) return("text/html") # "HTML"


@@ 51,6 56,11 @@ get_content_type <- function(path) {
    if (all(c(0x3f,0x78,0x6d,0x6c,0x20) == hdr[2:6])) return("application/xml")
  }

  # pcapng: the file opens with a Section Header Block whose block type is
  # 0x0A0D0D0A. The match must be wrapped in return(); a bare string here is
  # evaluated and discarded, so detection would silently fall through.
  if (all(c(0x0a,0x0d,0x0d,0x0a) == hdr[1:4])) return("application/x-pcapng")

  # Classic pcap magic number (0xA1B2C3D4), accepted in either byte order.
  # NOTE(review): "application/x-cap" may have been meant as a pcap-specific
  # type -- confirm before changing, since callers may depend on this string.
  if (all(c(0xa1,0xb2,0xc3,0xd4) == hdr[1:4]) ||
      all(c(0xd4,0xc3,0xb2,0xa1) == hdr[1:4])) return("application/x-cap")

  if (all(c(0xfe,0xff) == hdr[1:2])) {
    if (all(c(0x00,0x3c,0x00,0x3f,0x00,0x78) == hdr[3:8])) return("application/xml")
  }


@@ 77,6 87,7 @@ get_content_type <- function(path) {
    return("application/javascript")

  if (all(c(0xFF,0xD8,0xFF) == hdr[1:3])) {
    if (0xDB == hdr[4]) return("image/jpeg")
    if (0xE0 == hdr[4]) return("image/jpeg")
    if (0xE1 == hdr[4]) {
      if (all(c(0x45,0x78,0x69,0x66,0x00) == hdr[7:11])) return("image/jpeg") # Exif


@@ 103,9 114,19 @@ get_content_type <- function(path) {

  }

  if (all(c(0x00,0x61,0x73,0x6d) == hdr[1:4])) return("application/wasm")

  if (all(c(0x37,0x7A,0xBC,0xAF,0x27,0x1C) == hdr[1:6])) return("application/x-7z-compressed")

  if (all(c(0x5a,0x4d) == hdr[1:2])) return("x-system/exe")

  if (all(c(0x75,0x73,0x74,0x61,0x72) == hdr[258:262])) return("application/pax")
  if (all(c(0x75,0x73,0x74,0x61,0x72) == hdr[258:262])) {
    if (all(c(0x00,0x30,0x30) == hdr[263:265]) || all(c(0x20,0x20,0x00) == hdr[263:265])) {
      return("application/tar")
    } else {
      return("application/pax")
    }
  }

  if (all(c(0x00,0x00,0x01,0xBA) == hdr[1:4])) return("video/mpeg")
  if (all(c(0x00,0x00,0x01,0xB3) == hdr[1:4])) return("video/mpeg")

M man/simplemagic_mime_db.Rd => man/simplemagic_mime_db.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/aaa.R
% Please edit documentation in R/aaa.r
\docType{data}
\name{simplemagic_mime_db}
\alias{simplemagic_mime_db}

M tests/testthat/test-wand.R => tests/testthat/test-wand.R +48 -23
@@ 5,36 5,56 @@ test_that("Basic file tests work", {
    actions.csv = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    actions.txt = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    actions.xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    test_1.2.class = "application/java-vm", test_1.3.class = "application/java-vm",
    test_1.4.class = "application/java-vm", test_1.5.class = "application/java-vm",
    test_128_44_jstereo.mp3 = "audio/mp3", test_excel_2000.xls = "application/msword",
    test_excel_spreadsheet.xml = "application/xml", test_excel_web_archive.mht = "message/rfc822",
    test_excel.xlsm = "application/zip", test_excel.xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    test_nocompress.tif = "image/tiff", test_powerpoint.pptm = "application/zip",
    test_1.2.class = "application/java-vm",
    test_1.3.class = "application/java-vm",
    test_1.4.class = "application/java-vm",
    test_1.5.class = "application/java-vm",
    test_128_44_jstereo.mp3 = "audio/mp3",
    test_excel_2000.xls = "application/msword",
    test_excel_spreadsheet.xml = "application/xml",
    test_excel_web_archive.mht = "message/rfc822",
    test_excel.xlsm = "application/zip",
    test_excel.xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    test_nocompress.tif = "image/tiff",
    test_powerpoint.pptm = "application/zip",
    test_powerpoint.pptx = "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    test_word_2000.doc = "application/msword", test_word_6.0_95.doc = "application/msword",
    test_word.docm = "application/zip", test_word.docx = "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    test.au = "audio/basic", test.bin = c(
    test_word_2000.doc = "application/msword",
    test_word_6.0_95.doc = "application/msword",
    test_word.docm = "application/zip",
    test_word.docx = "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    test.au = "audio/basic",
    test.bin = c(
      "application/mac-binary",
      "application/macbinary", "application/octet-stream", "application/x-binary",
      "application/x-macbinary"
    ), test.bmp = "image/bmp", test.dtd = "application/xml-dtd",
    test.emf = "application/x-msmetafile", test.eps = "application/postscript",
    test.fli = c("video/flc", "video/fli", "video/x-fli"), test.gif = "image/gif",
    test.ico = "image/x-icon", test.java = c(
    ), test.bmp = "image/bmp",
    test.dtd = "application/xml-dtd",
    test.emf = "application/x-msmetafile",
    test.eps = "application/postscript",
    test.fli = c("video/flc", "video/fli", "video/x-fli"),
    test.gif = "image/gif",
    test.ico = "image/x-icon",
    test.java = c(
      "text/plain", "text/x-java",
      "text/x-java-source"
    ), test.jpg = "image/jpeg", test.mp3 = "audio/mp3",
    test.odt = "application/vnd.oasis.opendocument.text", test.ogg = c(
    ), test.jpg = "image/jpeg",
    test.mp3 = "audio/mp3",
    test.odt = "application/vnd.oasis.opendocument.text",
    test.ogg = c(
      "application/ogg",
      "audio/ogg"
    ), test.pcx = c("image/pcx", "image/x-pcx"), test.pdf = "application/pdf",
    test.pl = c("text/plain", "text/x-perl", "text/x-script.perl"), test.png = "image/png", test.pnm = c(
    ), test.pcx = c("image/pcx", "image/x-pcx"),
    test.pdf = "application/pdf",
    test.pl = c("text/plain", "text/x-perl", "text/x-script.perl"),
    test.png = "image/png",
    test.pnm = c(
      "application/x-portable-anymap",
      "image/x-portable-anymap"
    ), test.ppm = "image/x-portable-pixmap",
    test.ppt = "application/msword", test.ps = "application/postscript",
    test.psd = "image/photoshop", test.py = c(
    test.ppt = "application/msword",
    test.ps = "application/postscript",
    test.psd = "image/photoshop",
    test.py = c(
      "text/x-python",
      "text/x-script.phyton"
    ), test.rtf = c(


@@ 44,16 64,21 @@ test_that("Basic file tests work", {
      "application/x-bsh",
      "application/x-sh", "application/x-shar", "text/x-script.sh",
      "text/x-sh"
    ), test.tar = "application/pax", test.tar.gz = c(
    ), test.tar = "application/tar",
    test.tar.gz = c(
      "application/octet-stream",
      "application/x-compressed", "application/x-gzip"
    ), test.tga = "image/x-tga",
    test.txt = "text/plain", test.txt.gz = c(
    test.txt = "text/plain",
    test.txt.gz = c(
      "application/octet-stream",
      "application/x-compressed", "application/x-gzip"
    ), test.wav = "audio/x-wav",
    test.wmf = c("application/x-msmetafile", "windows/metafile"), test.xcf = "application/x-xcf", test.xml = "application/xml",
    test.xpm = c("image/x-xbitmap", "image/x-xpixmap", "image/xpm"), test.zip = "application/zip"
    test.wmf = c("application/x-msmetafile", "windows/metafile"),
    test.xcf = "application/x-xcf",
    test.xml = "application/xml",
    test.xpm = c("image/x-xbitmap", "image/x-xpixmap", "image/xpm"),
    test.zip = "application/zip"
  ) -> results

  fils <- list.files(system.file("extdat", package="wand"), full.names=TRUE)