95e723b82401eeaca22f52f50c6e7fe705daedff — boB Rudis 1 year, 1 month ago 0385108
Added better jpeg + new lzip, 7-zip, wasm, pcap, avro, parquet detections
A .vscode/c_cpp_properties.json => .vscode/c_cpp_properties.json +80 -0
@@ 0,0 1,80 @@
+{
+    "configurations": [
+        {
+            "name": "Mac",
+            "includePath": [
+                "${workspaceFolder}",
+                "/Library/Developer/CommandLineTools/usr/include/c++/v1",
+                "/usr/local/include",
+                "/Library/Developer/CommandLineTools/usr/lib/clang/9.0.0/include",
+                "/Library/Developer/CommandLineTools/usr/include",
+                "/usr/include",
+                "/Library/Frameworks/R.framework/Versions/3.5/Resources/library/Rcpp/include",
+                "/Library/Frameworks/R.framework/Versions/3.5/PrivateHeaders",
+                "/Library/Frameworks/R.framework/Versions/3.5/Resources/include"
+            ],
+            "defines": [],
+            "intelliSenseMode": "clang-x64",
+            "browse": {
+                "path": [
+                    "${workspaceFolder}",
+                    "/Library/Developer/CommandLineTools/usr/include/c++/v1",
+                    "/usr/local/include",
+                    "/Library/Developer/CommandLineTools/usr/lib/clang/9.0.0/include",
+                    "/Library/Developer/CommandLineTools/usr/include",
+                    "/usr/include"
+                ],
+                "limitSymbolsToIncludedHeaders": true,
+                "databaseFilename": ""
+            },
+            "macFrameworkPath": [
+                "/System/Library/Frameworks",
+                "/Library/Frameworks"
+            ],
+            "compilerPath": "/usr/bin/clang",
+            "cStandard": "c11",
+            "cppStandard": "c++17"
+        },
+        {
+            "name": "Linux",
+            "includePath": [
+                "/usr/include",
+                "/usr/local/include",
+                "${workspaceFolder}"
+            ],
+            "defines": [],
+            "intelliSenseMode": "clang-x64",
+            "browse": {
+                "path": [
+                    "/usr/include",
+                    "/usr/local/include",
+                    "${workspaceFolder}"
+                ],
+                "limitSymbolsToIncludedHeaders": true,
+                "databaseFilename": ""
+            }
+        },
+        {
+            "name": "Win32",
+            "includePath": [
+                "C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/include",
+                "${workspaceFolder}"
+            ],
+            "defines": [
+                "_DEBUG",
+                "UNICODE",
+                "_UNICODE"
+            ],
+            "intelliSenseMode": "msvc-x64",
+            "browse": {
+                "path": [
+                    "C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/include/*",
+                    "${workspaceFolder}"
+                ],
+                "limitSymbolsToIncludedHeaders": true,
+                "databaseFilename": ""
+            }
+        }
+    ],
+    "version": 3
+}
\ No newline at end of file

M DESCRIPTION => DESCRIPTION +1 -1
@@ 1,7 1,7 @@
 Package: wand
 Type: Package
 Title: Retrieve 'Magic' Attributes from Files and Directories
-Version: 0.3.0
+Version: 0.4.0
 Date: 2018-09-16
 Authors@R: c(
     person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), 

M NEWS.md => NEWS.md +13 -0
@@ 1,3 1,16 @@
+# 0.4.0
+* Enhanced jpeg detection
+* Added distinction between pax and tar
+* Added lzip detection
+* Added 7-zip detection
+* Added wasm file detection
+* Added pcap and pcapng detection
+* Added avro detection
+* Added parquet detection
+
+# 0.3.0
+* Dropped libmagic
+
 # 0.2.0
 * Works on Windows
 

M R/get-content-type.R => R/get-content-type.R +22 -1
@@ 22,6 22,9 @@
 
   hdr <- readBin(path, "raw", n=1024)
 
+  if (all(c(0x4F,0x62,0x6A,0x01) == hdr[1:4])) return("application/vnd.apache.avro+binary")
+  if (all(c(0x50,0x41,0x52,0x31) == hdr[1:4])) return("application/x-parquet")
+
   if (all(c(0xCA,0xFE,0xBA,0xBE) == hdr[1:4])) return("application/java-vm")
 
   if (all(c(0xD0,0xCF,0x11,0xE0,0xA1,0xB1,0x1A,0xE1) == hdr[1:8])) {


@@ 43,6 46,8 @@
   if (all(c(0x49,0x44,0x33) == hdr[1:3])) return("audio/mp3")
   if (all(c(0xAC,0xED) == hdr[1:2])) return("application/x-java-serialized-object")
 
+  if (all(c(0x4c,0x5a,0x49,0x50) == hdr[1:4])) return("application/x-lzip")
+
   if (hdr[1] == 0x3c) { # "<"
     if (all(c(0x68,0x74,0x6d,0x6c) == hdr[2:5])) return("text/html") # "html"
     if (all(c(0x48,0x54,0x4d,0x4c) == hdr[2:5])) return("text/html") # "HTML"


@@ 51,6 56,11 @@
     if (all(c(0x3f,0x78,0x6d,0x6c,0x20) == hdr[2:6])) return("application/xml")
   }
 
+  if (all(c(0x0a,0x0d,0x0d,0x0a) == hdr[1:4])) return("application/x-pcapng") # pcapng section header block; return() was missing so the match was discarded
+
+  if (all(c(0xa1,0xb2,0xc3,0xd4) == hdr[1:4]) ||
+      all(c(0xd4,0xc3,0xb2,0xa1) == hdr[1:4])) return("application/x-cap")
+
   if (all(c(0xfe,0xff) == hdr[1:2])) {
     if (all(c(0x00,0x3c,0x00,0x3f,0x00,0x78) == hdr[3:8])) return("application/xml")
   }


@@ 77,6 87,7 @@
     return("application/javascript")
 
   if (all(c(0xFF,0xD8,0xFF) == hdr[1:3])) {
+    if (0xDB == hdr[4]) return("image/jpeg")
     if (0xE0 == hdr[4]) return("image/jpeg")
     if (0xE1 == hdr[4]) {
       if (all(c(0x45,0x78,0x69,0x66,0x00) == hdr[7:11])) return("image/jpeg") # Exif


@@ 103,9 114,19 @@
 
   }
 
+  if (all(c(0x00,0x61,0x73,0x6d) == hdr[1:4])) return("application/wasm")
+
+  if (all(c(0x37,0x7A,0xBC,0xAF,0x27,0x1C) == hdr[1:6])) return("application/x-7z-compressed")
+
   if (all(c(0x5a,0x4d) == hdr[1:2])) return("x-system/exe")
 
-  if (all(c(0x75,0x73,0x74,0x61,0x72) == hdr[258:262])) return("application/pax")
+  if (all(c(0x75,0x73,0x74,0x61,0x72) == hdr[258:262])) {
+    if (all(c(0x00,0x30,0x30) == hdr[263:265]) || all(c(0x20,0x20,0x00) == hdr[263:265])) {
+      return("application/tar")
+    } else {
+      return("application/pax")
+    }
+  }
 
   if (all(c(0x00,0x00,0x01,0xBA) == hdr[1:4])) return("video/mpeg")
   if (all(c(0x00,0x00,0x01,0xB3) == hdr[1:4])) return("video/mpeg")

M man/simplemagic_mime_db.Rd => man/simplemagic_mime_db.Rd +1 -1
@@ 1,5 1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/aaa.R
+% Please edit documentation in R/aaa.r
 \docType{data}
 \name{simplemagic_mime_db}
 \alias{simplemagic_mime_db}

M tests/testthat/test-wand.R => tests/testthat/test-wand.R +48 -23
@@ 5,36 5,56 @@
     actions.csv = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
     actions.txt = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
     actions.xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-    test_1.2.class = "application/java-vm", test_1.3.class = "application/java-vm",
-    test_1.4.class = "application/java-vm", test_1.5.class = "application/java-vm",
-    test_128_44_jstereo.mp3 = "audio/mp3", test_excel_2000.xls = "application/msword",
-    test_excel_spreadsheet.xml = "application/xml", test_excel_web_archive.mht = "message/rfc822",
-    test_excel.xlsm = "application/zip", test_excel.xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-    test_nocompress.tif = "image/tiff", test_powerpoint.pptm = "application/zip",
+    test_1.2.class = "application/java-vm",
+    test_1.3.class = "application/java-vm",
+    test_1.4.class = "application/java-vm",
+    test_1.5.class = "application/java-vm",
+    test_128_44_jstereo.mp3 = "audio/mp3",
+    test_excel_2000.xls = "application/msword",
+    test_excel_spreadsheet.xml = "application/xml",
+    test_excel_web_archive.mht = "message/rfc822",
+    test_excel.xlsm = "application/zip",
+    test_excel.xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    test_nocompress.tif = "image/tiff",
+    test_powerpoint.pptm = "application/zip",
     test_powerpoint.pptx = "application/vnd.openxmlformats-officedocument.presentationml.presentation",
-    test_word_2000.doc = "application/msword", test_word_6.0_95.doc = "application/msword",
-    test_word.docm = "application/zip", test_word.docx = "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-    test.au = "audio/basic", test.bin = c(
+    test_word_2000.doc = "application/msword",
+    test_word_6.0_95.doc = "application/msword",
+    test_word.docm = "application/zip",
+    test_word.docx = "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    test.au = "audio/basic",
+    test.bin = c(
       "application/mac-binary",
       "application/macbinary", "application/octet-stream", "application/x-binary",
       "application/x-macbinary"
-    ), test.bmp = "image/bmp", test.dtd = "application/xml-dtd",
-    test.emf = "application/x-msmetafile", test.eps = "application/postscript",
-    test.fli = c("video/flc", "video/fli", "video/x-fli"), test.gif = "image/gif",
-    test.ico = "image/x-icon", test.java = c(
+    ), test.bmp = "image/bmp",
+    test.dtd = "application/xml-dtd",
+    test.emf = "application/x-msmetafile",
+    test.eps = "application/postscript",
+    test.fli = c("video/flc", "video/fli", "video/x-fli"),
+    test.gif = "image/gif",
+    test.ico = "image/x-icon",
+    test.java = c(
       "text/plain", "text/x-java",
       "text/x-java-source"
-    ), test.jpg = "image/jpeg", test.mp3 = "audio/mp3",
-    test.odt = "application/vnd.oasis.opendocument.text", test.ogg = c(
+    ), test.jpg = "image/jpeg",
+    test.mp3 = "audio/mp3",
+    test.odt = "application/vnd.oasis.opendocument.text",
+    test.ogg = c(
       "application/ogg",
       "audio/ogg"
-    ), test.pcx = c("image/pcx", "image/x-pcx"), test.pdf = "application/pdf",
-    test.pl = c("text/plain", "text/x-perl", "text/x-script.perl"), test.png = "image/png", test.pnm = c(
+    ), test.pcx = c("image/pcx", "image/x-pcx"),
+    test.pdf = "application/pdf",
+    test.pl = c("text/plain", "text/x-perl", "text/x-script.perl"),
+    test.png = "image/png",
+    test.pnm = c(
       "application/x-portable-anymap",
       "image/x-portable-anymap"
     ), test.ppm = "image/x-portable-pixmap",
-    test.ppt = "application/msword", test.ps = "application/postscript",
-    test.psd = "image/photoshop", test.py = c(
+    test.ppt = "application/msword",
+    test.ps = "application/postscript",
+    test.psd = "image/photoshop",
+    test.py = c(
       "text/x-python",
       "text/x-script.phyton"
     ), test.rtf = c(


@@ 44,16 64,21 @@
       "application/x-bsh",
       "application/x-sh", "application/x-shar", "text/x-script.sh",
       "text/x-sh"
-    ), test.tar = "application/pax", test.tar.gz = c(
+    ), test.tar = "application/tar",
+    test.tar.gz = c(
       "application/octet-stream",
       "application/x-compressed", "application/x-gzip"
     ), test.tga = "image/x-tga",
-    test.txt = "text/plain", test.txt.gz = c(
+    test.txt = "text/plain",
+    test.txt.gz = c(
       "application/octet-stream",
       "application/x-compressed", "application/x-gzip"
     ), test.wav = "audio/x-wav",
-    test.wmf = c("application/x-msmetafile", "windows/metafile"), test.xcf = "application/x-xcf", test.xml = "application/xml",
-    test.xpm = c("image/x-xbitmap", "image/x-xpixmap", "image/xpm"), test.zip = "application/zip"
+    test.wmf = c("application/x-msmetafile", "windows/metafile"),
+    test.xcf = "application/x-xcf",
+    test.xml = "application/xml",
+    test.xpm = c("image/x-xbitmap", "image/x-xpixmap", "image/xpm"),
+    test.zip = "application/zip"
   ) -> results
 
   fils <- list.files(system.file("extdat", package="wand"), full.names=TRUE)