~lucasemmoreira/pdfminer

c85f1b4bfa9f0fa41e6a9985848c8792418ac184 — Lucas E M Moreira 1 year, 2 months ago
initial commit
8 files changed, 134 insertions(+), 0 deletions(-)

A .gitignore
A LICENSE
A README.md
A doc/intro.md
A makefile
A project.clj
A src/pdfminer/core.clj
A test/pdfminer/core_test.clj
A  => .gitignore +14 -0
@@ 1,14 @@
/target
/classes
/checkouts
profiles.clj
pom.xml
pom.xml.asc
*.jar
*.class
/.lein-*
/.nrepl-port
/.prepl-port
.hgignore
.hg/
*.pdf
\ No newline at end of file

A  => LICENSE +13 -0
@@ 1,13 @@
Copyright (c) 2023 Lucas Esperancini Moreira e Moreira <me@lucasemmoreira.xyz>

Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

A  => README.md +22 -0
@@ 1,22 @@
# pdfminer

A small command-line tool, packaged as a standalone jar, that extracts the text content of a PDF file.

## Installation

The project is built into a standalone jar with [Leiningen](https://leiningen.org/), wrapped by the makefile:

```
make uberjar
```

## Usage

Run the jar with the path of the PDF file to extract; the extracted text is printed to standard output:

    $ java -jar pdfminer-0.1.0-standalone.jar resources/filetouse.pdf






A  => doc/intro.md +3 -0
@@ 1,3 @@
# Introduction to pdfminer

TODO: write [great documentation](http://jacobian.org/writing/what-to-write/)

A  => makefile +7 -0
@@ 1,7 @@
# `test` and `uberjar` name commands, not files. Declaring them phony is
# required here because the repository contains a `test/` directory: without
# it, make considers the `test` target up to date and runs nothing.
.PHONY: test uberjar

# Build the standalone jar and run it against a sample PDF.
test:
	lein uberjar
	java -jar target/default+uberjar/pdfminer-0.1.0-standalone.jar resources/boleto.pdf

# Build the standalone jar and copy it into the current directory.
# (`cp` needs an explicit destination; the original command had none.)
uberjar:
	lein uberjar
	cp target/default+uberjar/pdfminer-0.1.0-standalone.jar .

A  => project.clj +12 -0
@@ 1,12 @@
;; Leiningen project definition for pdfminer.
(defproject pdfminer "0.1.0"
  :description "A simple jar to extract pdf content to text"
  ;; TODO: replace with the project's real home page.
  :url "http://example.com/FIXME"
  ;; Matches the LICENSE file shipped with the project (ISC license text).
  :license {:name "ISC"
            :url "https://opensource.org/licenses/ISC"}
  :dependencies [[org.clojure/clojure "1.11.1"]
                 [org.clojure/tools.cli "1.0.214"]
                 [pdfboxing "0.1.14"]]
  ;; Entry point namespace; only AOT-compiled by the :uberjar profile below.
  :main ^:skip-aot pdfminer.core
  ;; %s is replaced by the active profile names (e.g. target/default+uberjar).
  :target-path "target/%s"
  :profiles {:uberjar {:aot :all
                       :jvm-opts ["-Dclojure.compiler.direct-linking=true"]}})

A  => src/pdfminer/core.clj +56 -0
@@ 1,56 @@
(ns pdfminer.core
  (:require [clojure.tools.cli :refer [parse-opts]]
            [clojure.string :as string]
            [pdfboxing.text :refer [extract]])
  (:gen-class))


(defn usage
  "Build the help text printed for --help and for invalid invocations.

  `options-summary` is the option summary string produced by
  `clojure.tools.cli/parse-opts`; it is inserted under the \"Options:\"
  heading. Returns the whole text as one newline-joined string."
  [options-summary]
  (->> ["Extract the text content of a PDF file and print it."
        ""
        "Usage: pdfminer [options] file"
        ""
        "Options:"
        options-summary
        ""
        "Arguments:"
        "  file    Path to the PDF file to extract text from"]
       (string/join \newline)))

(defn error-msg
  "Format a collection of command-line parsing error strings as a single
  message: a fixed header, a blank line, then one error per line."
  [errors]
  (let [header  "The following errors occurred while parsing your command:\n\n"
        details (string/join \newline errors)]
    (str header details)))

;; Option specs passed to clojure.tools.cli/parse-opts.
;; Each spec is [short-flag long-flag description]; the description is what
;; appears next to the flag in the generated options summary.
(def cli-options
  [["-h" "--help" "Show this help message and exit"]])

(defn validate-args
  "Validate command line arguments.

  Parses `args` against `cli-options` and returns one of:
    {:exit-message msg :ok? true}  when --help was requested,
    {:exit-message msg}            when parsing reported errors, or when the
                                   number of positional arguments is not
                                   exactly one,
    {:file path :options options}  when exactly one positional argument was
                                   given; `path` is that argument."
  [args]
  (let [{:keys [options arguments errors summary]} (parse-opts args cli-options)]
    (cond
      ;; help => exit OK with usage summary
      (:help options)
      {:exit-message (usage summary) :ok? true}

      ;; parse errors => exit with a description of the errors
      errors
      {:exit-message (error-msg errors)}

      ;; exactly one positional argument: the file to process
      (= 1 (count arguments))
      {:file (first arguments) :options options}

      ;; anything else => exit with usage summary
      :else
      {:exit-message (usage summary)})))

(defn exit
  "Print `msg` followed by a newline, then terminate the JVM with exit code
  `status`. Does not return."
  [status msg]
  (println msg)
  (System/exit status))

(defn -main
  "Program entry point.

  Validates the command-line `args`. If validation yields an exit message,
  prints it and exits with status 0 when the result is flagged `:ok?`, and
  1 otherwise. Otherwise passes the given file to `extract` and prints the
  result."
  [& args]
  (let [{:keys [file exit-message ok?]} (validate-args args)]
    (if exit-message
      (exit (if ok? 0 1) exit-message)
      (println (extract file)))))

A  => test/pdfminer/core_test.clj +7 -0
@@ 1,7 @@
(ns pdfminer.core-test
  (:require [clojure.test :refer :all]
            [pdfminer.core :refer :all]))

;; Tests for the pure helper functions in pdfminer.core. They do not touch
;; the file system, so no PDF fixture is needed.

(deftest error-msg-test
  (testing "errors are listed one per line under a fixed header"
    (is (= "The following errors occurred while parsing your command:\n\nfirst\nsecond"
           (error-msg ["first" "second"])))))

(deftest validate-args-test
  (testing "a single positional argument is returned as the file to process"
    (let [result (validate-args ["doc.pdf"])]
      (is (= "doc.pdf" (:file result)))
      (is (nil? (:exit-message result)))))
  (testing "--help asks for a successful exit with a message"
    (let [result (validate-args ["--help"])]
      (is (:ok? result))
      (is (string? (:exit-message result)))))
  (testing "zero or several positional arguments ask for a failing exit"
    (doseq [args [[] ["a.pdf" "b.pdf"]]]
      (let [result (validate-args args)]
        (is (string? (:exit-message result)))
        (is (not (:ok? result)))
        (is (nil? (:file result))))))
  (testing "unknown options ask for a failing exit"
    (let [result (validate-args ["--bogus" "doc.pdf"])]
      (is (string? (:exit-message result)))
      (is (not (:ok? result))))))