M .Rbuildignore => .Rbuildignore +1 -0
@@ 1,3 1,4 @@
+^LICENSE\.md$
^.*\.Rproj$
^\.Rproj\.user$
^README\.Rmd$
M .travis.yml => .travis.yml +3 -3
@@ 18,12 18,12 @@ addons:
- oracle-java8-set-default
before_install:
- - docker pull drill/apache-drill:1.14.0
- - docker run -i --name drill-1.14.0 -p 8047:8047 --detach -t drill/apache-drill:1.14.0 /bin/bash
+ - docker pull drill/apache-drill:1.16.0
+ - docker run -i --name drill-1.16.0 -p 8047:8047 --detach -t drill/apache-drill:1.16.0 /bin/bash
- docker ps -a
before_script:
- - sleep 5
+ - sleep 10
after_success:
- Rscript -e 'covr::codecov()'
M DESCRIPTION => DESCRIPTION +11 -5
@@ 6,18 6,21 @@ Authors@R: c(
comment = c(ORCID = "0000-0001-5670-2640")),
person("Edward", "Visel", email = "edward.visel@gmail.com", role = "ctb"),
person("Andy", "Hine", email = "andyyhine@gmail.com", role = "ctb"),
- person("Scott", "Came", email = "scottcame10@gmail.com", role = "ctb")
+ person("Scott", "Came", email = "scottcame10@gmail.com", role = "ctb"),
+ person("David", "Severski", email = "davidski@deadheaven.com", role = "ctb",
+ comment = c(ORCID = "https://orcid.org/0000-0001-7867-0459"))
)
Description: 'Apache Drill' is a low-latency distributed query engine designed to enable
data exploration and 'analytics' on both relational and non-relational 'datastores',
scaling to petabytes of data. Methods are provided that enable working with 'Apache'
'Drill' instances via the 'REST' 'API', 'JDBC' interface (optional), 'DBI' 'methods'
- and using 'dplyr'/'dbplyr' idioms.
+ and using 'dplyr'/'dbplyr' idioms. Helper functions are included to facilitate
+ using official 'Drill' 'Docker' images/containers.
Depends:
- R (>= 3.1.2)
+ R (>= 3.2.0)
URL: https://gitlab.com/hrbrmstr/sergeant
BugReports: https://gitlab.com/hrbrmstr/sergeant/issues
-License: GPL-3 + file LICENSE
+License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Imports:
@@ 36,6 39,9 @@ Imports:
magrittr (>= 1.5)
Suggests:
DT (>= 0.5),
+ stevedore,
testthat (>= 1.0.2),
- covr (>= 3.0.0)
+ covr (>= 3.0.0),
+ DBItest
RoxygenNote: 6.1.1
+Roxygen: list(markdown = TRUE)
M LICENSE => LICENSE +2 -674
@@ 1,674 1,2 @@
-GNU GENERAL PUBLIC LICENSE
-Version 3, 29 June 2007
-
-Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
-Everyone is permitted to copy and distribute verbatim copies
-of this license document, but changing it is not allowed.
-
-Preamble
-
-The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
-The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users. We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors. You can apply it to
-your programs, too.
-
-When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
-To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights. Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
-For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received. You must make sure that they, too, receive
-or can get the source code. And you must show them these terms so they
-know their rights.
-
-Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
-For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software. For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
-Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so. This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software. The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable. Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products. If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
-Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary. To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
-The precise terms and conditions for copying, distribution and
-modification follow.
-
-TERMS AND CONDITIONS
-
-0. Definitions.
-
-"This License" refers to version 3 of the GNU General Public License.
-
-"Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
-"The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
-To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy. The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
-A "covered work" means either the unmodified Program or a work based
-on the Program.
-
-To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
-To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
-An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
-1. Source Code.
-
-The "source code" for a work means the preferred form of the work
-for making modifications to it. "Object code" means any non-source
-form of a work.
-
-A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
-The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
-The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
-The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
-The Corresponding Source for a work in source code form is that
-same work.
-
-2. Basic Permissions.
-
-All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
-You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force. You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright. Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
-Conveying under any other circumstances is permitted solely under
-the conditions stated below. Sublicensing is not allowed; section 10
-makes it unnecessary.
-
-3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
-No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
-When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
-4. Conveying Verbatim Copies.
-
-You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
-You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
-5. Conveying Modified Source Versions.
-
-You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
-a) The work must carry prominent notices stating that you modified
-it, and giving a relevant date.
-
-b) The work must carry prominent notices stating that it is
-released under this License and any conditions added under section
-7. This requirement modifies the requirement in section 4 to
-"keep intact all notices".
-
-c) You must license the entire work, as a whole, under this
-License to anyone who comes into possession of a copy. This
-License will therefore apply, along with any applicable section 7
-additional terms, to the whole of the work, and all its parts,
-regardless of how they are packaged. This License gives no
-permission to license the work in any other way, but it does not
-invalidate such permission if you have separately received it.
-
-d) If the work has interactive user interfaces, each must display
-Appropriate Legal Notices; however, if the Program has interactive
-interfaces that do not display Appropriate Legal Notices, your
-work need not make them do so.
-
-A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
-6. Conveying Non-Source Forms.
-
-You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
-a) Convey the object code in, or embodied in, a physical product
-(including a physical distribution medium), accompanied by the
-Corresponding Source fixed on a durable physical medium
-customarily used for software interchange.
-
-b) Convey the object code in, or embodied in, a physical product
-(including a physical distribution medium), accompanied by a
-written offer, valid for at least three years and valid for as
-long as you offer spare parts or customer support for that product
-model, to give anyone who possesses the object code either (1) a
-copy of the Corresponding Source for all the software in the
-product that is covered by this License, on a durable physical
-medium customarily used for software interchange, for a price no
-more than your reasonable cost of physically performing this
-conveying of source, or (2) access to copy the
-Corresponding Source from a network server at no charge.
-
-c) Convey individual copies of the object code with a copy of the
-written offer to provide the Corresponding Source. This
-alternative is allowed only occasionally and noncommercially, and
-only if you received the object code with such an offer, in accord
-with subsection 6b.
-
-d) Convey the object code by offering access from a designated
-place (gratis or for a charge), and offer equivalent access to the
-Corresponding Source in the same way through the same place at no
-further charge. You need not require recipients to copy the
-Corresponding Source along with the object code. If the place to
-copy the object code is a network server, the Corresponding Source
-may be on a different server (operated by you or a third party)
-that supports equivalent copying facilities, provided you maintain
-clear directions next to the object code saying where to find the
-Corresponding Source. Regardless of what server hosts the
-Corresponding Source, you remain obligated to ensure that it is
-available for as long as needed to satisfy these requirements.
-
-e) Convey the object code using peer-to-peer transmission, provided
-you inform other peers where the object code and Corresponding
-Source of the work are being offered to the general public at no
-charge under subsection 6d.
-
-A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
-A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling. In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage. For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product. A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
-"Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source. The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
-If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
-The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
-Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
-7. Additional Terms.
-
-"Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
-When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
-Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
-a) Disclaiming warranty or limiting liability differently from the
-terms of sections 15 and 16 of this License; or
-
-b) Requiring preservation of specified reasonable legal notices or
-author attributions in that material or in the Appropriate Legal
-Notices displayed by works containing it; or
-
-c) Prohibiting misrepresentation of the origin of that material, or
-requiring that modified versions of such material be marked in
-reasonable ways as different from the original version; or
-
-d) Limiting the use for publicity purposes of names of licensors or
-authors of the material; or
-
-e) Declining to grant rights under trademark law for use of some
-trade names, trademarks, or service marks; or
-
-f) Requiring indemnification of licensors and authors of that
-material by anyone who conveys the material (or modified versions of
-it) with contractual assumptions of liability to the recipient, for
-any liability that these contractual assumptions directly impose on
-those licensors and authors.
-
-All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
-If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
-Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
-8. Termination.
-
-You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
-However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
-Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
-Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
-9. Acceptance Not Required for Having Copies.
-
-You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
-10. Automatic Licensing of Downstream Recipients.
-
-Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
-An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
-You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
-11. Patents.
-
-A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
-A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
-Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
-In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
-If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
-If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
-A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
-Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
-12. No Surrender of Others' Freedom.
-
-If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
-13. Use with the GNU Affero General Public License.
-
-Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
-14. Revised Versions of this License.
-
-The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
-If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
-Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
-15. Disclaimer of Warranty.
-
-THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-16. Limitation of Liability.
-
-IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
-17. Interpretation of Sections 15 and 16.
-
-If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
-END OF TERMS AND CONDITIONS
-
-How to Apply These Terms to Your New Programs
-
-If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-<one line to give the program's name and a brief idea of what it does.>
-Copyright (C) <year> <name of author>
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
-<program> Copyright (C) <year> <name of author>
-This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-This is free software, and you are welcome to redistribute it
-under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
-You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
-The GNU General Public License does not permit incorporating your program
-into proprietary programs. If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License. But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
+YEAR: 2019
+COPYRIGHT HOLDER: Bob Rudis
A LICENSE.md => LICENSE.md +21 -0
@@ 0,0 1,21 @@
+# MIT License
+
+Copyright (c) 2019 Bob Rudis
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
M NAMESPACE => NAMESPACE +12 -0
@@ 5,19 5,24 @@ S3method(db_data_type,DrillConnection)
S3method(db_desc,src_drill)
S3method(db_explain,DrillConnection)
S3method(db_query_fields,DrillConnection)
+S3method(format,DrillConnection)
+S3method(print,drill_conn)
S3method(sql_escape_ident,DrillConnection)
S3method(sql_translate_env,DrillConnection)
S3method(src_tbls,src_drill)
S3method(tbl,src_drill)
export("%>%")
export(Drill)
+export(ctas_profile)
export(drill_active)
export(drill_cancel)
export(drill_connection)
+export(drill_down)
export(drill_functions)
export(drill_metrics)
export(drill_mod_storage)
export(drill_options)
+export(drill_opts)
export(drill_profile)
export(drill_profiles)
export(drill_query)
@@ 31,9 36,12 @@ export(drill_status)
export(drill_storage)
export(drill_system_reset)
export(drill_threads)
+export(drill_up)
export(drill_uplift)
export(drill_use)
export(drill_version)
+export(killall_drill)
+export(showall_drill)
export(src_drill)
export(tbl)
exportClasses(DrillConnection)
@@ 44,10 52,12 @@ exportMethods(dbConnect)
exportMethods(dbDataType)
exportMethods(dbDisconnect)
exportMethods(dbFetch)
+exportMethods(dbGetInfo)
exportMethods(dbGetStatement)
exportMethods(dbHasCompleted)
exportMethods(dbIsValid)
exportMethods(dbListFields)
+exportMethods(dbSendQuery)
exportMethods(dbUnloadDriver)
import(DBI)
import(bit64)
@@ 71,6 81,7 @@ importFrom(dbplyr,win_recycled)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
+importFrom(dplyr,collect)
importFrom(dplyr,copy_to)
importFrom(dplyr,data_frame)
importFrom(dplyr,db_data_type)
@@ 90,4 101,5 @@ importFrom(purrr,"%>%")
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map2_df)
+importFrom(readr,type_convert)
importFrom(scales,comma)
M NEWS.md => NEWS.md +6 -0
@@ 12,6 12,12 @@
- License change to GPL-3 since the pkg now uses code from bibrquery
- New `option()` => "`sergeant.bigint.warnonce`" to control `BIGINT`
warnings. Use `options(sergeant.bigint.warnonce = FALSE)`
+- Fixed bug in `dbListFields()` (`DrillConnection-class`)
+- Added `as.integer64()` `dplyr` custom Drill function to convert
+ columns to `BIGINT`
+- Added `ctas_profile()` to automagically create a CTAS query from
+ a Drill `tbl` (Ref: #29 / inspired by David Severski)
+- Added more tests to cover DBI and extended d[b]plyr operations
# sergeant 0.7.0-BETA
M R/aaa.r => R/aaa.r +0 -1
@@ 8,4 8,3 @@ make_server <- function(drill_con) {
drill_con$host, drill_con$port)
}
-#
A R/ctas-profile.R => R/ctas-profile.R +106 -0
@@ 0,0 1,106 @@
+#' Generate a Drill CTAS Statement from a Query
+#'
+#' When working with CSV\[H] files in Drill 1.15.0+ everything comes back
+#' `VARCHAR` since that's the way it should be. The old behaviour of
+#' `sergeant` to auto-type convert was kinda horribad wrong. However,
+#' it's a royal pain to make [`CTAS`](https://drill.apache.org/docs/create-table-as-ctas/)
+#' queries from a giant list of `VARCHAR` fields by hand. So, this is a
+#' helper function to do that, inspired by David Severski.
+#'
+#' Column types are guessed by collecting `head(x)` and running
+#' [readr::type_convert()] over the result. Columns guessed as `DATE` or
+#' `TIMESTAMP` are emitted as `TO_DATE()` / `TO_TIMESTAMP()` calls with a
+#' `'FORMATSTRING'` placeholder that must be edited by hand; every other
+#' column is emitted as a `CAST()`.
+#'
+#' @note WIP!
+#' @md
+#' @param x a `tbl`
+#' @param new_table_name a new Drill data source spec (e.g. \code{dfs.xyz.`a.parquet`})
+#' @return a length-one character vector holding the generated `CTAS` statement
+#' @export
+#' @examples \dontrun{
+#' db <- src_drill("localhost")
+#'
+#' # Test with bare data source
+#' flt1 <- tbl(db, "dfs.d.`/flights.csvh`")
+#'
+#' cat(ctas_profile(flt1))
+#'
+#' # Test with SELECT
+#' flt2 <- tbl(db, sql("SELECT `year`, tailnum, time_hour FROM dfs.d.`/flights.csvh`"))
+#'
+#' cat(ctas_profile(flt2, "dfs.d.`flights.parquet`"))
+#'
+#' }
+ctas_profile <- function(x, new_table_name = "CHANGE____ME") {
+
+  stopifnot(inherits(x, "tbl_drill"))
+
+  # Pull a small sample and let readr guess a type for each column
+  vals_orig <- dplyr::collect(head(x))
+
+  vals <- suppressMessages(readr::type_convert(vals_orig))
+
+  # Map the first class of an R vector to a Drill SQL type name
+  data_type <- function(x) {
+    switch(
+      class(x)[1],
+      integer64 = "BIGINT",
+      logical = "BOOLEAN",
+      integer = "INTEGER",
+      numeric = "DOUBLE",
+      factor = "VARCHAR",
+      character = "VARCHAR",
+      Date = "DATE",
+      POSIXct = "TIMESTAMP",
+      stop("Can't map type ", paste(class(x), collapse = "/"),
+           " to a supported database type.")
+    )
+  }
+
+  field_types <- vapply(vals, data_type, character(1))
+  field_names <- names(field_types)
+
+  # Date/time columns need a user-supplied format string, so they cannot
+  # be expressed as a plain CAST
+  is_temporal <- field_types %in% c("DATE", "TIMESTAMP")
+
+  casts <- sprintf(
+    " CAST(`%s` AS %s) AS `%s`", field_names, field_types, field_names
+  )
+
+  # "TO_%s" expands to TO_DATE or TO_TIMESTAMP to match the guessed type
+  casts[is_temporal] <- sprintf(
+    " TO_%s(`%s`, 'FORMATSTRING') AS `%s`",
+    field_types[is_temporal],
+    field_names[is_temporal],
+    field_names[is_temporal]
+  )
+
+  notes <- rep("", length(casts))
+  notes[is_temporal] <- vapply(
+    field_names[is_temporal],
+    function(fn) {
+      # keep the sample on one line so it cannot escape the SQL comment
+      sample_val <- gsub("[\r\n]+", " ", as.character(vals_orig[[fn]][1]))
+      sprintf(
+        " -- *NOTE* You need to specify the format string. Sample character data is: [%s]. ",
+        sample_val
+      )
+    },
+    character(1),
+    USE.NAMES = FALSE
+  )
+
+  # The separating comma has to come *before* any trailing "--" comment,
+  # otherwise it is commented out and the statement is invalid
+  seps <- rep(",", length(casts))
+  seps[length(seps)] <- ""
+
+  casts <- paste0(casts, seps, notes)
+
+  orig_query <- x$ops$x
+
+  # NOTE(review): this matches "select" anywhere in the source, so a bare
+  # table reference whose name contains "select" is treated as a query
+  if (!grepl("select", orig_query, ignore.case=TRUE)) {
+    orig_query <- sprintf("SELECT * FROM %s", orig_query)
+  }
+
+  out <- sprintf(
+    "CREATE TABLE %s AS\nSELECT\n%s\nFROM (%s)\n",
+    new_table_name,
+    paste0(casts, collapse="\n"),
+    orig_query
+  )
+
+  out <- paste0(
+    "-- ** Created by ctas_profile() in the R sergeant package, version ",
+    toString(packageVersion("sergeant")), " **\n\n", out
+  )
+
+  if (any(is_temporal)) {
+    # every footer line carries its own "--" so the output stays valid SQL
+    out <- paste0(
+      out,
+      "\n\n-- TIMESTAMP and/or DATE columns were detected.\n",
+      "-- Drill's date/time format string reference can be found at:\n--\n",
+      "-- <http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html>"
+    )
+  }
+
+  out
+
+}
M R/custom.r => R/custom.r +1 -0
@@ 17,6 17,7 @@
#' the link found in the `DESCRIPTION` file.
#'
#' - `as.character(x)` : `CAST( x AS CHARACTER )`
+#' - `as.integer64(x)` : `CAST( x AS BIGINT )`
#' - `as.date(x)` : `CAST( x AS DATE )`
#' - `as.logical(x)` : `CAST( x AS BOOLEAN) `
#' - `as.numeric(x)` : `CAST( x AS DOUBLE )`
M R/dbi.r => R/dbi.r +116 -20
@@ 97,7 97,7 @@ setMethod(
setMethod(
"dbDisconnect",
"DrillConnection", function(conn, ...) {
- TRUE
+ invisible(TRUE)
},
valueClass = "logical"
)
@@ 126,6 126,7 @@ cmake_server <- function(conn) {
#' @param conn connection
#' @param statement SQL statement
#' @param ... passed on to methods
+#' @export
#' @family Drill REST DBI API
#' @aliases dbSendQuery,DrillConnection,character-method
setMethod(
@@ 219,7 220,8 @@ setMethod(
warning(resp, call.=FALSE)
- return(dplyr::data_frame())
+ xdf <- dplyr::tibble()
+ return(xdf)
} else {
@@ 231,7 233,50 @@ setMethod(
# ** only available in Drill 1.15.0+ **
# properly arrange columns
- if (length(out$columns) != 0) xdf <- xdf[,out$columns,drop=FALSE]
+ if (length(out$columns) != 0) {
+ if (is.data.frame(xdf)) {
+ if (nrow(xdf) > 0) xdf <- xdf[,out$columns,drop=FALSE]
+ } else {
+ lapply(1:length(out$columns), function(col_idx) {
+ ctype <- out$metadata[col_idx]
+ if (ctype == "INT") {
+ integer(0)
+ } else if (ctype == "VARCHAR") {
+ character(0)
+ } else if (ctype == "TIMESTAMP") {
+ cx <- integer(0)
+ class(cx) <- "POSIXct"
+ cx
+ } else if (ctype == "BIGINT") {
+ integer64(0)
+ } else if (ctype == "BINARY") {
+ character(0)
+ } else if (ctype == "BOOLEAN") {
+ logical(0)
+ } else if (ctype == "DATE") {
+ cx <- integer(0)
+ class(cx) <- "Date"
+ cx
+ } else if (ctype == "FLOAT") {
+ numeric(0)
+ } else if (ctype == "DOUBLE") {
+ double(0)
+ } else if (ctype == "TIME") {
+ character(0)
+ } else if (ctype == "INTERVAL") {
+ character(0)
+ } else {
+ character(0)
+ }
+ }) -> xdf
+ xdf <- set_names(xdf, out$columns)
+ class(xdf) <- c("data.frame")
+ return(xdf)
+ }
+ } else {
+ xdf <- dplyr::tibble()
+ return(xdf)
+ }
# ** only available in Drill 1.15.0+ **
# be smarter about type conversion now that the REST API provides
@@ 255,7 300,7 @@ setMethod(
"R ODBC interface to Apache Drill with the MapR ODBC drivers.\n\n",
"This informational warning will only be shown once per R session and ",
"you can disable them from appearing by setting the 'sergeant.bigint.warnonce' ",
- "to 'FALSE' (i.e. options(sergeant.bigint.warnonce = FALSE)).",
+ "option to 'FALSE' (i.e. options(sergeant.bigint.warnonce = FALSE)).",
call.=FALSE
)
}
@@ 279,7 324,7 @@ setMethod(
ctype == "FLOAT" ~ "d",
ctype == "DOUBLE" ~ "d",
ctype == "TIME" ~ "c",
- ctype == "INTERVAL" ~ "c",
+ ctype == "INTERVAL" ~ "?",
TRUE ~ "?"
)
@@ 324,13 369,16 @@ setMethod(
"dbDataType",
"DrillConnection",
function(dbObj, obj, ...) {
+
+ stopifnot(!is.null(obj))
+
if (is.integer(obj)) "INTEGER"
else if (inherits(obj, "Date")) "DATE"
else if (identical(class(obj), "times")) "TIME"
else if (inherits(obj, "POSIXct")) "TIMESTAMP"
else if (inherits(obj, "integer64")) "BIGINT"
else if (is.numeric(obj)) "DOUBLE"
- else "VARCHAR(255)"
+ else "VARCHAR"
},
valueClass = "character"
)
@@ 365,7 413,8 @@ setMethod(
'dbListFields',
c('DrillConnection', 'character'),
function(conn, name, ...) {
- quoted.name <- dbQuoteIdentifier(conn, name)
+ #quoted.name <- dbQuoteIdentifier(conn, name)
+ quoted.name <- name
names(dbGetQuery(conn, paste('SELECT * FROM', quoted.name, 'LIMIT 1')))
}
)
@@ 385,24 434,17 @@ setMethod(
) -> res
# fatal query error on the Drill side so return no fields
- if (httr::status_code(res) != 200) {
- #warning(content(res, as="parsed"), call.=FALSE)
- return(character())
- }
+ if (httr::status_code(res) != 200) return(character())
out <- httr::content(res, as = "text", encoding = "UTF-8")
out <- jsonlite::fromJSON(out, flatten = TRUE)
- suppressMessages(
- dplyr::tbl_df(
- readr::type_convert(out$rows, na = character())
- )
- ) -> xdf
-
- if (length(out$columns) != 0) xdf <- xdf[,out$columns]
-
- colnames(xdf)
+ if (length(out$columns) != 0) {
+ return(out$columns)
+ } else {
+ return(colnames(out$rows))
+ }
}
)
@@ 417,3 459,57 @@ setMethod(
'DrillResult',
function(res, ...) { return(res@statement) }
)
+
+
+#' Metadata about database objects
+#'
+#' For a driver, both reported versions are the installed `sergeant`
+#' package version.
+#' @rdname dbGetInfo
+#' @param dbObj A \code{\linkS4class{DrillDriver}} or \code{\linkS4class{DrillConnection}} object
+#' @export
+setMethod(
+  "dbGetInfo",
+  "DrillDriver",
+  function(dbObj) {
+    pkg_version <- packageVersion("sergeant")
+    list(
+      driver.version = pkg_version,
+      client.version = pkg_version
+    )
+  }
+)
+
+#' @rdname dbGetInfo
+#' @export
+setMethod(
+  "dbGetInfo",
+  "DrillConnection",
+  function(dbObj) {
+
+    # the server version is looked up with a query against `sys.version`
+    version_df <- dbGetQuery(dbObj, "SELECT version FROM sys.version")
+
+    info <- list(
+      host = dbObj@host,
+      port = dbObj@port,
+      username = dbObj@username,
+      ssl = dbObj@ssl,
+      implicits = dbObj@implicits,
+      db.version = version_df[["version"]],
+      dbname = ""
+    )
+
+    info
+
+  }
+)
+
+#' A concise character representation (label) for a `DrillConnection`
+#'
+#' @param x a `DrillConnection`
+#' @param ... ignored
+#' @return a length-one character vector; the host and port are shown when
+#'   `dbIsValid(x)` is `TRUE`, otherwise the label is marked as disconnected
+#' @export
+format.DrillConnection <- function(x, ...) {
+  if (dbIsValid(x)) {
+    sprintf("<DrillConnection %s:%s>", x@host, x@port)
+  } else {
+    # always hand back a string: callers of format() expect character output
+    "<DrillConnection> DISCONNECTED"
+  }
+}
+
+
+
+
+
+
+
M R/dplyr.r => R/dplyr.r +57 -53
@@ 1,13 1,13 @@
#' Connect to Drill (dplyr)
#'
-#' Use \code{src_drill()} to connect to a Drill cluster and `tbl()` to connect to a
+#' Use `src_drill()` to connect to a Drill cluster and `tbl()` to connect to a
#' fully-qualified "table reference". The vast majority of Drill SQL functions have
-#' also been made available to the \code{dplyr} interface. If you have custom Drill
+#' also been made available to the `dplyr` interface. If you have custom Drill
#' SQL functions that need to be implemented please file an issue on GitHub.
#'
#' @md
-#' @param host Drill host (will pick up the value from \code{DRILL_HOST} env var)
-#' @param port Drill port (will pick up the value from \code{DRILL_PORT} env var)
+#' @param host Drill host (will pick up the value from `DRILL_HOST` env var)
+#' @param port Drill port (will pick up the value from `DRILL_PORT` env var)
#' @param ssl use ssl?
#' @family Drill REST `dplyr` API
#' @param username,password if not `NULL` the credentials for the Drill service.
@@ 105,7 105,7 @@ db_desc.src_drill <- function(x) {
#' @keywords internal
#' @export
sql_escape_ident.DrillConnection <- function(con, x) {
- ifelse(grepl("`", x), sql_quote(x, ' '), sql_quote(x, '`'))
+ ifelse(grepl("`", x), dbplyr::sql_quote(x, ' '), dbplyr::sql_quote(x, '`'))
}
#' @rdname src_tbls
@@ 129,7 129,7 @@ tbl.src_drill <- function(src, from, ...) {
#' @keywords internal
#' @export
db_explain.DrillConnection <- function(con, sql, ...) {
- explain_sql <- dbplyr::build_sql("EXPLAIN PLAN FOR ", sql)
+ explain_sql <- dbplyr::build_sql("EXPLAIN PLAN FOR ", sql, con = con)
explanation <- dbGetQuery(con, explain_sql)
return(paste(explanation[[1]], collapse = "\n"))
}
@@ 140,7 140,8 @@ db_explain.DrillConnection <- function(con, sql, ...) {
db_query_fields.DrillConnection <- function(con, sql, ...) {
fields <- dbplyr::build_sql(
- "SELECT * FROM ", sql, " LIMIT 1",
+ # "SELECT * FROM ", sql, " LIMIT 1",
+ "SELECT * FROM ", dplyr::sql_subquery(con, sql), " LIMIT 1",
con = con
)
result <- dbSendQuery(con, fields)
@@ 183,6 184,7 @@ sql_translate_env.DrillConnection <- function(con) {
scalar = dbplyr::sql_translator(
.parent = dbplyr::base_scalar,
`!=` = dbplyr::sql_infix("<>"),
+ as.integer64 = function(x) build_sql("CAST(", x, "AS BIGINT)"),
as.numeric = function(x) build_sql("CAST(", x, " AS DOUBLE)"),
as.character = function(x) build_sql("CAST(", x, " AS CHARACTER)"),
as.date = function(x) build_sql("CAST(", x, " AS DATE)"),
@@ 192,54 194,56 @@ sql_translate_env.DrillConnection <- function(con) {
grepl = function(x, y) build_sql("CONTAINS(", y, ", ", x, ")"),
gsub = function(x, y, z) build_sql("REGEXP_REPLACE(", z, ", ", x, ",", y ,")"),
trimws = function(x) build_sql("TRIM(both ' ' FROM ", x, ")"),
- cbrt = sql_prefix("CBRT", 1),
- degrees = sql_prefix("DEGREES", 1),
- e = sql_prefix("E", 0),
- row_number = sql_prefix("row_number", 0),
- lshift = sql_prefix("LSHIFT", 2),
- mod = sql_prefix("MOD", 2),
- age = sql_prefix("AGE", 1),
- negative = sql_prefix("NEGATIVE", 1),
- pi = sql_prefix("PI", 0),
- pow = sql_prefix("POW", 2),
- radians = sql_prefix("RADIANS", 1),
- rand = sql_prefix("RAND", 0),
- rshift = sql_prefix("RSHIFT", 2),
- trunc = sql_prefix("TRUNC", 2),
- contains = sql_prefix("CONTAINS", 2),
- convert_to = sql_prefix("CONVERT_TO", 2),
- convert_from = sql_prefix("CONVERT_FROM", 2),
- string_binary = sql_prefix("STRING_BINARY", 1),
- binary_string = sql_prefix("BINARY_STRING", 1),
- to_char = sql_prefix("TO_CHAR", 2),
- to_date = sql_prefix("TO_DATE", 2),
- to_number = sql_prefix("TO_NUMBER", 2),
- char_to_timestamp = sql_prefix("TO_TIMESTAMP", 2),
- double_to_timestamp = sql_prefix("TO_TIMESTAMP", 1),
- char_length = sql_prefix("CHAR_LENGTH", 1),
- flatten = sql_prefix("FLATTEN", 1),
- kvgen = sql_prefix("KVGEN", 1),
- repeated_count = sql_prefix("REPEATED_COUNT", 1),
- repeated_contains = sql_prefix("REPEATED_CONTAINS", 2),
- ilike = sql_prefix("ILIKE", 2),
- init_cap = sql_prefix("INIT_CAP", 1),
- length = sql_prefix("LENGTH", 1),
- lower = sql_prefix("LOWER", 1),
- tolower = sql_prefix("LOWER", 1),
- ltrim = sql_prefix("LTRIM", 2),
- nullif = sql_prefix("NULLIF", 2),
+ cbrt = dbplyr::sql_prefix("CBRT", 1),
+ degrees = dbplyr::sql_prefix("DEGREES", 1),
+ as_interval = function(x, y) build_sql("CAST(", x, " AS INTERVAL ", sql(toupper(y)), ")"),
+ e = dbplyr::sql_prefix("E", 0),
+ cast = function(x, y) build_sql("CAST(", x, " AS ", sql(y), ")"),
+ row_number = dbplyr::sql_prefix("row_number", 0),
+ lshift = dbplyr::sql_prefix("LSHIFT", 2),
+ mod = dbplyr::sql_prefix("MOD", 2),
+ age = dbplyr::sql_prefix("AGE", 1),
+ negative = dbplyr::sql_prefix("NEGATIVE", 1),
+ pi = dbplyr::sql_prefix("PI", 0),
+ pow = dbplyr::sql_prefix("POW", 2),
+ radians = dbplyr::sql_prefix("RADIANS", 1),
+ rand = dbplyr::sql_prefix("RAND", 0),
+ rshift = dbplyr::sql_prefix("RSHIFT", 2),
+ trunc = dbplyr::sql_prefix("TRUNC", 2),
+ contains = dbplyr::sql_prefix("CONTAINS", 2),
+ convert_to = dbplyr::sql_prefix("CONVERT_TO", 2),
+ convert_from = dbplyr::sql_prefix("CONVERT_FROM", 2),
+ string_binary = dbplyr::sql_prefix("STRING_BINARY", 1),
+ binary_string = dbplyr::sql_prefix("BINARY_STRING", 1),
+ to_char = dbplyr::sql_prefix("TO_CHAR", 2),
+ to_date = dbplyr::sql_prefix("TO_DATE", 2),
+ to_number = dbplyr::sql_prefix("TO_NUMBER", 2),
+ char_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 2),
+ double_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 1),
+ char_length = dbplyr::sql_prefix("CHAR_LENGTH", 1),
+ flatten = dbplyr::sql_prefix("FLATTEN", 1),
+ kvgen = dbplyr::sql_prefix("KVGEN", 1),
+ repeated_count = dbplyr::sql_prefix("REPEATED_COUNT", 1),
+ repeated_contains = dbplyr::sql_prefix("REPEATED_CONTAINS", 2),
+ ilike = dbplyr::sql_prefix("ILIKE", 2),
+ init_cap = dbplyr::sql_prefix("INIT_CAP", 1),
+ length = dbplyr::sql_prefix("LENGTH", 1),
+ lower = dbplyr::sql_prefix("LOWER", 1),
+ tolower = dbplyr::sql_prefix("LOWER", 1),
+ ltrim = dbplyr::sql_prefix("LTRIM", 2),
+ nullif = dbplyr::sql_prefix("NULLIF", 2),
position = function(x, y) build_sql("POSITION(", x, " IN ", y, ")"),
- regexp_replace = sql_prefix("REGEXP_REPLACE", 3),
- rtrim = sql_prefix("RTRIM", 2),
- rpad = sql_prefix("RPAD", 2),
- rpad_with = sql_prefix("RPAD", 3),
- lpad = sql_prefix("LPAD", 2),
- lpad_with = sql_prefix("LPAD", 3),
- strpos = sql_prefix("STRPOS", 2),
- substr = sql_prefix("SUBSTR", 3),
+ regexp_replace = dbplyr::sql_prefix("REGEXP_REPLACE", 3),
+ rtrim = dbplyr::sql_prefix("RTRIM", 2),
+ rpad = dbplyr::sql_prefix("RPAD", 2),
+ rpad_with = dbplyr::sql_prefix("RPAD", 3),
+ lpad = dbplyr::sql_prefix("LPAD", 2),
+ lpad_with = dbplyr::sql_prefix("LPAD", 3),
+ strpos = dbplyr::sql_prefix("STRPOS", 2),
+ substr = dbplyr::sql_prefix("SUBSTR", 3),
trim = function(x, y, z) build_sql("TRIM(", x, " ", y, " FROM ", z, ")"),
- upper = sql_prefix("UPPER", 1),
- toupper = sql_prefix("UPPER", 1)
+ upper = dbplyr::sql_prefix("UPPER", 1),
+ toupper = dbplyr::sql_prefix("UPPER", 1)
),
aggregate = dbplyr::sql_translator(
A R/drill-docker.R => R/drill-docker.R +156 -0
@@ 0,0 1,156 @@
+#' Start a Dockerized Drill Instance
+#'
+#' This is a "get you up and running quickly" helper function as it only
+#' runs a standalone mode Drill instance and is optionally removed after the container
+#' is stopped. You should customize your own Drill containers based on the
+#' one at [Drill's Docker Hub](https://hub.docker.com/u/drill).
+#'
+#' The path specified in `data_dir` will be mapped inside the container as
+#' `/data` and a new `dfs` storage workspace will be created (`dfs.d`) that
+#' maps to `/data` and is writable.
+#'
+#' If the Drill service cannot be reached after the wait period, the
+#' container is asked to stop before the error is raised.
+#'
+#' Use [drill_down()] to stop a running Drill container by container id
+#' (full or partial).
+#'
+#' @md
+#' @note this requires a working Docker setup on your system and it is *highly suggested*
+#'       you `docker pull` it yourself before running this function.
+#' @param image Drill image to use. Must be a valid image from
+#'        [Drill's Docker Hub](https://hub.docker.com/u/drill). Defaults
+#'        to most recent Drill docker image.
+#' @param container_name name for the container. Defaults to "`drill`".
+#' @param data_dir valid path to a place where your data is stored; defaults to the
+#'        value of [getwd()]. This will be [path.expand()]ed and mapped to `/data`
+#'        in the container. This will be mapped to the `dfs` storage plugin as the
+#'        `dfs.d` workspace.
+#' @param remove remove the Drill container instance after it's stopped?
+#'        Defaults to `TRUE` since you shouldn't be relying on this in production.
+#' @return a `stevedore` docker object (invisibly) which *you* are responsible
+#'         for killing with the `$stop()` function or from the Docker command
+#'         line (in interactive mode the docker container ID is printed as well).
+#' @export
+#' @family Drill Docker functions
+#' @examples \dontrun{
+#' drill_up(data_dir = "~/Data")
+#' }
+drill_up <- function(image = "drill/apache-drill:1.16.0",
+                     container_name = "drill",
+                     data_dir = getwd(), remove = TRUE) {
+
+  data_dir <- path.expand(data_dir)
+
+  stopifnot(dir.exists(data_dir))
+
+  # stevedore is only a suggested dependency, so check for it at call time
+  if (!requireNamespace("stevedore", quietly = TRUE)) {
+    stop("The stevedore package must be installed to use this function")
+  }
+
+  docker <- stevedore::docker_client()
+
+  docker$container$run(
+    image = image,
+    name = container_name,
+    ports = "8047:8047",
+    detach = TRUE,
+    rm = remove,
+    tty = TRUE,
+    cmd = "/bin/bash",
+    volumes = sprintf("%s:/data", data_dir)
+  ) -> drill
+
+  if (interactive()) {
+    message(
+      "Drill container started. Waiting for the service to become active (this may take up to 30s)."
+    )
+  }
+
+  drill_con <- drill_connection("localhost")
+
+  # poll once per second until the REST service answers
+  for (i in seq_len(30)) {
+    if (drill_active(drill_con)) break
+    Sys.sleep(1L)
+  }
+
+  if (!drill_active(drill_con)) {
+    # don't leave a half-started, named container behind; it would block
+    # the next drill_up() call that uses the same container name
+    try(drill$stop(), silent = TRUE)
+    stop("Could not connect to Drill container.")
+  }
+
+  r <- drill_storage(drill_con, "dfs", "raw")
+
+  # ugly but the jsonlite targeted "unboxing" code would be uglier
+  gsub(
+    '"workspaces" : \\{',
+    '"workspaces" : \\{\n "d" : { "location" : "/data", "writable" : true, "defaultInputFormat" : null, "allowAccessOutsideWorkspace" : false },',
+    r
+  ) -> r
+
+  drill_mod_storage(drill_con, "dfs", r)
+
+  if (interactive()) message("Drill container ID: ", drill$id())
+
+  invisible(drill)
+
+}
+
+#' @rdname drill_up
+#' @param id the id of the Drill container
+#' @export
+drill_down <- function(id) {
+
+  # stevedore is only a suggested dependency, so check for it at call time
+  if (!requireNamespace("stevedore", quietly = TRUE)) {
+    stop("The stevedore package must be installed to use this function")
+  }
+
+  docker <- stevedore::docker_client()
+  docker$container$get(id)$stop()
+
+}
+
+#' Show all dead and running Drill Docker containers
+#'
+#' This function will show _all_ Docker containers that are based on an
+#' image matching a runtime command of "`bin/drill-embedded`".
+#'
+#' @return (invisibly) a data frame of container metadata when at least one
+#'         matching container is found, otherwise `NULL`
+#' @family Drill Docker functions
+#' @export
+showall_drill <- function() {
+
+  # stevedore is only a suggested dependency, so check for it at call time
+  if (!requireNamespace("stevedore", quietly = TRUE)) {
+    stop("The stevedore package must be installed to use this function")
+  }
+
+  docker <- stevedore::docker_client()
+
+  x <- docker$container$list(all=TRUE)
+
+  # keep only containers whose command line launches embedded Drill
+  x <- x[grepl("bin/drill-embedded", x$command, fixed = TRUE),]
+
+  if (nrow(x) == 0) {
+    message("No Drill containers running matching target command found.")
+    return(invisible(NULL))
+  }
+
+  message(sprintf(
+    "Drill containers found: [%s]\nReturning data frame of container metadata (invisibly).",
+    paste0(substr(x$id, 1, 16), collapse=", ")
+  ))
+
+  invisible(x)
+
+}
+
+#' Prune all dead and running Drill Docker containers
+#'
+#' _This is a destructive function._ It will stop **any** running Docker
+#' container that is based on an image matching a runtime command of
+#' "`bin/drill-embedded`" and then attempt to remove every matching container,
+#' running or not. Failures to stop or remove a container are ignored.
+#' It's best used when you had a session forcefully interrupted and had been
+#' using the R helper functions to start/stop the Drill Docker container.
+#' You may want to consider using the Docker command-line interface to perform
+#' this work manually.
+#'
+#' @return `NULL`, invisibly
+#' @family Drill Docker functions
+#' @export
+killall_drill <- function() {
+
+  # stevedore is only a suggested dependency, so check for it at call time
+  if (!requireNamespace("stevedore", quietly = TRUE)) {
+    stop("The stevedore package must be installed to use this function")
+  }
+
+  docker <- stevedore::docker_client()
+  x <- docker$container$list(all=TRUE)
+
+  # seq_len() keeps the loop from running when there are no containers
+  for (i in seq_len(nrow(x))) {
+    if (grepl("bin/drill-embedded", x$command[i], fixed = TRUE)) {
+      message(sprintf("Pruning: %s...", x$id[i]))
+      cntnr <- docker$container$get(x$id[i])
+      if (x$state[i] == "running") {
+        suppressWarnings(try(cntnr$stop(), silent = TRUE))
+      }
+      # best effort: a container started with rm = TRUE is already gone
+      # once it has stopped, so an error here is expected and ignored
+      suppressWarnings(try(cntnr$remove(), silent = TRUE))
+    }
+  }
+
+  invisible(NULL)
+
+}
M R/query.r => R/query.r +158 -11
@@ 15,7 15,7 @@
#' @param .progress if \code{TRUE} (default if in an interactive session) then ask
#' \code{httr::POST} to display a progress bar
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @export
#' @examples
#' try({
@@ 40,17 40,30 @@ drill_query <- function(drill_con, query, uplift=TRUE, .progress=interactive())
drill_server <- make_server(drill_con)
if (.progress) {
- res <- httr::POST(sprintf("%s/query.json", drill_server),
- encode="json",
- progress(),
- body=list(queryType="SQL", query=query))
+ httr::POST(
+ url = sprintf("%s/query.json", drill_server),
+ encode = "json",
+ httr::progress(),
+ body = list(
+ queryType = "SQL",
+ query = query
+ )
+ ) -> res
} else {
- res <- httr::POST(sprintf("%s/query.json", drill_server),
- encode="json",
- body=list(queryType="SQL", query=query))
+ httr::POST(
+ url = sprintf("%s/query.json", drill_server),
+ encode = "json",
+ body = list(
+ queryType = "SQL",
+ query = query
+ )
+ ) -> res
}
- out <- jsonlite::fromJSON(httr::content(res, as="text", encoding="UTF-8"), flatten=TRUE)
+ jsonlite::fromJSON(
+ httr::content(res, as="text", encoding="UTF-8"),
+ flatten=TRUE
+ ) -> out
if ("errorMessage" %in% names(out)) {
message(sprintf("Query ==> %s\n%s\n", gsub("[\r\n]", " ", query), out$errorMessage))
@@ 77,8 90,142 @@ drill_query <- function(drill_con, query, uplift=TRUE, .progress=interactive())
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
drill_uplift <- function(query_result) {
+
if (length(query_result$columns) != 0) {
- query_result$rows <- query_result$rows[,query_result$columns]
+ query_result$rows <- query_result$rows[,query_result$columns,drop=FALSE]
+ }
+
+ if (length(query_result$columns) != 0) {
+
+ if (is.data.frame(query_result$rows)) {
+
+ if (nrow(query_result$rows) > 0) {
+ query_result$rows <- query_result$rows[,query_result$columns,drop=FALSE]
+ }
+
+ } else {
+
+ lapply(1:length(query_result$columns), function(col_idx) {
+
+ ctype <- query_result$metadata[col_idx]
+
+ if (ctype == "INT") {
+ integer(0)
+ } else if (ctype == "VARCHAR") {
+ character(0)
+ } else if (ctype == "TIMESTAMP") {
+ cx <- integer(0)
+ class(cx) <- "POSIXct"
+ cx
+ } else if (ctype == "BIGINT") {
+ integer64(0)
+ } else if (ctype == "BINARY") {
+ character(0)
+ } else if (ctype == "BOOLEAN") {
+ logical(0)
+ } else if (ctype == "DATE") {
+ cx <- integer(0)
+ class(cx) <- "Date"
+ cx
+ } else if (ctype == "FLOAT") {
+ numeric(0)
+ } else if (ctype == "DOUBLE") {
+ double(0)
+ } else if (ctype == "TIME") {
+ character(0)
+ } else if (ctype == "INTERVAL") {
+ character(0)
+ } else {
+ character(0)
+ }
+
+ }) -> xdf
+
+ xdf <- set_names(xdf, query_result$columns)
+ class(xdf) <- c("data.frame")
+ return(xdf)
+
+ }
+
+ } else {
+
+ xdf <- dplyr::tibble()
+ return(xdf)
+
}
- dplyr::tbl_df(readr::type_convert(query_result$rows))
+
+ # ** only available in Drill 1.15.0+ **
+ # be smarter about type conversion now that the REST API provides
+ # the necessary metadata
+ if (length(query_result$metadata)) {
+
+ if ("BIGINT" %in% query_result$metadata) {
+ if (!.pkgenv$bigint_warn_once) {
+ if (getOption("sergeant.bigint.warnonce", TRUE)) {
+ warning(
+ "One or more columns are of type BIGINT. ",
+ "The sergeant package currently uses jsonlite::fromJSON() ",
+ "to process Drill REST API result sets. Since jsonlite does not ",
+ "support 64-bit integers BIGINT columns are initially converted ",
+ "to numeric since that's how jsonlite::fromJSON() works. This is ",
+ "problematic for many reasons, including trying to use 'dplyr' idioms ",
+ "with said converted BIGINT-to-numeric columns. It is recommended that ",
+ "you 'CAST' BIGINT columns to 'VARCHAR' prior to working with them from ",
+ "R/'dplyr'.\n\n",
+ "If you really need BIGINT/integer64 support, consider using the ",
+ "R ODBC interface to Apache Drill with the MapR ODBC drivers.\n\n",
+ "This informational warning will only be shown once per R session and ",
+ "you can disable them from appearing by setting the 'sergeant.bigint.warnonce' ",
+ "option to 'FALSE' (i.e. options(sergeant.bigint.warnonce = FALSE)).",
+ call.=FALSE
+ )
+ }
+ .pkgenv$bigint_warn_once <- TRUE
+ }
+ }
+
+ sapply(1:length(query_result$columns), function(col_idx) {
+
+ cname <- query_result$columns[col_idx]
+ ctype <- query_result$metadata[col_idx]
+
+ case_when(
+ ctype == "INT" ~ "i",
+ ctype == "VARCHAR" ~ "c",
+ ctype == "TIMESTAMP" ~ "?",
+ ctype == "BIGINT" ~ "?",
+ ctype == "BINARY" ~ "c",
+ ctype == "BOOLEAN" ~ "l",
+ ctype == "DATE" ~ "?",
+ ctype == "FLOAT" ~ "d",
+ ctype == "DOUBLE" ~ "d",
+ ctype == "TIME" ~ "c",
+ ctype == "INTERVAL" ~ "?",
+ TRUE ~ "?"
+ )
+
+ }) -> col_types
+
+ suppressMessages(
+ dplyr::tbl_df(
+ readr::type_convert(
+ df = query_result$rows,
+ col_types = paste0(col_types, collapse=""),
+ na = character()
+ )
+ )
+ ) -> xdf
+
+ } else {
+
+ suppressMessages(
+ dplyr::tbl_df(
+ readr::type_convert(df = query_result$rows, na = character())
+ )
+ ) -> xdf
+
+ }
+
+ xdf
+
}
R R/sergeant.r => R/rest-api.r +92 -27
@@ 10,7 10,7 @@ s_head <- purrr::safely(httr::HEAD)
#' @note If `user`/`password` are set this function will make a `POST` to the REST
#' interface immediately to prime the cookie-jar with the session id.
#' @export
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @examples
#' dc <- drill_connection()
drill_connection <- function(host=Sys.getenv("DRILL_HOST", "localhost"),
@@ 34,13 34,13 @@ drill_connection <- function(host=Sys.getenv("DRILL_HOST", "localhost"),
}
-#' Test whether Drill HTTP Dill direct REST API Interface server is up
+#' Test whether Drill HTTP REST API server is up
#'
#' This is a very simple test (performs \code{HEAD /} request on the Drill server/cluster)
#'
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @export
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @examples
#' try({
#' drill_connection() %>% drill_active()
@@ 55,7 55,7 @@ drill_active <- function(drill_con) {
#' @note The output of this is in a "viewer" window
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @export
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @examples \dontrun{
#' drill_connection() %>% drill_status()
#' }
@@ 72,7 72,7 @@ drill_status <- function(drill_con) {
#'
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @export
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @examples \dontrun{
#' drill_connection() %>% drill_metrics()
#' }
@@ 89,7 89,7 @@ drill_metrics <- function(drill_con) {
#' @note The output of this is in a "viewer" window
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @export
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @examples \dontrun{
#' drill_connection() %>% drill_threads()
#' }
@@ 107,7 107,7 @@ drill_threads <- function(drill_con) {
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @export
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @examples \dontrun{
#' drill_connection() %>% drill_profiles()
#' }
@@ 124,7 124,7 @@ drill_profiles <- function(drill_con) {
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @param query_id UUID of the query in standard UUID format that Drill assigns to each query
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @export
drill_profile <- function(drill_con, query_id) {
drill_server <- make_server(drill_con)
@@ 139,7 139,7 @@ drill_profile <- function(drill_con, query_id) {
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @param query_id the UUID of the query in standard UUID format that Drill assigns to each query.
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @export
drill_cancel <- function(drill_con, query_id) {
drill_server <- make_server(drill_con)
@@ 149,13 149,20 @@ drill_cancel <- function(drill_con, query_id) {
invisible(TRUE)
}
-#' Get the list of storage plugin names and configurations
+#' Retrieve, modify or remove storage plugin names and configurations
+#'
+#' Retrieve, modify or remove storage plugins from a Drill instance. If you intend
+#' to modify an existing configuration it is suggested that you use the "`list`" or
+#' "`raw`" values to the `as` parameter to make it easier to modify them.
#'
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @param plugin the assigned name in the storage plugin definition.
+#' @param as one of "`tbl`" or "`list`" or "`raw`". The latter two are useful if you want
+#' to modify an existing storage plugin (e.g. add a workspace) via
+#' [drill_mod_storage()].
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @examples \dontrun{
#' drill_connection() %>% drill_storage()
#'
@@ 181,7 188,9 @@ drill_cancel <- function(drill_con, query_id) {
#' }
#' ')
#' }
-drill_storage <- function(drill_con, plugin=NULL) {
+drill_storage <- function(drill_con, plugin=NULL, as=c("tbl", "list", "raw")) {
+
+ as <- match.arg(as[1], c("tbl", "list", "raw"))
drill_server <- make_server(drill_con)
@@ 193,18 202,24 @@ drill_storage <- function(drill_con, plugin=NULL) {
httr::stop_for_status(res)
- cnt <- httr::content(res, as="text", encoding="UTF-8")
- jsonlite::fromJSON(cnt, flatten=TRUE) %>%
- dplyr::tbl_df()
+ out <- httr::content(res, as="text", encoding="UTF-8")
+
+ switch(
+ as,
+ tbl = jsonlite::fromJSON(out, flatten=TRUE) %>% dplyr::tbl_df(),
+ list = jsonlite::fromJSON(
+ out, simplifyVector = TRUE, simplifyDataFrame = FALSE, flatten = FALSE
+ ),
+ raw = out
+ )
}
#' @md
#' @rdname drill_storage
#' @param name name of the storage plugin configuration to create/update/remove
-#' @param config a `list` or raw character, valid JSON of a complete storage
-#' spec
-#' @family Dill direct REST API Interface
+#' @param config a raw 1-element character vector containing valid JSON of a
+#' complete storage spec
#' @export
drill_mod_storage <- function(drill_con, name, config) {
@@ 213,19 228,20 @@ drill_mod_storage <- function(drill_con, name, config) {
httr::POST(
url = sprintf("%s/storage/%s.json", drill_server, name),
httr::content_type_json(),
- body = config
+ body = config,
+ encode = "raw"
) -> res
httr::stop_for_status(res)
- cnt <- httr::content(res, as="text", encoding="UTF-8")
- jsonlite::fromJSON(cnt, flatten=TRUE)
+ out <- httr::content(res, as="text", encoding="UTF-8")
+
+ invisible(jsonlite::fromJSON(out, flatten=TRUE)$result == "success")
}
#' @md
#' @rdname drill_storage
-#' @family Dill direct REST API Interface
#' @export
drill_rm_storage <- function(drill_con, name) {
@@ 248,7 264,7 @@ drill_rm_storage <- function(drill_con, name) {
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @param pattern pattern to filter results by
#' @export
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @examples \dontrun{
#' drill_connection() %>% drill_options()
@@ 268,7 284,7 @@ drill_options <- function(drill_con, pattern=NULL) {
#'
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @export
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @examples \dontrun{
#' drill_connection() %>% drill_stats()
@@ 285,7 301,7 @@ drill_stats <- function(drill_con) {
#'
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @export
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @examples \dontrun{
#' drill_connection() %>% drill_version()
@@ 309,7 325,7 @@ drill_version <- function(drill_con) {
#' @note You _must_ be using Drill 1.15.0+ to use this function
#' @export
#' @return data frame
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @examples \dontrun{
#' drill_connection() %>% drill_functions()
@@ 335,7 351,7 @@ drill_functions <- function(drill_con, browse=FALSE) {
if (!requireNamespace("DT", quietly = TRUE)) {
warning("The DT must be installed to use this function")
} else {
- DT::datatable(out, options = list(pageLength = 100))
+ print(DT::datatable(out, options = list(pageLength = 100)))
}
}
@@ 343,6 359,55 @@ drill_functions <- function(drill_con, browse=FALSE) {
}
+#' Show all the available Drill options
+#'
+#' @md
+#' @param drill_con drill server connection object setup by \code{drill_connection()}
+#' @param browse if `TRUE` display an interactive HTML widget with the options
+#' as well as return the data frame with the options. Default is `FALSE`.
+#' @note You _must_ be using Drill 1.15.0+ to use this function
+#' @export
+#' @return data frame
+#' @family Drill direct REST API Interface
+#' @references \href{https://drill.apache.org/docs/querying-system-tables/#querying-the-options-table}{Drill documentation}
+#' @examples \dontrun{
+#' drill_connection() %>% drill_opts()
+#' }
+drill_opts <- function(drill_con, browse=FALSE) {
+
+  # this function requires Drill 1.15.0+; stop early on anything older
+  stopifnot(utils::compareVersion(drill_version(drill_con), "1.15.0") >= 0)
+
+  out <- if (inherits(drill_con, "src_drill")) {
+    # dplyr source: build the lazy table, then pull the rows into R
+    opts_tbl <- dplyr::tbl(drill_con, dplyr::sql("(SELECT * FROM sys.options)"))
+    dplyr::collect(opts_tbl)
+  } else {
+    # any other connection object goes through drill_query()
+    drill_query(drill_con, "SELECT * FROM sys.options", uplift = TRUE, .progress = FALSE)
+  }
+
+  # optionally show the options in an HTML widget (needs the DT package)
+  if (browse) {
+    if (requireNamespace("DT", quietly = TRUE)) {
+      print(DT::datatable(out, options = list(pageLength = 100)))
+    } else {
+      warning("The DT must be installed to use this function")
+    }
+  }
+
+  out
+
+}
+#' Print function for `drill_conn` objects
+#'
+#' Writes a one-line summary of the connection (host and port) to the console.
+#'
+#' @md
+#' @param x a `drill_conn` object made with [drill_connection()]
+#' @param ... unused
+#' @return `x`, invisibly, as is conventional for `print` methods
+#' @export
+print.drill_conn <- function(x, ...) {
+  cat(sprintf("<Drill REST API Direct Connection to %s:%s>\n", x$host, x$port))
+  # hand the object back so the method composes in pipes and print(x) chains
+  invisible(x)
+}
M R/sergeant-package.r => R/sergeant-package.r +6 -3
@@ 20,22 20,25 @@
#' You can install and run a Drillbit service on one node or on many nodes to form a
#' distributed cluster environment. When a Drillbit runs on each data node in a cluster,
#' Drill can maximize data locality during query execution without moving data over the
-#' network or between nodes. Drill uses ZooKeeper to maintain cluster membership and health
+#' network or between nodes. Drill uses Zookeeper to maintain cluster membership and health
#' check information.
#'
#' Methods are provided to work with Drill via the REST APIs along with R
-#' \code{DBI} and \code{dplyr} interfaces.
+#' \code{DBI} and \code{dplyr} interfaces. Helper functions are included to facilitate
+#' using official 'Drill' 'Docker' images/containers.
#'
#' @name sergeant
+#' @keywords internal
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import utils DBI methods bit64 httr jsonlite htmltools bit64
#' @importFrom scales comma
#' @importFrom purrr map map2 map2_df %>%
+#' @importFrom readr type_convert
#' @importFrom dplyr mutate select left_join bind_cols bind_rows data_frame tbl filter
#' @importFrom dplyr db_desc src db_data_type db_explain sql_translate_env copy_to
-#' @importFrom dplyr db_query_fields src_tbls sql_escape_ident case_when
+#' @importFrom dplyr db_query_fields src_tbls sql_escape_ident case_when collect
#' @importFrom dbplyr build_sql sql_prefix sql_quote src_sql tbl_sql
#' @importFrom dbplyr win_recycled win_current_group base_win base_agg base_scalar win_over sql
NULL
M R/set.R => R/set.R +11 -11
@@ 7,12 7,12 @@
#' If any query errors result, error messages will be presented to the console.
#'
#' @param drill_con drill server connection object setup by \code{drill_connection()}
-#' @param ... named parameters to be sent to ALTER [SYSTEM|SESSION]
+#' @param ... named parameters to be sent to `ALTER SYSTEM` or `ALTER SESSION`
#' @param type set the \code{session} or \code{system} parameter
#' @return a \code{tbl} (invisibly) with the \code{ALTER} queries sent and results, including errors.
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @examples \dontrun{
#' drill_connection() %>%
#' drill_set(exec.errors.verbose=TRUE, store.format="parquet", web.logs.max_lines=20000)
@@ 27,11 27,11 @@ drill_set <- function(drill_con, ..., type=c("session", "system")) {
purrr::map2(names(params), params, ~sprintf("ALTER %s SET `%s` = %s", type, .x, .y)) %>%
purrr::map_df(function(x) {
- y <- drill_query(drill_con, x)
+ y <- drill_query(drill_con, x, .progress=FALSE)
if (length(y) == 2) {
- dplyr::data_frame(query=x, param=y$summary, value=y$ok, error_msg=NA)
+ dplyr::tibble(query=x, param=y$summary, value=y$ok, error_msg=NA)
} else {
- dplyr::data_frame(query=x, param=NA, value=NA, error_msg=y[[1]])
+ dplyr::tibble(query=x, param=NA, value=NA, error_msg=y[[1]])
}
}) -> res
@@ 55,7 55,7 @@ drill_set <- function(drill_con, ..., type=c("session", "system")) {
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @param ... bare name of system options to reset
#' @param all if \code{TRUE}, all parameters are reset (\code{...} is ignored)
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
#' @examples \dontrun{
@@ 70,9 70,9 @@ drill_system_reset <- function(drill_con, ..., all=FALSE) {
purrr::map_df(function(x) {
y <- drill_query(drill_con, x)
if (length(y) == 2) {
- dplyr::data_frame(query=x, param=y[[2]]$summary, value=y[[2]]$ok, error=NA)
+ dplyr::tibble(query=x, param=y[[2]]$summary, value=y[[2]]$ok, error=NA)
} else {
- dplyr::data_frame(query=x, param=NA, value=NA, error=y[[1]])
+ dplyr::tibble(query=x, param=NA, value=NA, error=y[[1]])
}
}) -> res
@@ 98,7 98,7 @@ drill_system_reset <- function(drill_con, ..., all=FALSE) {
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @param ... bare name of system options to reset
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
-#' @family Dill direct REST API Interface
+#' @family Drill direct REST API Interface
#' @export
#' @examples \dontrun{
#' drill_connection() %>% drill_settings_reset(exec.errors.verbose)
@@ 110,9 110,9 @@ drill_settings_reset <- function(drill_con, ...) {
purrr::map_df(function(x) {
y <- drill_query(drill_con, x)
if (length(y) == 2) {
- dplyr::data_frame(query=x, param=y[[2]]$summary, value=y[[2]]$ok, error=NA)
+ dplyr::tibble(query=x, param=y[[2]]$summary, value=y[[2]]$ok, error=NA)
} else {
- dplyr::data_frame(query=x, param=NA, value=NA, error=y[[1]])
+ dplyr::tibble(query=x, param=NA, value=NA, error=y[[1]])
}
}) -> res
M R/utils.r => R/utils.r +5 -0
@@ 27,3 27,8 @@ auth_drill <- function(ssl, host, port, username, password) {
httr::stop_for_status(res)
}
+
+# Return `object` with its names set to `nm`. When `object` is not supplied
+# it defaults to `nm` itself, giving a vector named after its own values.
+set_names <- function(object = nm, nm) {
+  `names<-`(object, value = nm)
+}
M README.Rmd => README.Rmd +36 -34
@@ 5,12 5,15 @@ editor_options:
---
<!-- README.md is generated from README.Rmd. Please edit that file -->
-```{r, echo = FALSE}
+```{r, echo = FALSE, include=FALSE}
knitr::opts_chunk$set(
+ message = FALSE,
+ warning = FALSE,
collapse = TRUE,
- comment = "##",
+ comment = "## ",
fig.path = "README-"
)
+options(sergeant.bigint.warnonce = FALSE)
```
[](https://doi.org/10.5281/zenodo.1248912)
@@ 44,7 47,7 @@ devtools::install_git("https://github.com/hrbrmstr/sergeant", ref="0.8.0")
Drill + `sergeant` is (IMO) a streamlined alternative to Spark + `sparklyr` if you don't need the ML components of Spark (i.e. just need to query "big data" sources, need to interface with parquet, need to combine disparate data source types — json, csv, parquet, rdbms - for aggregation, etc). Drill also has support for spatial queries.
-Using Drill SQL queries that reference parquet files on a local linux or macOS workstation can often be more performant than doing the same data ingestion & wrangling work with R (especially for large or disperate data sets). Drill can often help further streaming workflows that infolve wrangling many tiny JSON files on a daily basis.
+Using Drill SQL queries that reference parquet files on a local linux or macOS workstation can often be more performant than doing the same data ingestion & wrangling work with R (especially for large or disperate data sets). Drill can often help further streamline workflows that involve wrangling many tiny JSON files on a daily basis.
Drill can be obtained from <https://drill.apache.org/download/> (use "Direct File Download"). Drill can also be installed via [Docker](https://drill.apache.org/docs/running-drill-on-docker/). For local installs on Unix-like systems, a common/suggestion location for the Drill directory is `/usr/local/drill` as the install directory.
@@ 76,7 79,8 @@ Note that a number of Drill SQL functions have been mapped to R functions (e.g.
- `drill_jdbc`: Connect to Drill using JDBC
- `drill_metrics`: Get the current memory metrics
- `drill_options`: List the name, default, and data type of the system and session options
-- `drill_profile`: Get the profile of the query that has the given query id
+- `drill_opts`: Show all the available Drill options (1.15.0+)
+- `drill_profile`: Get the profile of the query that has the given query id
- `drill_profiles`: Get the profiles of running and completed queries
- `drill_query`: Submit a query and return results
- `drill_set`: Set Drill SYSTEM or SESSION options
@@ 92,7 96,16 @@ Note that a number of Drill SQL functions have been mapped to R functions (e.g.
- `drill_use`: Change to a particular schema.
- `drill_version`: Identify the version of Drill running
-## Installation
+**Helpers**
+
+- `ctas_profile`: Generate a Drill CTAS Statement from a Query
+- `drill_up`: start a Dockerized Drill Instance
+- `drill_down`: stop a Dockerized Drill Instance by container id
+- `showall_drill`: Show all dead and running Drill Docker containers
+- `stopall_drill`: Prune all dead and running Drill Docker containers
+
+# Installation
```{r eval=FALSE}
devtools::install_github("hrbrmstr/sergeant")
@@ 119,24 132,24 @@ count(db, gender, marital_status)
count(db, gender, marital_status) %>% collect()
-group_by(db, position_title) %>%
+group_by(db, position_title) %>%
count(gender) -> tmp2
-group_by(db, position_title) %>%
- count(gender) %>%
- ungroup() %>%
- mutate(full_desc=ifelse(gender=="F", "Female", "Male")) %>%
- collect() %>%
- select(Title=position_title, Gender=full_desc, Count=n)
-
-arrange(db, desc(employee_id)) %>% print(n=20)
-
-mutate(db, position_title=tolower(position_title)) %>%
- mutate(salary=as.numeric(salary)) %>%
- mutate(gender=ifelse(gender=="F", "Female", "Male")) %>%
- mutate(marital_status=ifelse(marital_status=="S", "Single", "Married")) %>%
- group_by(supervisor_id) %>%
- summarise(underlings_count=n()) %>%
+group_by(db, position_title) %>%
+ count(gender) %>%
+ ungroup() %>%
+ mutate(full_desc = ifelse(gender == "F", "Female", "Male")) %>%
+ collect() %>%
+ select(Title = position_title, Gender = full_desc, Count = n)
+
+arrange(db, desc(employee_id)) %>% print(n = 20)
+
+mutate(db, position_title = tolower(position_title)) %>%
+ mutate(salary = as.numeric(salary)) %>%
+ mutate(gender = ifelse(gender == "F", "Female", "Male")) %>%
+ mutate(marital_status = ifelse(marital_status == "S", "Single", "Married")) %>%
+ group_by(supervisor_id) %>%
+ summarise(underlings_count = n()) %>%
collect()
```
@@ 153,7 166,7 @@ drill_storage(dc)$name
drill_query(dc, "SELECT * FROM cp.`employee.json` limit 100")
-drill_query(dc, "SELECT COUNT(gender) AS gender FROM cp.`employee.json` GROUP BY gender")
+drill_query(dc, "SELECT COUNT(gender) AS gct FROM cp.`employee.json` GROUP BY gender")
drill_options(dc)
@@ 191,18 204,7 @@ select columns[2] as city, columns[4] as lon, columns[3] as lat
)
")
```
-### Test Results
-
-```{r}
-library(sergeant)
-library(testthat)
-
-date()
-
-devtools::test()
-```
-
-## sergeant Metrics
+### sergeant Metrics
```{r echo=FALSE}
cloc::cloc_pkg_md()
M README.md => README.md +184 -279
@@ 44,8 44,8 @@ aggregation, etc). Drill also has support for spatial queries.
Using Drill SQL queries that reference parquet files on a local linux or
macOS workstation can often be more performant than doing the same data
ingestion & wrangling work with R (especially for large or disperate
-data sets). Drill can often help further streaming workflows that
-infolve wrangling many tiny JSON files on a daily basis.
+data sets). Drill can often help further streamline workflows that
+involve wrangling many tiny JSON files on a daily basis.
Drill can be obtained from <https://drill.apache.org/download/> (use
“Direct File Download”). Drill can also be installed via
@@ 83,7 83,7 @@ The following functions are implemented:
- `src_drill`: Connect to Drill (using `dplyr`) + supporting functions
Note that a number of Drill SQL functions have been mapped to R
-functions (e.g. `grepl`) to make it easier to transition from
+functions (e.g. `grepl`) to make it easier to transition from
non-database-backed SQL ops to Drill. See the help on
`drill_custom_functions` for more info on these helper Drill custom
function mappings.
@@ 94,12 94,14 @@ function mappings.
connection
- `drill_active`: Test whether Drill HTTP REST API server is up
- `drill_cancel`: Cancel the query that has the given queryid
- - `drill_functions`: Show all the available Drill built-in functions & UDFs (Apache Drill 1.15.0+ required)
+ - `drill_functions`: Show all the available Drill built-in functions &
+ UDFs (Apache Drill 1.15.0+ required)
- `drill_jdbc`: Connect to Drill using JDBC
- `drill_metrics`: Get the current memory metrics
- `drill_options`: List the name, default, and data type of the system
and session options
- - `drill_profile`: Get the profile of the query that has the given
+ - `drill_opts`: Show all the available Drill options (1.15.0+)
+ - `drill_profile`: Get the profile of the query that has the given
query id
- `drill_profiles`: Get the profiles of running and completed queries
- `drill_query`: Submit a query and return results
@@ 120,7 122,15 @@ function mappings.
- `drill_use`: Change to a particular schema.
- `drill_version`: Identify the version of Drill running
-## Installation
+**Helpers**
+
+ - `ctas_profile`: Generate a Drill CTAS Statement from a Query
+ - `drill_up`: start a Dockerized Drill Instance
+ - `drill_down`: stop a Dockerized Drill Instance by container id
+ - `showall_drill`: Show all dead and running Drill Docker containers
+ - `stopall_drill`: Prune all dead and running Drill Docker containers
+
+# Installation
``` r
devtools::install_github("hrbrmstr/sergeant")
@@ 140,100 150,100 @@ db <- tbl(ds, "cp.`employee.json`")
# without `collect()`:
count(db, gender, marital_status)
-## # Source: lazy query [?? x 3]
-## # Database: DrillConnection
-## # Groups: gender
-## marital_status gender n
-## <chr> <chr> <int>
-## 1 S F 297
-## 2 M M 278
-## 3 S M 276
-## 4 M F 304
+## # Source: lazy query [?? x 3]
+## # Database: DrillConnection
+## # Groups: gender
+## gender marital_status n
+## <chr> <chr> <dbl>
+## 1 F S 297
+## 2 M M 278
+## 3 M S 276
+## 4 F M 304
count(db, gender, marital_status) %>% collect()
-## # A tibble: 4 x 3
-## # Groups: gender [2]
-## marital_status gender n
-## * <chr> <chr> <int>
-## 1 S F 297
-## 2 M M 278
-## 3 S M 276
-## 4 M F 304
-
-group_by(db, position_title) %>%
+## # A tibble: 4 x 3
+## # Groups: gender [2]
+## gender marital_status n
+## <chr> <chr> <dbl>
+## 1 F S 297
+## 2 M M 278
+## 3 M S 276
+## 4 F M 304
+
+group_by(db, position_title) %>%
count(gender) -> tmp2
-group_by(db, position_title) %>%
- count(gender) %>%
- ungroup() %>%
- mutate(full_desc=ifelse(gender=="F", "Female", "Male")) %>%
- collect() %>%
- select(Title=position_title, Gender=full_desc, Count=n)
-## # A tibble: 30 x 3
-## Title Gender Count
-## * <chr> <chr> <int>
-## 1 President Female 1
-## 2 VP Country Manager Male 3
-## 3 VP Country Manager Female 3
-## 4 VP Information Systems Female 1
-## 5 VP Human Resources Female 1
-## 6 Store Manager Female 13
-## 7 VP Finance Male 1
-## 8 Store Manager Male 11
-## 9 HQ Marketing Female 2
-## 10 HQ Information Systems Female 4
-## # ... with 20 more rows
-
-arrange(db, desc(employee_id)) %>% print(n=20)
-## # Source: table<cp.`employee.json`> [?? x 20]
-## # Database: DrillConnection
-## # Ordered by: desc(employee_id)
-## store_id gender department_id birth_date supervisor_id last_name position_title hire_date management_role
-## <int> <chr> <int> <date> <int> <chr> <chr> <dttm> <chr>
-## 1 18 F 18 1914-02-02 1140 Stand Store Tempora… 1998-01-01 00:00:00 Store Temp Sta…
-## 2 18 M 18 1914-02-02 1140 Burnham Store Tempora… 1998-01-01 00:00:00 Store Temp Sta…
-## 3 18 F 18 1914-02-02 1139 Doolittle Store Tempora… 1998-01-01 00:00:00 Store Temp Sta…
-## 4 18 M 18 1914-02-02 1139 Pirnie Store Tempora… 1998-01-01 00:00:00 Store Temp Sta…
-## 5 18 M 17 1914-02-02 1140 Younce Store Permane… 1998-01-01 00:00:00 Store Full Tim…
-## 6 18 F 17 1914-02-02 1140 Biltoft Store Permane… 1998-01-01 00:00:00 Store Full Tim…
-## 7 18 M 17 1914-02-02 1139 Detwiler Store Permane… 1998-01-01 00:00:00 Store Full Tim…
-## 8 18 F 17 1914-02-02 1139 Ciruli Store Permane… 1998-01-01 00:00:00 Store Full Tim…
-## 9 18 F 16 1914-02-02 1140 Bishop Store Tempora… 1998-01-01 00:00:00 Store Full Tim…
-## 10 18 F 16 1914-02-02 1140 Cutwright Store Tempora… 1998-01-01 00:00:00 Store Full Tim…
-## 11 18 F 16 1914-02-02 1139 Anderson Store Tempora… 1998-01-01 00:00:00 Store Full Tim…
-## 12 18 F 16 1914-02-02 1139 Swartwood Store Tempora… 1998-01-01 00:00:00 Store Full Tim…
-## 13 18 M 15 1914-02-02 1140 Curtsinger Store Permane… 1998-01-01 00:00:00 Store Full Tim…
-## 14 18 F 15 1914-02-02 1140 Quick Store Permane… 1998-01-01 00:00:00 Store Full Tim…
-## 15 18 M 15 1914-02-02 1139 Souza Store Permane… 1998-01-01 00:00:00 Store Full Tim…
-## 16 18 M 15 1914-02-02 1139 Compagno Store Permane… 1998-01-01 00:00:00 Store Full Tim…
-## 17 18 M 11 1961-09-24 1139 Jaramillo Store Shift S… 1998-01-01 00:00:00 Store Manageme…
-## 18 18 M 11 1972-05-12 17 Belsey Store Assista… 1998-01-01 00:00:00 Store Manageme…
-## 19 12 M 18 1914-02-02 1069 Eichorn Store Tempora… 1998-01-01 00:00:00 Store Temp Sta…
-## 20 12 F 18 1914-02-02 1069 Geiermann Store Tempora… 1998-01-01 00:00:00 Store Temp Sta…
-## # ... with more rows, and 7 more variables: salary <dbl>, marital_status <chr>, full_name <chr>, employee_id <int>,
-## # education_level <chr>, first_name <chr>, position_id <int>
-
-mutate(db, position_title=tolower(position_title)) %>%
- mutate(salary=as.numeric(salary)) %>%
- mutate(gender=ifelse(gender=="F", "Female", "Male")) %>%
- mutate(marital_status=ifelse(marital_status=="S", "Single", "Married")) %>%
- group_by(supervisor_id) %>%
- summarise(underlings_count=n()) %>%
+group_by(db, position_title) %>%
+ count(gender) %>%
+ ungroup() %>%
+ mutate(full_desc = ifelse(gender == "F", "Female", "Male")) %>%
+ collect() %>%
+ select(Title = position_title, Gender = full_desc, Count = n)
+## # A tibble: 30 x 3
+## Title Gender Count
+## <chr> <chr> <dbl>
+## 1 President Female 1
+## 2 VP Country Manager Male 3
+## 3 VP Country Manager Female 3
+## 4 VP Information Systems Female 1
+## 5 VP Human Resources Female 1
+## 6 Store Manager Female 13
+## 7 VP Finance Male 1
+## 8 Store Manager Male 11
+## 9 HQ Marketing Female 2
+## 10 HQ Information Systems Female 4
+## # … with 20 more rows
+
+arrange(db, desc(employee_id)) %>% print(n = 20)
+## # Source: table<cp.`employee.json`> [?? x 20]
+## # Database: DrillConnection
+## # Ordered by: desc(employee_id)
+## employee_id full_name first_name last_name position_id position_title store_id department_id birth_date hire_date
+## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
+## 1 999 Beverly … Beverly Dittmar 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 2 998 Elizabet… Elizabeth Jantzer 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 3 997 John Swe… John Sweet 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 4 996 William … William Murphy 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 5 995 Carol Li… Carol Lindsay 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 6 994 Richard … Richard Burke 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 7 993 Ethan Bu… Ethan Bunosky 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 8 992 Claudett… Claudette Cabrera 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 9 991 Maria Te… Maria Terry 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 10 990 Stacey C… Stacey Case 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 11 99 Elizabet… Elizabeth Horne 18 Store Tempora… 6 18 1976-10-05 1997-01-…
+## 12 989 Dominick… Dominick Nutter 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 13 988 Brian Wi… Brian Willeford 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 14 987 Margaret… Margaret Clendenen 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 15 986 Maeve Wa… Maeve Wall 17 Store Permane… 8 17 1914-02-02 1998-01-…
+## 16 985 Mildred … Mildred Morrow 16 Store Tempora… 8 16 1914-02-02 1998-01-…
+## 17 984 French W… French Wilson 16 Store Tempora… 8 16 1914-02-02 1998-01-…
+## 18 983 Elisabet… Elisabeth Duncan 16 Store Tempora… 8 16 1914-02-02 1998-01-…
+## 19 982 Linda An… Linda Anderson 16 Store Tempora… 8 16 1914-02-02 1998-01-…
+## 20 981 Selene W… Selene Watson 16 Store Tempora… 8 16 1914-02-02 1998-01-…
+## # … with more rows, and 6 more variables: salary <chr>, supervisor_id <chr>, education_level <chr>,
+## # marital_status <chr>, gender <chr>, management_role <chr>
+
+mutate(db, position_title = tolower(position_title)) %>%
+ mutate(salary = as.numeric(salary)) %>%
+ mutate(gender = ifelse(gender == "F", "Female", "Male")) %>%
+ mutate(marital_status = ifelse(marital_status == "S", "Single", "Married")) %>%
+ group_by(supervisor_id) %>%
+ summarise(underlings_count = n()) %>%
collect()
-## # A tibble: 112 x 2
-## supervisor_id underlings_count
-## * <int> <int>
-## 1 0 1
-## 2 1 7
-## 3 5 9
-## 4 4 2
-## 5 2 3
-## 6 20 2
-## 7 21 4
-## 8 22 7
-## 9 6 4
-## 10 36 2
-## # ... with 102 more rows
+## # A tibble: 112 x 2
+## supervisor_id underlings_count
+## <chr> <dbl>
+## 1 0 1
+## 2 1 7
+## 3 5 9
+## 4 4 2
+## 5 2 3
+## 6 20 2
+## 7 21 4
+## 8 22 7
+## 9 6 4
+## 10 36 2
+## # … with 102 more rows
```
### REST API
@@ 242,135 252,92 @@ mutate(db, position_title=tolower(position_title)) %>%
dc <- drill_connection("localhost")
drill_active(dc)
-## [1] TRUE
+## [1] TRUE
drill_version(dc)
-## [1] "1.13.0"
+## [1] "1.15.0"
drill_storage(dc)$name
-## [1] "cp" "dfs" "hbase" "hive" "kudu" "mongo" "s3"
+## [1] "cp" "dfs" "drilldat" "hbase" "hdfs" "hive" "kudu" "mongo" "my" "s3"
drill_query(dc, "SELECT * FROM cp.`employee.json` limit 100")
-## Parsed with column specification:
-## cols(
-## store_id = col_integer(),
-## gender = col_character(),
-## department_id = col_integer(),
-## birth_date = col_date(format = ""),
-## supervisor_id = col_integer(),
-## last_name = col_character(),
-## position_title = col_character(),
-## hire_date = col_datetime(format = ""),
-## management_role = col_character(),
-## salary = col_double(),
-## marital_status = col_character(),
-## full_name = col_character(),
-## employee_id = col_integer(),
-## education_level = col_character(),
-## first_name = col_character(),
-## position_id = col_integer()
-## )
-## # A tibble: 100 x 16
-## store_id gender department_id birth_date supervisor_id last_name position_title hire_date management_role
-## * <int> <chr> <int> <date> <int> <chr> <chr> <dttm> <chr>
-## 1 0 F 1 1961-08-26 0 Nowmer President 1994-12-01 00:00:00 Senior Managem…
-## 2 0 M 1 1915-07-03 1 Whelply VP Country Man… 1994-12-01 00:00:00 Senior Managem…
-## 3 0 M 1 1969-06-20 1 Spence VP Country Man… 1998-01-01 00:00:00 Senior Managem…
-## 4 0 F 1 1951-05-10 1 Gutierrez VP Country Man… 1998-01-01 00:00:00 Senior Managem…
-## 5 0 F 2 1942-10-08 1 Damstra VP Information… 1994-12-01 00:00:00 Senior Managem…
-## 6 0 F 3 1949-03-27 1 Kanagaki VP Human Resou… 1994-12-01 00:00:00 Senior Managem…
-## 7 9 F 11 1922-08-10 5 Brunner Store Manager 1998-01-01 00:00:00 Store Manageme…
-## 8 21 F 11 1979-06-23 5 Blumberg Store Manager 1998-01-01 00:00:00 Store Manageme…
-## 9 0 M 5 1949-08-26 1 Stanz VP Finance 1994-12-01 00:00:00 Senior Managem…
-## 10 1 M 11 1967-06-20 5 Murraiin Store Manager 1998-01-01 00:00:00 Store Manageme…
-## # ... with 90 more rows, and 7 more variables: salary <dbl>, marital_status <chr>, full_name <chr>, employee_id <int>,
-## # education_level <chr>, first_name <chr>, position_id <int>
-
-drill_query(dc, "SELECT COUNT(gender) AS gender FROM cp.`employee.json` GROUP BY gender")
-## Parsed with column specification:
-## cols(
-## gender = col_integer()
-## )
-## # A tibble: 2 x 1
-## gender
-## * <int>
-## 1 601
-## 2 554
+## # A tibble: 100 x 16
+## employee_id full_name first_name last_name position_id position_title store_id department_id birth_date hire_date
+## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
+## 1 1 Sheri No… Sheri Nowmer 1 President 0 1 1961-08-26 1994-12-…
+## 2 2 Derrick … Derrick Whelply 2 VP Country Ma… 0 1 1915-07-03 1994-12-…
+## 3 4 Michael … Michael Spence 2 VP Country Ma… 0 1 1969-06-20 1998-01-…
+## 4 5 Maya Gut… Maya Gutierrez 2 VP Country Ma… 0 1 1951-05-10 1998-01-…
+## 5 6 Roberta … Roberta Damstra 3 VP Informatio… 0 2 1942-10-08 1994-12-…
+## 6 7 Rebecca … Rebecca Kanagaki 4 VP Human Reso… 0 3 1949-03-27 1994-12-…
+## 7 8 Kim Brun… Kim Brunner 11 Store Manager 9 11 1922-08-10 1998-01-…
+## 8 9 Brenda B… Brenda Blumberg 11 Store Manager 21 11 1979-06-23 1998-01-…
+## 9 10 Darren S… Darren Stanz 5 VP Finance 0 5 1949-08-26 1994-12-…
+## 10 11 Jonathan… Jonathan Murraiin 11 Store Manager 1 11 1967-06-20 1998-01-…
+## # … with 90 more rows, and 6 more variables: salary <chr>, supervisor_id <chr>, education_level <chr>,
+## # marital_status <chr>, gender <chr>, management_role <chr>
+
+drill_query(dc, "SELECT COUNT(gender) AS gct FROM cp.`employee.json` GROUP BY gender")
drill_options(dc)
-## # A tibble: 138 x 5
-## name value accessibleScopes kind optionScope
-## * <chr> <chr> <chr> <chr> <chr>
-## 1 debug.validate_iterators FALSE ALL BOOLEAN BOOT
-## 2 debug.validate_vectors FALSE ALL BOOLEAN BOOT
-## 3 drill.exec.functions.cast_empty_string_to_null FALSE ALL BOOLEAN BOOT
-## 4 drill.exec.hashagg.fallback.enabled FALSE ALL BOOLEAN BOOT
-## 5 drill.exec.memory.operator.output_batch_size 16777216 SYSTEM LONG BOOT
-## 6 drill.exec.storage.file.partition.column.label dir ALL STRING BOOT
-## 7 drill.exec.storage.implicit.filename.column.label filename ALL STRING BOOT
-## 8 drill.exec.storage.implicit.filepath.column.label filepath ALL STRING BOOT
-## 9 drill.exec.storage.implicit.fqn.column.label fqn ALL STRING BOOT
-## 10 drill.exec.storage.implicit.suffix.column.label suffix ALL STRING BOOT
-## # ... with 128 more rows
+## # A tibble: 179 x 6
+## name value defaultValue accessibleScopes kind optionScope
+## <chr> <chr> <chr> <chr> <chr> <chr>
+## 1 debug.validate_iterators FALSE false ALL BOOLE… BOOT
+## 2 debug.validate_vectors FALSE false ALL BOOLE… BOOT
+## 3 drill.exec.functions.cast_empty_string_to_null FALSE false ALL BOOLE… BOOT
+## 4 drill.exec.hashagg.fallback.enabled FALSE false ALL BOOLE… BOOT
+## 5 drill.exec.hashjoin.fallback.enabled FALSE false ALL BOOLE… BOOT
+## 6 drill.exec.memory.operator.output_batch_size 16777216 16777216 SYSTEM LONG BOOT
+## 7 drill.exec.memory.operator.output_batch_size_avail_mem_fac… 0.1 0.1 SYSTEM DOUBLE BOOT
+## 8 drill.exec.storage.file.partition.column.label dir dir ALL STRING BOOT
+## 9 drill.exec.storage.implicit.filename.column.label filename filename ALL STRING BOOT
+## 10 drill.exec.storage.implicit.filepath.column.label filepath filepath ALL STRING BOOT
+## # … with 169 more rows
drill_options(dc, "json")
-## # A tibble: 9 x 5
-## name value accessibleScopes kind optionScope
-## <chr> <chr> <chr> <chr> <chr>
-## 1 store.json.all_text_mode FALSE ALL BOOLEAN BOOT
-## 2 store.json.extended_types FALSE ALL BOOLEAN BOOT
-## 3 store.json.read_numbers_as_double FALSE ALL BOOLEAN BOOT
-## 4 store.json.reader.allow_nan_inf TRUE ALL BOOLEAN BOOT
-## 5 store.json.reader.print_skipped_invalid_record_number FALSE ALL BOOLEAN BOOT
-## 6 store.json.reader.skip_invalid_records FALSE ALL BOOLEAN BOOT
-## 7 store.json.writer.allow_nan_inf TRUE ALL BOOLEAN BOOT
-## 8 store.json.writer.skip_null_fields TRUE ALL BOOLEAN BOOT
-## 9 store.json.writer.uglify FALSE ALL BOOLEAN BOOT
+## # A tibble: 10 x 6
+## name value defaultValue accessibleScopes kind optionScope
+## <chr> <chr> <chr> <chr> <chr> <chr>
+## 1 store.hive.maprdb_json.optimize_scan_with_native_reader FALSE false ALL BOOLEAN BOOT
+## 2 store.json.all_text_mode TRUE false ALL BOOLEAN SYSTEM
+## 3 store.json.extended_types TRUE false ALL BOOLEAN SYSTEM
+## 4 store.json.read_numbers_as_double FALSE false ALL BOOLEAN BOOT
+## 5 store.json.reader.allow_nan_inf TRUE true ALL BOOLEAN BOOT
+## 6 store.json.reader.print_skipped_invalid_record_number TRUE false ALL BOOLEAN SYSTEM
+## 7 store.json.reader.skip_invalid_records TRUE false ALL BOOLEAN SYSTEM
+## 8 store.json.writer.allow_nan_inf TRUE true ALL BOOLEAN BOOT
+## 9 store.json.writer.skip_null_fields TRUE true ALL BOOLEAN BOOT
+## 10 store.json.writer.uglify TRUE false ALL BOOLEAN SYSTEM
```
## Working with parquet files
``` r
drill_query(dc, "SELECT * FROM dfs.`/usr/local/drill/sample-data/nation.parquet` LIMIT 5")
-## Parsed with column specification:
-## cols(
-## N_COMMENT = col_character(),
-## N_NAME = col_character(),
-## N_NATIONKEY = col_integer(),
-## N_REGIONKEY = col_integer()
-## )
-## # A tibble: 5 x 4
-## N_COMMENT N_NAME N_NATIONKEY N_REGIONKEY
-## * <chr> <chr> <int> <int>
-## 1 haggle. carefully f ALGERIA 0 0
-## 2 al foxes promise sly ARGENTINA 1 1
-## 3 y alongside of the p BRAZIL 2 1
-## 4 eas hang ironic, sil CANADA 3 1
-## 5 y above the carefull EGYPT 4 4
+## # A tibble: 5 x 4
+## N_NATIONKEY N_NAME N_REGIONKEY N_COMMENT
+## <dbl> <chr> <dbl> <chr>
+## 1 0 ALGERIA 0 haggle. carefully f
+## 2 1 ARGENTINA 1 al foxes promise sly
+## 3 2 BRAZIL 1 y alongside of the p
+## 4 3 CANADA 1 eas hang ironic, sil
+## 5 4 EGYPT 4 y above the carefull
```
Including multiple parquet files in different directories (note the
-wildcard
-support):
+wildcard support):
``` r
drill_query(dc, "SELECT * FROM dfs.`/usr/local/drill/sample-data/nations*/nations*.parquet` LIMIT 5")
-## Parsed with column specification:
-## cols(
-## N_COMMENT = col_character(),
-## N_NAME = col_character(),
-## N_NATIONKEY = col_integer(),
-## dir0 = col_character(),
-## N_REGIONKEY = col_integer()
-## )
-## # A tibble: 5 x 5
-## N_COMMENT N_NAME N_NATIONKEY dir0 N_REGIONKEY
-## * <chr> <chr> <int> <chr> <int>
-## 1 haggle. carefully f ALGERIA 0 nationsSF 0
-## 2 al foxes promise sly ARGENTINA 1 nationsSF 1
-## 3 y alongside of the p BRAZIL 2 nationsSF 1
-## 4 eas hang ironic, sil CANADA 3 nationsSF 1
-## 5 y above the carefull EGYPT 4 nationsSF 4
+## # A tibble: 5 x 5
+## dir0 N_NATIONKEY N_NAME N_REGIONKEY N_COMMENT
+## <chr> <dbl> <chr> <dbl> <chr>
+## 1 nationsSF 0 ALGERIA 0 haggle. carefully f
+## 2 nationsSF 1 ARGENTINA 1 al foxes promise sly
+## 3 nationsSF 2 BRAZIL 1 y alongside of the p
+## 4 nationsSF 3 CANADA 1 eas hang ironic, sil
+## 5 nationsSF 4 EGYPT 4 y above the carefull
```
### Drill has built-in support for spatial ops
@@ 391,86 358,24 @@ select columns[2] as city, columns[4] as lon, columns[3] as lat
)
)
")
-## Parsed with column specification:
-## cols(
-## city = col_character(),
-## lon = col_double(),
-## lat = col_double()
-## )
-## # A tibble: 7 x 3
-## city lon lat
-## * <chr> <dbl> <dbl>
-## 1 Burbank -122. 37.3
-## 2 San Jose -122. 37.3
-## 3 Lick -122. 37.3
-## 4 Willow Glen -122. 37.3
-## 5 Buena Vista -122. 37.3
-## 6 Parkmoor -122. 37.3
-## 7 Fruitdale -122. 37.3
-```
-
-### Test Results
-
-``` r
-library(sergeant)
-library(testthat)
-##
-## Attaching package: 'testthat'
-## The following object is masked from 'package:dplyr':
-##
-## matches
-## The following object is masked from 'package:purrr':
-##
-## is_null
-
-date()
-## [1] "Sun Oct 14 08:27:29 2018"
-
-devtools::test()
-## Loading sergeant
-## Testing sergeant
-## ✔ | OK F W S | Context
-##
-⠏ | 0 | dplyr API
-⠋ | 1 | dplyr API
-⠙ | 2 | dplyr API
-⠹ | 3 | dplyr API
-✔ | 3 | dplyr API [0.3 s]
-##
-⠏ | 0 | REST API
-⠋ | 1 | REST API
-⠙ | 2 | REST API
-⠹ | 3 | REST API
-⠸ | 4 | REST API
-⠼ | 5 | REST API
-⠴ | 6 | REST API
-⠦ | 7 | REST API
-⠧ | 8 | REST API
-⠇ | 9 | REST API
-⠏ | 10 | REST API
-⠋ | 11 | REST API
-⠙ | 12 | REST API
-⠹ | 13 | REST API
-⠸ | 14 | REST API
-⠼ | 15 | REST API
-⠴ | 16 | REST API
-✔ | 16 | REST API [2.2 s]
-##
-## ══ Results ═══════════════════════════════════════════════════
-## Duration: 2.5 s
-##
-## OK: 19
-## Failed: 0
-## Warnings: 0
-## Skipped: 0
+## # A tibble: 7 x 3
+## city lon lat
+## <chr> <chr> <chr>
+## 1 Burbank -121.9316233 37.3232752
+## 2 San Jose -121.8949555 37.3393857
+## 3 Lick -121.8457863 37.2871647
+## 4 Willow Glen -121.8896771 37.3085532
+## 5 Buena Vista -121.9166227 37.3213308
+## 6 Parkmoor -121.9307898 37.3210531
+## 7 Fruitdale -121.932746 37.31086
```
-## sergeant Metrics
+### sergeant Metrics
-| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
-| :--- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: |
-| R | 12 | 0.92 | 625 | 0.92 | 173 | 0.75 | 562 | 0.87 |
-| Rmd | 1 | 0.08 | 55 | 0.08 | 58 | 0.25 | 86 | 0.13 |
+| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
+| :--- | -------: | ---: | ---: | ---: | ----------: | ---: | -------: | ---: |
+| R | 18 | 0.95 | 1212 | 0.96 | 349 | 0.86 | 716 | 0.89 |
+| Rmd | 1 | 0.05 | 54 | 0.04 | 56 | 0.14 | 92 | 0.11 |
## Code of Conduct
A man/ctas_profile.Rd => man/ctas_profile.Rd +40 -0
@@ 0,0 1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ctas-profile.R
+\name{ctas_profile}
+\alias{ctas_profile}
+\title{Generate a Drill CTAS Statement from a Query}
+\usage{
+ctas_profile(x, new_table_name = "CHANGE____ME")
+}
+\arguments{
+\item{x}{a \code{tbl}}
+
+\item{new_table_name}{a new Drill data source spec (e.g. \code{dfs.xyz.`a.parquet`})}
+}
+\description{
+When working with CSV[H] files in Drill 1.15.0+ everything comes back
+\code{VARCHAR} since that's the way it should be. The old behaviour of
+\code{sergeant} to auto-type convert was kinda horribad wrong. However,
+it's a royal pain to make \href{https://drill.apache.org/docs/create-table-as-ctas/}{CTAS}
+queries from a giant list of \code{VARCHAR} field by hand. So, this is a
+helper function to do that, inspired by David Severski.
+}
+\note{
+WIP!
+}
+\examples{
+\dontrun{
+db <- src_drill("localhost")
+
+# Test with bare data source
+flt1 <- tbl(db, "dfs.d.`/flights.csvh`")
+
+cat(ctas_profile(flt1))
+
+# Test with SELECT
+flt2 <- tbl(db, sql("SELECT `year`, tailnum, time_hour FROM dfs.d.`/flights.csvh`"))
+
+cat(ctas_profile(flt2, "dfs.d.`flights.parquet`"))
+
+}
+}
A man/dbGetInfo.Rd => man/dbGetInfo.Rd +18 -0
@@ 0,0 1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dbi.r
+\docType{methods}
+\name{dbGetInfo,DrillDriver-method}
+\alias{dbGetInfo,DrillDriver-method}
+\alias{dbGetInfo,DrillConnection-method}
+\title{Metadata about database objects}
+\usage{
+\S4method{dbGetInfo}{DrillDriver}(dbObj)
+
+\S4method{dbGetInfo}{DrillConnection}(dbObj)
+}
+\arguments{
+\item{dbObj}{A \code{\linkS4class{DrillDriver}} or \code{\linkS4class{DrillConnection}} object}
+}
+\description{
+Metadata about database objects
+}
M man/drill_active.Rd => man/drill_active.Rd +8 -11
@@ 1,8 1,8 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sergeant.r
+% Please edit documentation in R/rest-api.r
\name{drill_active}
\alias{drill_active}
-\title{Test whether Drill HTTP Dill direct REST API Interface server is up}
+\title{Test whether Drill HTTP REST API server is up}
\usage{
drill_active(drill_con)
}
@@ 18,19 18,16 @@ drill_connection() \%>\% drill_active()
}, silent=TRUE)
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_cancel}},
+Other Drill direct REST API Interface: \code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_cancel.Rd => man/drill_cancel.Rd +7 -10
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sergeant.r
+% Please edit documentation in R/rest-api.r
\name{drill_cancel}
\alias{drill_cancel}
\title{Cancel the query that has the given queryid}
@@ 18,19 18,16 @@ Cancel the query that has the given queryid
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_connection.Rd => man/drill_connection.Rd +7 -10
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sergeant.r
+% Please edit documentation in R/rest-api.r
\name{drill_connection}
\alias{drill_connection}
\title{Setup a Drill connection}
@@ 29,19 29,16 @@ interface immediately to prime the cookie-jar with the session id.
dc <- drill_connection()
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_custom_functions.Rd => man/drill_custom_functions.Rd +1 -0
@@ 22,6 22,7 @@ If you want a particular custom function mapped, file a PR or issue request in
the link found in the \code{DESCRIPTION} file.
\itemize{
\item \code{as.character(x)} : \code{CAST( x AS CHARACTER )}
+\item \code{as.integer64(x)} : \code{CAST( x AS BIGINT )}
\item \code{as.date(x)} : \code{CAST( x AS DATE )}
\item \code{as.logical(x)} : \code{CAST( x AS BOOLEAN) }
\item \code{as.numeric(x)} : \code{CAST( x AS DOUBLE )}
M man/drill_functions.Rd => man/drill_functions.Rd +7 -10
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sergeant.r
+% Please edit documentation in R/rest-api.r
\name{drill_functions}
\alias{drill_functions}
\title{Show all the available Drill built-in functions & UDFs}
@@ 30,19 30,16 @@ drill_connection() \%>\% drill_functions()
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_metrics.Rd => man/drill_metrics.Rd +7 -10
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sergeant.r
+% Please edit documentation in R/rest-api.r
\name{drill_metrics}
\alias{drill_metrics}
\title{Get the current memory metrics}
@@ 18,20 18,17 @@ drill_connection() \%>\% drill_metrics()
}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
- \code{\link{drill_options}},
+ \code{\link{drill_options}}, \code{\link{drill_opts}},
\code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_options.Rd => man/drill_options.Rd +7 -10
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sergeant.r
+% Please edit documentation in R/rest-api.r
\name{drill_options}
\alias{drill_options}
\title{List the name, default, and data type of the system and session options}
@@ 23,20 23,17 @@ drill_connection() \%>\% drill_options()
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
- \code{\link{drill_metrics}},
+ \code{\link{drill_metrics}}, \code{\link{drill_opts}},
\code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
A man/drill_opts.Rd => man/drill_opts.Rd +46 -0
@@ 0,0 1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rest-api.r
+\name{drill_opts}
+\alias{drill_opts}
+\title{Show all the available Drill options}
+\usage{
+drill_opts(drill_con, browse = FALSE)
+}
+\arguments{
+\item{drill_con}{drill server connection object setup by \code{drill_connection()}}
+
+\item{browse}{if \code{TRUE} display an interactive HTML widget with the options
+as well as return the data frame with the options. Default is \code{FALSE}.}
+}
+\value{
+data frame
+}
+\description{
+Show all the available Drill options
+}
+\note{
+You \emph{must} be using Drill 1.15.0+ to use this function
+}
+\examples{
+\dontrun{
+drill_connection() \%>\% drill_opts()
+}
+}
+\references{
+\href{https://drill.apache.org/docs/querying-system-tables/#querying-the-options-table}{Drill documentation}
+}
+\seealso{
+Other Drill direct REST API Interface: \code{\link{drill_active}},
+ \code{\link{drill_cancel}},
+ \code{\link{drill_connection}},
+ \code{\link{drill_functions}},
+ \code{\link{drill_metrics}}, \code{\link{drill_options}},
+ \code{\link{drill_profiles}},
+ \code{\link{drill_profile}}, \code{\link{drill_query}},
+ \code{\link{drill_settings_reset}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
+ \code{\link{drill_system_reset}},
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
+}
+\concept{Drill direct REST API Interface}
M man/drill_profile.Rd => man/drill_profile.Rd +8 -10
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sergeant.r
+% Please edit documentation in R/rest-api.r
\name{drill_profile}
\alias{drill_profile}
\title{Get the profile of the query that has the given queryid}
@@ 18,19 18,17 @@ Get the profile of the query that has the given queryid
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}}, \code{\link{drill_query}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
+ \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_profiles.Rd => man/drill_profiles.Rd +8 -10
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sergeant.r
+% Please edit documentation in R/rest-api.r
\name{drill_profiles}
\alias{drill_profiles}
\title{Get the profiles of running and completed queries}
@@ 21,19 21,17 @@ drill_connection() \%>\% drill_profiles()
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profile}}, \code{\link{drill_query}},
+ \code{\link{drill_opts}}, \code{\link{drill_profile}},
+ \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_query.Rd => man/drill_query.Rd +7 -10
@@ 22,7 22,7 @@ ignored if \code{drill_con} is a \code{JDBCConnection} created by
\description{
This function can handle REST API connections or JDBC connections. There is a benefit to
calling this function for JDBC connections vs a straight call to \code{dbGetQuery()} in
-that the function result is a `tbl_df` vs a plain \code{data.frame} so you get better
+that the function result is a \code{tbl_df} vs a plain \code{data.frame} so you get better
default printing (which can be helpful if you accidentally execute a query and the result
set is huge).
}
@@ 36,20 36,17 @@ drill_connection() \%>\%
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
\code{\link{drill_profile}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_set.Rd => man/drill_set.Rd +5 -8
@@ 9,7 9,7 @@ drill_set(drill_con, ..., type = c("session", "system"))
\arguments{
\item{drill_con}{drill server connection object setup by \code{drill_connection()}}
-\item{...}{named parameters to be sent to ALTER [SYSTEM|SESSION]}
+\item{...}{named parameters to be sent to \code{ALTER SYSTEM} or \code{ALTER SESSION}}
\item{type}{set the \code{session} or \code{system} parameter}
}
@@ 34,20 34,17 @@ drill_connection() \%>\%
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
\code{\link{drill_stats}}, \code{\link{drill_status}},
\code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_settings_reset.Rd => man/drill_settings_reset.Rd +6 -9
@@ 23,19 23,16 @@ drill_connection() \%>\% drill_settings_reset(exec.errors.verbose)
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_show_files.Rd => man/drill_show_files.Rd +2 -15
@@ 26,20 26,7 @@ drill_connection() \%>\% drill_show_files("dfs.tmp")
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
- \code{\link{drill_cancel}},
- \code{\link{drill_connection}},
- \code{\link{drill_functions}},
- \code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
- \code{\link{drill_profile}}, \code{\link{drill_query}},
- \code{\link{drill_settings_reset}},
- \code{\link{drill_set}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
- \code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+Other Drill direct REST API Interface: \code{\link{drill_show_schemas}},
+ \code{\link{drill_use}}
}
\concept{Drill direct REST API Interface}
M man/drill_show_schemas.Rd => man/drill_show_schemas.Rd +2 -14
@@ 19,19 19,7 @@ Returns a list of available schemas.
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
- \code{\link{drill_cancel}},
- \code{\link{drill_connection}},
- \code{\link{drill_functions}},
- \code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
- \code{\link{drill_profile}}, \code{\link{drill_query}},
- \code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
- \code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+Other Dill direct REST API Interface: \code{\link{drill_show_files}},
+ \code{\link{drill_use}}
}
\concept{Dill direct REST API Interface}
M man/drill_stats.Rd => man/drill_stats.Rd +7 -9
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sergeant.r
+% Please edit documentation in R/rest-api.r
\name{drill_stats}
\alias{drill_stats}
\title{Get Drillbit information, such as ports numbers}
@@ 21,19 21,17 @@ drill_connection() \%>\% drill_stats()
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_status}}, \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_status}},
+ \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_status.Rd => man/drill_status.Rd +7 -9
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sergeant.r
+% Please edit documentation in R/rest-api.r
\name{drill_status}
\alias{drill_status}
\title{Get the status of Drill}
@@ 21,19 21,17 @@ drill_connection() \%>\% drill_status()
}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_storage.Rd => man/drill_storage.Rd +18 -44
@@ 1,12 1,12 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sergeant.r
+% Please edit documentation in R/rest-api.r
\name{drill_storage}
\alias{drill_storage}
\alias{drill_mod_storage}
\alias{drill_rm_storage}
-\title{Get the list of storage plugin names and configurations}
+\title{Retrieve, modify or remove storage plugin names and configurations}
\usage{
-drill_storage(drill_con, plugin = NULL)
+drill_storage(drill_con, plugin = NULL, as = c("tbl", "list", "raw"))
drill_mod_storage(drill_con, name, config)
@@ 17,13 17,19 @@ drill_rm_storage(drill_con, name)
\item{plugin}{the assigned name in the storage plugin definition.}
+\item{as}{one of "\code{tbl}" or "\code{list}" or "\code{raw}". The latter two are useful if you want to
+modify an existing storage plugin (e.g. add a workspace) via
+\code{\link[=drill_mod_storage]{drill_mod_storage()}}.}
+
\item{name}{name of the storage plugin configuration to create/update/remove}
-\item{config}{a \code{list} or raw character, valid JSON of a complete storage
-spec}
+\item{config}{a raw 1-element character vector containing valid JSON of a
+complete storage spec}
}
\description{
-Get the list of storage plugin names and configurations
+Retrieve, modify or remove storage plugins from a Drill instance. If you intend
+to modify an existing configuration it is suggested that you use the "\code{list}" or
+"\code{raw}" values to the \code{as} parameter to make it easier to modify them.
}
\examples{
\dontrun{
@@ 56,49 62,17 @@ drill_connection() \%>\%
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
- \code{\link{drill_cancel}},
- \code{\link{drill_connection}},
- \code{\link{drill_functions}},
- \code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
- \code{\link{drill_profile}}, \code{\link{drill_query}},
- \code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
-
-Other Dill direct REST API Interface: \code{\link{drill_active}},
- \code{\link{drill_cancel}},
- \code{\link{drill_connection}},
- \code{\link{drill_functions}},
- \code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
- \code{\link{drill_profile}}, \code{\link{drill_query}},
- \code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
-
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}},
- \code{\link{drill_version}}
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_system_reset.Rd => man/drill_system_reset.Rd +6 -8
@@ 25,18 25,16 @@ drill_connection() \%>\% drill_system_reset(all=TRUE)
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}}, \code{\link{drill_threads}},
- \code{\link{drill_use}}, \code{\link{drill_version}}
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
+ \code{\link{drill_threads}}, \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
M man/drill_threads.Rd => man/drill_threads.Rd +7 -9
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sergeant.r
+% Please edit documentation in R/rest-api.r
\name{drill_threads}
\alias{drill_threads}
\title{Get information about threads}
@@ 21,19 21,17 @@ drill_connection() \%>\% drill_threads()
}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_use}}, \code{\link{drill_version}}
+ \code{\link{drill_version}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
A man/drill_up.Rd => man/drill_up.Rd +62 -0
@@ 0,0 1,62 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/drill-docker.R
+\name{drill_up}
+\alias{drill_up}
+\alias{drill_down}
+\title{Start a Dockerized Drill Instance}
+\usage{
+drill_up(image = "drill/apache-drill:1.16.0", container_name = "drill",
+ data_dir = getwd(), remove = TRUE)
+
+drill_down(id)
+}
+\arguments{
+\item{image}{Drill image to use. Must be a valid image from
+\href{https://hub.docker.com/u/drill}{Drill's Docker Hub}. Defaults
+to most recent Drill docker image.}
+
+\item{container_name}{name for the container. Defaults to "\code{drill}".}
+
+\item{data_dir}{valid path to a place where your data is stored; defaults to the
+value of \code{\link[=getwd]{getwd()}}. This will be \code{\link[=path.expand]{path.expand()}}ed and mapped to \code{/data}
+in the container. This will be mapped to the \code{dfs} storage plugin as the
+\code{dfs.d} workspace.}
+
+\item{remove}{remove the Drill container instance after it's stopped?
+Defaults to \code{TRUE} since you shouldn't be relying on this in production.}
+
+\item{id}{the id of the Drill container}
+}
+\value{
+a \code{stevedore} docker object (invisibly) which \emph{you} are responsible
+for killing with the \code{$stop()} function or from the Docker command
+line (in interactive mode the docker container ID is printed as well).
+}
+\description{
+This is a "get you up and running quickly" helper function as it only
+runs a standalone mode Drill instance and is optionally removed after the container
+is stopped. You should customize your own Drill containers based on the
+one at \href{https://hub.docker.com/u/drill}{Drill's Docker Hub}.
+}
+\details{
+The path specified in \code{data_dir} will be mapped inside the container as
+\code{/data} and a new \code{dfs} storage workspace will be created (\code{dfs.d}) that
+maps to \code{/data} and is writable.
+
+Use \code{\link[=drill_down]{drill_down()}} to stop a running Drill container by container id
+(full or partial).
+}
+\note{
+this requires a working Docker setup on your system and it is \emph{highly suggested}
+you \code{docker pull} it yourself before running this function.
+}
+\examples{
+\dontrun{
+drill_up(data_dir = "~/Data")
+}
+}
+\seealso{
+Other Drill Docker functions: \code{\link{killall_drill}},
+ \code{\link{showall_drill}}
+}
+\concept{Drill Docker functions}
M man/drill_uplift.Rd => man/drill_uplift.Rd +4 -4
@@ 7,16 7,16 @@
drill_uplift(query_result)
}
\arguments{
-\item{query_result}{the result of a call to `drill_query()`}
+\item{query_result}{the result of a call to \code{drill_query()}}
}
\description{
-If you know the result of `drill_query()` will be a data frame, then
-you can pipe it to this function to pull out `rows` and automatically
+If you know the result of \code{drill_query()} will be a data frame, then
+you can pipe it to this function to pull out \code{rows} and automatically
type-convert it.
}
\details{
Not really intended to be called directly, but useful if you accidentally ran
-\code{drill_query()} without `uplift=TRUE` but want to then convert the structure.
+\code{drill_query()} without \code{uplift=TRUE} but want to then convert the structure.
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
M man/drill_use.Rd => man/drill_use.Rd +2 -14
@@ 22,19 22,7 @@ Change to a particular schema.
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
- \code{\link{drill_cancel}},
- \code{\link{drill_connection}},
- \code{\link{drill_functions}},
- \code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
- \code{\link{drill_profile}}, \code{\link{drill_query}},
- \code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
- \code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_version}}
+Other Dill direct REST API Interface: \code{\link{drill_show_files}},
+ \code{\link{drill_show_schemas}}
}
\concept{Dill direct REST API Interface}
M man/drill_version.Rd => man/drill_version.Rd +7 -9
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sergeant.r
+% Please edit documentation in R/rest-api.r
\name{drill_version}
\alias{drill_version}
\title{Identify the version of Drill running}
@@ 21,19 21,17 @@ drill_connection() \%>\% drill_version()
\href{https://drill.apache.org/docs/}{Drill documentation}
}
\seealso{
-Other Dill direct REST API Interface: \code{\link{drill_active}},
+Other Drill direct REST API Interface: \code{\link{drill_active}},
\code{\link{drill_cancel}},
\code{\link{drill_connection}},
\code{\link{drill_functions}},
\code{\link{drill_metrics}}, \code{\link{drill_options}},
- \code{\link{drill_profiles}},
+ \code{\link{drill_opts}}, \code{\link{drill_profiles}},
\code{\link{drill_profile}}, \code{\link{drill_query}},
\code{\link{drill_settings_reset}},
- \code{\link{drill_set}}, \code{\link{drill_show_files}},
- \code{\link{drill_show_schemas}},
- \code{\link{drill_stats}}, \code{\link{drill_status}},
- \code{\link{drill_storage}},
+ \code{\link{drill_set}}, \code{\link{drill_stats}},
+ \code{\link{drill_status}}, \code{\link{drill_storage}},
\code{\link{drill_system_reset}},
- \code{\link{drill_threads}}, \code{\link{drill_use}}
+ \code{\link{drill_threads}}
}
-\concept{Dill direct REST API Interface}
+\concept{Drill direct REST API Interface}
A man/format.DrillConnection.Rd => man/format.DrillConnection.Rd +16 -0
@@ 0,0 1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dbi.r
+\name{format.DrillConnection}
+\alias{format.DrillConnection}
+\title{A concise character representation (label) for a \code{DrillConnection}}
+\usage{
+\method{format}{DrillConnection}(x, ...)
+}
+\arguments{
+\item{x}{a \code{DrillConnection}}
+
+\item{...}{ignored}
+}
+\description{
+A concise character representation (label) for a \code{DrillConnection}
+}
A man/killall_drill.Rd => man/killall_drill.Rd +21 -0
@@ 0,0 1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/drill-docker.R
+\name{killall_drill}
+\alias{killall_drill}
+\title{Prune all dead and running Drill Docker containers}
+\usage{
+killall_drill()
+}
+\description{
+\emph{This is a destructive function.} It will stop \strong{any} Docker container that
+is based on an image matching a runtime command of "\code{bin/drill-embedded}".
+It's best used when you had a session forcefully interrupted and had been
+using the R helper functions to start/stop the Drill Docker container.
+You may want to consider using the Docker command-line interface to perform
+this work manually.
+}
+\seealso{
+Other Drill Docker functions: \code{\link{drill_up}},
+ \code{\link{showall_drill}}
+}
+\concept{Drill Docker functions}
A man/print.drill_conn.Rd => man/print.drill_conn.Rd +16 -0
@@ 0,0 1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rest-api.r
+\name{print.drill_conn}
+\alias{print.drill_conn}
+\title{Print function for \code{drill_conn} objects}
+\usage{
+\method{print}{drill_conn}(x, ...)
+}
+\arguments{
+\item{x}{a \code{drill_conn} object made with \code{\link[=drill_connection]{drill_connection()}}}
+
+\item{...}{unused}
+}
+\description{
+Print function for \code{drill_conn} objects
+}
M man/sergeant.Rd => man/sergeant.Rd +8 -6
@@ 13,10 13,10 @@ having to create and manage schemas. Some of the key features are:
}
\details{
\itemize{
- \item{Schema-free JSON document model similar to MongoDB and Elasticsearch}
- \item{Industry-standard APIs: ANSI SQL, ODBC/JDBC, RESTful APIs}
- \item{Extremely user and developer friendly}
- \item{Pluggable architecture enables connectivity to multiple datastores}
+\item{Schema-free JSON document model similar to MongoDB and Elasticsearch}
+\item{Industry-standard APIs: ANSI SQL, ODBC/JDBC, RESTful APIs}
+\item{Extremely user and developer friendly}
+\item{Pluggable architecture enables connectivity to multiple datastores}
}
Drill includes a distributed execution environment, purpose built for large-scale data
@@ 27,11 27,12 @@ the client.
You can install and run a Drillbit service on one node or on many nodes to form a
distributed cluster environment. When a Drillbit runs on each data node in a cluster,
Drill can maximize data locality during query execution without moving data over the
-network or between nodes. Drill uses ZooKeeper to maintain cluster membership and health
+network or between nodes. Drill uses Zookeeper to maintain cluster membership and health
check information.
Methods are provided to work with Drill via the REST APIs along with R
-\code{DBI} and \code{dplyr} interfaces.
+\code{DBI} and \code{dplyr} interfaces. Helper functions are included to facilitate
+using official 'Drill' 'Docker' images/containers.
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
@@ 39,3 40,4 @@ Methods are provided to work with Drill via the REST APIs along with R
\author{
Bob Rudis (bob@rud.is)
}
+\keyword{internal}
A man/showall_drill.Rd => man/showall_drill.Rd +17 -0
@@ 0,0 1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/drill-docker.R
+\name{showall_drill}
+\alias{showall_drill}
+\title{Show all dead and running Drill Docker containers}
+\usage{
+showall_drill()
+}
+\description{
+This function will show \emph{all} Docker containers that are based on an
+image matching a runtime command of "\code{bin/drill-embedded}".
+}
+\seealso{
+Other Drill Docker functions: \code{\link{drill_up}},
+ \code{\link{killall_drill}}
+}
+\concept{Drill Docker functions}
A tests/testthat/test-dbi-internal.R => tests/testthat/test-dbi-internal.R +50 -0
@@ 0,0 1,50 @@
+test_host <- Sys.getenv("DRILL_TEST_HOST", "localhost")
+
+options(sergeant.bigint.warnonce = FALSE)
+
+context("dbi")
+test_that("core DBI ops work", {
+
+ testthat::skip_on_cran()
+
+ con <- dbConnect(Drill(), test_host)
+ expect_is(con, "DrillConnection")
+
+ expect_true(dbIsValid(con))
+
+ fields <- dbListFields(con, "cp.`employee.json`")
+ expect_true(
+ all(
+ fields %in%
+ c(
+ "employee_id", "full_name", "first_name", "last_name", "position_id",
+ "position_title", "store_id", "department_id", "birth_date",
+ "hire_date", "salary", "supervisor_id", "education_level", "marital_status",
+ "gender", "management_role"
+ )
+ )
+ )
+
+ res <- dbSendQuery(con, "SELECT full_name from cp.`employee.json` LIMIT 1")
+ expect_is(res, "DrillResult")
+
+ xdf <- dbFetch(res)
+ expect_identical(dim(xdf), c(1L, 1L))
+
+ expect_true(dbClearResult(res))
+
+ expect_true(dbHasCompleted(res))
+
+ expect_equal(dbDataType(con, character(0)), "VARCHAR")
+ expect_equal(dbDataType(con, integer(0)), "INTEGER")
+ expect_equal(dbDataType(con, Sys.Date()), "DATE")
+ expect_equal(dbDataType(con, Sys.time()), "TIMESTAMP")
+ expect_equal(dbDataType(con, bit64::integer64(0)), "BIGINT")
+ expect_equal(dbDataType(con, numeric(0)), "DOUBLE")
+
+ expect_is(dbGetInfo(Drill()), "list")
+
+ inf <- dbGetInfo(con)
+ expect_equal(inf$port, 8047)
+
+})
A tests/testthat/test-dbitest.R => tests/testthat/test-dbitest.R +58 -0
@@ 0,0 1,58 @@
+test_host <- Sys.getenv("DRILL_TEST_HOST", "localhost")
+
+options(sergeant.bigint.warnonce = FALSE)
+
+testthat::skip_on_cran()
+
+expect_visible <- function(code) {
+ ret <- withVisible(code)
+ expect_true(ret$visible)
+ ret$value
+}
+
+connect <- function (drv) {
+ connect_call <- as.call(c(list(quote(dbConnect), drv)))
+ connect_fun <- function() {}
+ body(connect_fun) <- connect_call
+ connect_fun()
+}
+
+context("r-lib dbi interface test")
+
+dr <- Drill()
+
+expect_s4_class(dr, "DBIDriver")
+expect_is(dbGetInfo(dr), "list")
+expect_true(all(names(dbGetInfo(dr)) %in% c("driver.version", "client.version")))
+
+expect_equal(names(formals(dbConnect)), c("drv", "..."))
+expect_equal(names(formals(dbDisconnect)), c("conn", "..."))
+
+con <- expect_visible(dbConnect(dr, test_host))
+expect_s4_class(con, "DBIConnection")
+expect_true(dbDisconnect(con))
+
+expect_is(dbGetInfo(con), "list")
+
+expect_is(format(con), "character")
+
+expect_equal(names(formals(dbDataType)), c("dbObj", "obj", "..."))
+
+expect_error(dbDataType(con, NULL))
+
+expect_identical(dbDataType(con, letters), dbDataType(con, factor(letters)))
+expect_identical(dbDataType(con, letters), dbDataType(con, ordered(letters)))
+
+expect_true(
+ all(c("db.version", "dbname", "username", "host", "port") %in% names(dbGetInfo(con)))
+)
+
+expect_false("password" %in% names(dbGetInfo(con)))
+
+expect_equal(names(formals(dbListFields)), c("conn", "name", "..."))
+
+fields <- dbListFields(con, "cp.`employee.json`")
+expect_is(fields, "character")
+
+expect_warning(dbListFields(con, "missing"))
+
A tests/testthat/test-rest-api.R => tests/testthat/test-rest-api.R +50 -0
@@ 0,0 1,50 @@
+test_host <- Sys.getenv("DRILL_TEST_HOST", "localhost")
+
+options(sergeant.bigint.warnonce = FALSE)
+
+context("REST API")
+test_that("REST API works", {
+
+ testthat::skip_on_cran()
+
+ dc <- drill_connection(test_host)
+ expect_that(drill_active(dc), equals(TRUE))
+
+ suppressMessages(
+ drill_query(dc, "SELECT * FROM cp.`employee.json` limit 10", .progress = FALSE)
+ ) -> test_rest
+
+ expect_that(test_rest, is_a("data.frame"))
+
+ expect_that(drill_version(dc), is_a("character"))
+ expect_that(drill_metrics(dc), is_a("list"))
+ expect_that(drill_options(dc), is_a("tbl"))
+
+ dp <- drill_profiles(dc)
+
+ expect_that(dp, is_a("list"))
+ expect_that(drill_profile(dc, dp$finishedQueries[1]$queryId[1]), is_a("list"))
+ suppressMessages(
+ expect_that(drill_cancel(dc, dp$finishedQueries[1]$queryId[1]), equals(TRUE))
+ )
+ suppressMessages(
+ suppressWarnings(
+ expect_that(drill_show_files(dc, schema_spec = "dfs"), is_a("tbl"))
+ )
+ )
+ expect_that(drill_show_schemas(dc), is_a("tbl"))
+ expect_that(drill_storage(dc), is_a("tbl"))
+ expect_that(drill_stats(dc), is_a("list"))
+ expect_that(drill_status(dc), is_a("html"))
+ expect_that(drill_threads(dc), is_a("html"))
+ expect_that(drill_use(dc, "cp"), is_a("tbl"))
+ expect_that(
+ drill_set(
+ dc,
+ exec.errors.verbose=TRUE,
+ store.format="parquet",
+ web.logs.max_lines=20000),
+ is_a("tbl")
+ )
+
+})
M tests/testthat/test-sergeant.R => tests/testthat/test-sergeant.R +24 -29
@@ 1,9 1,14 @@
+library(dbplyr)
+library(dplyr)
+
test_host <- Sys.getenv("DRILL_TEST_HOST", "localhost")
-context("dplyr API")
-test_that("Core dbplyr ops work", {
+options(sergeant.bigint.warnonce = FALSE)
+
+context("basic d[b]plyr API")
+test_that("Core d[b]plyr ops work", {
- testthat::skip_on_cran()
+ testthat::skip_on_cran()
db <- src_drill(test_host)
@@ 12,42 17,32 @@ test_that("Core dbplyr ops work", {
test_dplyr <- tbl(db, "cp.`employee.json`")
expect_that(test_dplyr, is_a("tbl"))
- expect_that(dplyr::count(test_dplyr, gender), is_a("tbl"))
})
-context("REST API")
-test_that("REST API works", {
+context("extended d[b]plyr API")
+test_that("Extended d[b]plyr ops work", {
- testthat::skip_on_cran()
+ testthat::skip_on_cran()
- dc <- drill_connection(test_host)
- expect_that(drill_active(dc), equals(TRUE))
+ db <- src_drill(test_host)
- test_rest <- drill_query(dc, "SELECT * FROM cp.`employee.json` limit 10")
+ test_dplyr <- tbl(db, "cp.`employee.json`")
- expect_that(test_rest, is_a("data.frame"))
+ expect_that(dplyr::count(test_dplyr, gender), is_a("tbl"))
+ expect_true(sum(dplyr::collect(dplyr::count(test_dplyr, gender))[["n"]]) > 100)
- expect_that(drill_version(dc), is_a("character"))
- expect_that(drill_metrics(dc), is_a("list"))
- expect_that(drill_options(dc), is_a("tbl"))
+ emp_partial <- tbl(db, sql("SELECT full_name from cp.`employee.json`"))
+ expect_is(emp_partial, "tbl_drill")
- dp <- drill_profiles(dc)
+ fields <- db_query_fields(emp_partial$src$con, sql("SELECT full_name from cp.`employee.json`"))
+ expect_true(all(fields %in% c("full_name", "filename", "filepath", "fqn", "suffix")))
- expect_that(dp, is_a("list"))
- expect_that(drill_profile(dc, dp$finishedQueries[1]$queryId[1]), is_a("list"))
- expect_that(drill_cancel(dc, dp$finishedQueries[1]$queryId[1]), equals(TRUE))
- expect_that(drill_show_files(dc, schema_spec = "dfs"), is_a("tbl"))
- expect_that(drill_show_schemas(dc), is_a("tbl"))
- expect_that(drill_storage(dc), is_a("tbl"))
- expect_that(drill_stats(dc), is_a("list"))
- expect_that(drill_status(dc), is_a("html"))
- expect_that(drill_threads(dc), is_a("html"))
- expect_that(drill_use(dc, "cp"), is_a("tbl"))
- expect_that(drill_set(dc, exec.errors.verbose=TRUE,
- store.format="parquet",
- q = 4,
- web.logs.max_lines=20000), is_a("tbl"))
+ expln <- db_explain(emp_partial$src$con, sql("SELECT full_name from cp.`employee.json`"))
+ expect_true(grepl("groupscan", expln))
+ res <- select(emp_partial, full_name)
})
+
+