~fgaz/haskell-ucl

ad83916b1099364d5446c0817fd1678d5f507374 — Francesco Gazzetta 3 years ago v0.1.0.0
🌅
6 files changed, 281 insertions(+), 0 deletions(-)

A .gitignore
A CHANGELOG.md
A LICENSE
A src/Data/UCL.hs
A test/ucl-test.hs
A ucl.cabal
A  => .gitignore +4 -0
@@ 1,4 @@
.ghc.environment.*
dist
dist-newstyle
cabal.project.local

A  => CHANGELOG.md +5 -0
@@ 1,5 @@
# Revision history for ucl

## 0.1.0.0 -- 2021-02-07

* First version. Released on an unsuspecting world.

A  => LICENSE +30 -0
@@ 1,30 @@
Copyright (c) 2021, Francesco Gazzetta

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the following
      disclaimer in the documentation and/or other materials provided
      with the distribution.

    * Neither the name of Francesco Gazzetta nor the names of other
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

A  => src/Data/UCL.hs +171 -0
@@ 1,171 @@
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE PatternSynonyms #-}

module Data.UCL
( UCL(..)
, parseString
, parseByteString
, parseFile
) where

import Foreign.C
import Foreign.Ptr
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.Text.Foreign as TF
import Data.Text (Text)
import qualified Data.Map as Map
import Data.Map (Map)
import Data.Time.Clock (DiffTime)
import Data.ByteString (ByteString, useAsCStringLen)


newtype ParserHandle = ParserHandle (Ptr ())
newtype UCLObjectHandle = UCLObjectHandle (Ptr ())
newtype UCLIterHandle = UCLIterHandle (Ptr ())

type UCL_TYPE = CUInt
pattern UCL_OBJECT :: UCL_TYPE
pattern UCL_OBJECT = 0
pattern UCL_ARRAY :: UCL_TYPE
pattern UCL_ARRAY = 1
pattern UCL_INT :: UCL_TYPE
pattern UCL_INT = 2
pattern UCL_FLOAT :: UCL_TYPE
pattern UCL_FLOAT = 3
pattern UCL_STRING :: UCL_TYPE
pattern UCL_STRING = 4
pattern UCL_BOOLEAN :: UCL_TYPE
pattern UCL_BOOLEAN = 5
pattern UCL_TIME :: UCL_TYPE
pattern UCL_TIME = 6
pattern UCL_USERDATA :: UCL_TYPE
pattern UCL_USERDATA = 7
pattern UCL_NULL :: UCL_TYPE
pattern UCL_NULL = 8


foreign import ccall "ucl_parser_new" ucl_parser_new :: CInt -> IO ParserHandle
foreign import ccall "ucl_parser_add_string" ucl_parser_add_string :: ParserHandle -> CString -> CUInt -> IO Bool
foreign import ccall "ucl_parser_add_file" ucl_parser_add_file :: ParserHandle -> CString -> IO Bool
foreign import ccall "ucl_parser_get_object" ucl_parser_get_object :: ParserHandle -> IO UCLObjectHandle
foreign import ccall "ucl_parser_get_error" ucl_parser_get_error :: ParserHandle -> IO CString

foreign import ccall "ucl_object_iterate_new" ucl_object_iterate_new :: UCLObjectHandle -> IO UCLIterHandle
foreign import ccall "ucl_object_iterate_safe" ucl_object_iterate_safe :: UCLIterHandle -> Bool -> IO UCLObjectHandle
foreign import ccall "ucl_object_type" ucl_object_type :: UCLObjectHandle -> UCL_TYPE
foreign import ccall "ucl_object_key" ucl_object_key :: UCLObjectHandle -> CString
foreign import ccall "ucl_object_toint" ucl_object_toint :: UCLObjectHandle -> CInt
foreign import ccall "ucl_object_todouble" ucl_object_todouble :: UCLObjectHandle -> CDouble
foreign import ccall "ucl_object_tostring" ucl_object_tostring :: UCLObjectHandle -> CString
foreign import ccall "ucl_object_toboolean" ucl_object_toboolean :: UCLObjectHandle -> Bool

foreign import ccall "strlen" c_strlen :: CString -> IO CSize


peekCStringText :: CString -> IO Text
peekCStringText cstr = do
  len <- c_strlen cstr
  TF.peekCStringLen (cstr, fromIntegral len)

-- | Parse a 'ByteString' into a 'UCL', resolving includes, macros, variables...
-- Note that unicode does not get converted when using 'fromString'.
-- Prefer 'parseString' when working on 'String's or literals.
--
-- >>> parseByteString $ fromString "{a: [1,2], b: 3min, a: [4]}"
-- Right (UCLMap (fromList
--   [ (UCLText "a", UCLArray [UCLInt 1, UCLInt 2, UCLInt 4])
--   , (UCLText "b", UCLTime 180s                           )
--   ]))
--
-- This function is __not__ safe to call on untrusted input: configurations can
-- read files, make http requests, do "billion laughs" attacks, and possibly
-- crash the parser.
parseByteString :: ByteString -> IO (Either String UCL)
parseByteString bs = useAsCStringLen bs parseCStringLen

-- | Parse a 'String' into a 'UCL', resolving includes, macros, variables...
--
-- >>> parseString "{a: [1,2], 🌅: 3min, a: [4]}"
-- Right (UCLMap (fromList
--   [ (UCLText "a"      , UCLArray [UCLInt 1, UCLInt 2, UCLInt 4])
--   , (UCLText "\127749", UCLTime 180s                           )
--   ]))
--
-- This function is __not__ safe to call on untrusted input: configurations can
-- read files, make http requests, do "billion laughs" attacks, and possibly
-- crash the parser.
parseString :: String -> IO (Either String UCL)
parseString s = do
  cs <- newCString s
  parseCStringLen (cs, length s)

parseCStringLen :: CStringLen -> IO (Either String UCL)
parseCStringLen (cs, len) = do
  p <- ucl_parser_new 0x0
  didParse <- ucl_parser_add_string p cs $ fromIntegral len
  if didParse
  then Right . handleToUCL <$> ucl_parser_get_object p
  else Left <$> (ucl_parser_get_error p >>= peekCString)

-- | Parse the contents of a file into a 'UCL', resolving includes, macros,
-- variables...
--
-- This function is __not__ safe to call on untrusted input: configurations can
-- read files, make http requests, do "billion laughs" attacks, and possibly
-- crash the parser.
parseFile :: FilePath -> IO (Either String UCL)
parseFile s = do
    cs <- newCString s
    p <- ucl_parser_new 0x0
    didParse <- ucl_parser_add_file p cs
    if didParse
    then Right . handleToUCL <$> ucl_parser_get_object p
    else Left <$> (ucl_parser_get_error p >>= peekCString)

-- | An UCL object
data UCL = UCLMap (Map UCL UCL)
         | UCLArray [UCL]
         | UCLInt Int
         | UCLDouble Double
         | UCLText Text
         | UCLBool Bool
         | UCLTime DiffTime
  deriving (Show, Eq, Ord)

handleToUCL :: UCLObjectHandle -> UCL
handleToUCL o = typedHandleToUCL (ucl_object_type o) o

typedHandleToUCL :: UCL_TYPE -> UCLObjectHandle -> UCL
typedHandleToUCL UCL_OBJECT   obj = UCLMap $ uclObjectToMap obj
typedHandleToUCL UCL_ARRAY    obj = UCLArray $ uclArrayToList obj
typedHandleToUCL UCL_INT      obj = UCLInt $ fromIntegral $ ucl_object_toint obj
typedHandleToUCL UCL_FLOAT    obj = UCLDouble $ realToFrac $ ucl_object_todouble obj
typedHandleToUCL UCL_STRING   obj = UCLText $ unsafePerformIO $ peekCStringText $ ucl_object_tostring obj
typedHandleToUCL UCL_BOOLEAN  obj = UCLBool $ ucl_object_toboolean obj
typedHandleToUCL UCL_TIME     obj = UCLTime $ realToFrac $ ucl_object_todouble obj
typedHandleToUCL UCL_USERDATA _   = error "Userdata object"
typedHandleToUCL UCL_NULL     _   = error "Null object"
typedHandleToUCL _            _   = error "Unknown Type"

uclObjectToMap :: UCLObjectHandle -> Map UCL UCL
uclObjectToMap o = unsafePerformIO $ do
  iter <- ucl_object_iterate_new o
  go iter Map.empty
  where 
    go it m = do
      obj <- ucl_object_iterate_safe it True
      case ucl_object_type obj of
        UCL_NULL -> pure m
        _        -> go it $ Map.insert (getUclKey obj) (handleToUCL obj) m
    getUclKey obj = UCLText $ unsafePerformIO $ peekCStringText $ ucl_object_key obj

uclArrayToList :: UCLObjectHandle -> [UCL]
uclArrayToList o = unsafePerformIO $ do
  iter <- ucl_object_iterate_new o
  go iter
  where 
    go it = do
      obj <- ucl_object_iterate_safe it True
      case ucl_object_type obj of
        UCL_NULL -> pure []
        _        -> (handleToUCL obj :) <$> go it

A  => test/ucl-test.hs +17 -0
@@ 1,17 @@
{-# LANGUAGE OverloadedStrings #-}
module Main where

import Data.UCL
import Data.Map (fromList)
import Control.Monad (unless)
import System.Exit (exitFailure)

-- TODO use quickcheck when printing is implemented
main :: IO ()
main = do
  parsed1 <- parseString "0: 1min"
  print parsed1
  parsed <- parseString "\"a\": [12,34], 1:2, 1:3, 2:\"ab🌅c\", 3: yes, \"a\": [56]"
  print parsed
  unless (parsed == Right (UCLMap (fromList [(UCLText "1",UCLInt 2),(UCLText "2",UCLText "ab\127749c"),(UCLText "3",UCLBool True),(UCLText "a",UCLArray [UCLInt 12,UCLInt 34])])))
     exitFailure

A  => ucl.cabal +54 -0
@@ 1,54 @@
cabal-version:       2.2

name:                ucl
version:             0.1.0.0
synopsis:            Datatype and parser for the Universal Configuration Language (UCL) using libucl
description:
  The Universal Configuration Language (UCL) is a configuration language
  inspired by nginx configuration files and compatible with JSON.
  For a complete description of the language, see [the libucl readme](https://github.com/vstakhov/libucl/blob/master/README.md).
  .
  This library contains a datatype representing UCL objects, and a parser.
  It is based on the C library [libucl](https://github.com/vstakhov/libucl),
  which is needed to build this package.
-- bug-reports:         mailto:fgaz@fgaz.me
license:             BSD-3-Clause
license-file:        LICENSE
author:              Francesco Gazzetta
maintainer:          fgaz@fgaz.me
copyright:           Francesco Gazzetta 2021
category:            Data, Configuration
extra-source-files:  CHANGELOG.md

source-repository head
  type:                git
  location:            https://git.sr.ht/~fgaz/haskell-ucl

common common
  default-language:    Haskell2010
  build-depends:       base ^>=4.13.0.0
                         || ^>=4.14.1.0
                     , containers ^>=0.6.2.1
  ghc-options:         -Wall

library
  import:              common
  hs-source-dirs:      src
  exposed-modules:     Data.UCL
  -- other-modules:
  other-extensions:    ForeignFunctionInterface
                     , PatternSynonyms
  build-depends:       text ^>=1.2.4.0
                     , bytestring ^>=0.10.10
                               || ^>=0.11.0.0
                     , time ^>=1.9.3
                         || ^>=1.11.1.1
  pkgconfig-depends:   libucl

test-suite ucl-test
  import:              common
  type:                exitcode-stdio-1.0
  hs-source-dirs:      test
  main-is:             ucl-test.hs
  other-extensions:    OverloadedStrings
  build-depends:       ucl