Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

XPath eval and dump to string #1

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions c14n.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@ source-repository head
library
exposed-modules:
Text.XML.C14N
Text.XML.C14N.Internal
Text.XML.C14N.LibXML
other-modules:
Paths_c14n
hs-source-dirs:
src
ghc-options: -W
cc-options: -std=c99 -Wno-builtin-declaration-mismatch
c-sources:
cbits/libxml.c
extra-libraries:
Expand All @@ -40,4 +42,8 @@ library
build-depends:
base >=4.8 && <5
, bytestring >=0.9 && <0.11
, containers
, inline-c
, template-haskell
, vector
default-language: Haskell2010
8 changes: 8 additions & 0 deletions package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,17 @@ extra-libraries:
dependencies:
- base >= 4.8 && < 5
- bytestring >= 0.9 && < 0.11
- inline-c
- template-haskell
- containers
- vector

library:
source-dirs: src
c-sources: cbits/*.c
ghc-options:
- -W
cc-options:
# enable `strdup`:
- -std=c99
- -Wno-builtin-declaration-mismatch
120 changes: 112 additions & 8 deletions src/Text/XML/C14N.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ module Text.XML.C14N (
c14n_exclusive_1_0,
c14n_1_1,
c14n,
LibXMLDoc,

-- * Parsing
xml_opt_recover,
Expand All @@ -38,18 +39,39 @@ module Text.XML.C14N (
xml_opt_oldsax,
xml_opt_ignore_env,
xml_opt_big_lines,
parseXml
defaultOpts,
parseXml,
parseHtml,

evalXPath',
evalXPath'',
evalXPath'each,
evalXPath,
evalXPathArr,

nodePathIdx,
nodeByPath,
dumpNode ,
nodeFirstChild,
nodeChildren,
nodeNext,
nodeName,
isNullPtr,
nodePositionInNamesakes
) where

--------------------------------------------------------------------------------

import Control.Monad
import Control.Exception

import Data.Bits
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Unsafe as BS
import Data.Vector (Vector)

import Text.XML.C14N.LibXML
import Text.XML.C14N.LibXML

import Foreign.Ptr
import Foreign.ForeignPtr
Expand All @@ -60,19 +82,30 @@ import Foreign.C.Types

--------------------------------------------------------------------------------

defaultOpts :: [CInt]
defaultOpts = [xml_opt_recover, xml_opt_noent, xml_opt_noerror, xml_opt_nonet, xml_opt_compact]

-- | 'parseXml' @parseOpts text@ parses @text@ into an XML document using
-- libxml according to options given by @parseOpts@.
parseXml :: [CInt] -> BS.ByteString -> IO (ForeignPtr LibXMLDoc)
parseXml :: [CInt] -> ByteString -> IO (ForeignPtr LibXMLDoc)
parseXml opts bin = newForeignPtr xmlFreeDoc =<<
(BS.unsafeUseAsCStringLen bin $ \(ptr, len) ->
throwErrnoIfNull "xmlReadMemory" $ xmlReadMemory
ptr (fromIntegral len) nullPtr nullPtr (foldl (.|.) 0 opts))

-- | 'parseXml' @parseOpts text@ parses @text@ into an XML document using
-- libxml according to options given by @parseOpts@.
parseHtml :: [CInt] -> ByteString -> IO (ForeignPtr LibXMLDoc)
parseHtml opts bin = newForeignPtr xmlFreeDoc =<<
(BS.unsafeUseAsCStringLen bin $ \(ptr, len) ->
throwErrnoIfNull "htmlReadMemory" $ htmlReadMemory
ptr (fromIntegral len) nullPtr nullPtr (foldl (.|.) 0 opts))

-- | 'withXmlXPathNodeList' @docPtr xPathLocation continuation@ evaluates the
-- XPath location path given by @xPathLocation@ in the document context
-- pointed at by @docPtr@ and calls @continuation@ with the result.
withXmlXPathNodeList :: Ptr LibXMLDoc
-> BS.ByteString
-> ByteString
-> (Ptr LibXMLNodeSet -> IO a)
-> IO a
withXmlXPathNodeList docPtr expr cont =
Expand All @@ -92,6 +125,7 @@ withXmlXPathNodeList docPtr expr cont =
-- the XPath object structure contains the node set pointer
-- at offset 8; see
-- http://xmlsoft.org/html/libxml-xpath.html#xmlXPathObject
-- TODO here we need to check xmlXPathObjectType!
$ \a -> peekByteOff a 8 >>= cont

-- | 'c14n' @parseOpts mode nsPrefixes keepComments xPathLocation input@
Expand All @@ -104,11 +138,11 @@ withXmlXPathNodeList docPtr expr cont =
-- be included in the canonicalised result.
c14n :: [CInt]
-> CInt
-> [BS.ByteString]
-> [ByteString]
-> Bool
-> Maybe BS.ByteString
-> BS.ByteString
-> IO BS.ByteString
-> Maybe ByteString
-> ByteString
-> IO ByteString
c14n opts mode nsPrefixes keepComments xpath bin =
-- parse the input xml
parseXml opts bin >>= \docPtr ->
Expand Down Expand Up @@ -144,3 +178,73 @@ c14n opts mode nsPrefixes keepComments xpath bin =
ptrPtr (fromIntegral numBytes) (freeXml ptrPtr)

--------------------------------------------------------------------------------
--
--

withXmlBuffer :: (Ptr LibXMLBuffer -> IO a) -> IO (ByteString, a)
withXmlBuffer act =
let bufferSize = 1024*1024*100 in
bracket
(throwErrnoIfNull "xmlCreateBufferSize" $ xmlCreateBufferSize bufferSize)
xmlBufferFree
(\buf -> do
res <- act buf
cstr <- xmlBufferContent buf
bstr <- BS.packCString cstr
return (bstr, res)
)


evalXPath'' :: ForeignPtr LibXMLDoc -- ^ input document
-> ByteString -- ^ input xpath
-> (Ptr LibXMLNodeSet -> IO a) -- ^ convertor
-> IO a -- ^ result
evalXPath'' parsedDoc xpath fun =
withForeignPtr parsedDoc $ \ptr ->
withXmlXPathNodeList ptr xpath fun

evalXPath'each :: ForeignPtr LibXMLDoc -- ^ input document
-> ByteString -- ^ input xpath
-> (Ptr LibXMLNode -> IO a) -- ^ convertor
-> IO (Vector a) -- ^ result
evalXPath'each parsedDoc xpath mapper =
withForeignPtr parsedDoc $ \ptr ->
withXmlXPathNodeList ptr xpath $ \nsPtr ->
xmlNodeSetMap nsPtr mapper


evalXPath' :: ForeignPtr LibXMLDoc -- ^ input document
-> ByteString -- ^ input xpath
-> IO (Maybe ByteString) -- ^ result in string form
evalXPath' parsedDoc xpath =
evalXPath'' parsedDoc xpath $ \nsPtr -> do
(str, haveResult) <- withXmlBuffer $ \bufferPtr -> do
errCode <- xmlNodeSetDump bufferPtr nsPtr
when (errCode < 0) $ fail ("Buffer error: " ++ show (errCode + 100))
return (errCode == 0)
return $ if haveResult then Just str else Nothing


evalXPathArr' :: ForeignPtr LibXMLDoc -- ^ input document
-> ByteString -- ^ input xpath
-> IO (Vector ByteString) -- ^ result in string form
evalXPathArr' parsedDoc xpath =
withForeignPtr parsedDoc $ \ptrParsedDoc ->
withXmlXPathNodeList ptrParsedDoc xpath $ \nsPtr ->
xmlNodeSetDumpArr nsPtr


evalXPath :: ByteString -- ^ input document
-> ByteString -- ^ input xpath
-> IO (Maybe ByteString) -- ^ result in string form
evalXPath doc xpath =
parseHtml defaultOpts doc >>= flip evalXPath' xpath



evalXPathArr :: ByteString -- ^ input document
-> ByteString -- ^ input xpath
-> IO (Vector ByteString) -- ^ result in string form
evalXPathArr doc xpath =
parseHtml defaultOpts doc >>= flip evalXPathArr' xpath

43 changes: 43 additions & 0 deletions src/Text/XML/C14N/Internal.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.XML.C14N.Internal where

import qualified Data.Map as Map
import Data.Monoid ((<>), mempty)
import qualified Language.Haskell.TH as TH

import qualified Language.C.Inline as C
import Language.C.Inline.Context
import qualified Language.C.Types as CT


-- | XML documents
data LibXMLDoc


-- | XML node
data LibXMLNode


-- | XML node sets
data LibXMLNodeSet


-- | XML Buffer
data LibXMLBuffer


xmlCtx :: C.Context
xmlCtx = baseCtx <> bsCtx <> ctx
where
ctx = mempty { ctxTypesTable = xmlTypesTable }


xmlTypesTable :: Map.Map CT.TypeSpecifier TH.TypeQ
xmlTypesTable = Map.fromList
[ (CT.TypeName "xmlDoc", [t| LibXMLDoc |])
, (CT.TypeName "xmlNode", [t| LibXMLNode |])
, (CT.TypeName "xmlNodeSet", [t| LibXMLNodeSet |])
, (CT.TypeName "xmlBuffer", [t| LibXMLBuffer |])
]
Loading