Skip to content

Commit

Permalink
Merge pull request #98 from lalaithion/gitparsing
Browse files Browse the repository at this point in the history
Use Attoparsec for parsing git output instead of manually splitting Text
  • Loading branch information
patrickt authored Jun 10, 2019
2 parents 0257625 + aa02a62 commit eaf1378
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 26 deletions.
61 changes: 35 additions & 26 deletions src/Semantic/Git.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,19 @@ module Semantic.Git
, ObjectType(..)
, ObjectMode(..)
, OID(..)

-- Testing Purposes
, parseEntries
, parseEntry
) where

import Control.Monad.IO.Class
import Data.Text as Text
import Shelly hiding (FilePath)
import System.IO (hSetBinaryMode)
import Data.Attoparsec.Text (Parser)
import Data.Attoparsec.Text as AP
import Data.Char
import Data.Text as Text
import Shelly hiding (FilePath)
import System.IO (hSetBinaryMode)

-- | git clone --bare
clone :: Text -> FilePath -> IO ()
Expand All @@ -24,22 +31,38 @@ clone url path = sh $ do
-- | git cat-file -p
catFile :: FilePath -> OID -> IO Text
catFile gitDir (OID oid) = sh $ do
run "git" [pack ("--git-dir=" <> gitDir), "cat-file", "-p", oid]
run "git" ["-C", pack gitDir, "cat-file", "-p", oid]

-- | git ls-tree -rz
lsTree :: FilePath -> OID -> IO [TreeEntry]
lsTree gitDir (OID sha) = sh $ do
out <- run "git" [pack ("--git-dir=" <> gitDir), "ls-tree", "-rz", sha]
pure $ mkEntry <$> splitOn "\NUL" out
where
mkEntry row | [mode, ty, rest] <- splitOn " " row
, [oid, path] <- splitOn "\t" rest
= TreeEntry (objectMode mode) (objectType ty) (OID oid) (unpack path)
| otherwise = nullTreeEntry
lsTree gitDir (OID sha) = sh $ parseEntries <$> run "git" ["-C", pack gitDir, "ls-tree", "-rz", sha]

sh :: MonadIO m => Sh a -> m a
sh = shelly . silently . onCommandHandles (initOutputHandles (`hSetBinaryMode` True))

-- | Parses an list of entries separated by \NUL, and on failure return []
parseEntries :: Text -> [TreeEntry]
parseEntries = either (const []) id . AP.parseOnly everything
where
everything = AP.sepBy entryParser "\NUL" <* "\NUL\n" <* AP.endOfInput

-- | Parse the entire input with entryParser, and on failure return a default
-- For testing purposes only
parseEntry :: Text -> Either String TreeEntry
parseEntry = AP.parseOnly (entryParser <* AP.endOfInput)

-- | Parses a TreeEntry
entryParser :: Parser TreeEntry
entryParser = TreeEntry
<$> modeParser <* AP.char ' '
<*> typeParser <* AP.char ' '
<*> oidParser <* AP.char '\t'
<*> (unpack <$> AP.takeWhile (/= '\NUL'))
where
typeParser = AP.choice [BlobObject <$ "blob", TreeObject <$ "tree"]
modeParser = AP.choice [NormalMode <$ "100644", ExecutableMode <$ "100755", SymlinkMode <$ "120000", TreeMode <$ "040000"]
oidParser = OID <$> AP.takeWhile isHexDigit

newtype OID = OID Text
deriving (Eq, Show, Ord)

Expand All @@ -51,24 +74,12 @@ data ObjectMode
| OtherMode
deriving (Eq, Show)

objectMode :: Text -> ObjectMode
objectMode "100644" = NormalMode
objectMode "100755" = ExecutableMode
objectMode "120000" = SymlinkMode
objectMode "040000" = TreeMode
objectMode _ = OtherMode

data ObjectType
= BlobObject
| TreeObject
| OtherObjectType
deriving (Eq, Show)

objectType :: Text -> ObjectType
objectType "blob" = BlobObject
objectType "tree" = TreeObject
objectType _ = OtherObjectType

data TreeEntry
= TreeEntry
{ treeEntryMode :: ObjectMode
Expand All @@ -77,5 +88,3 @@ data TreeEntry
, treeEntryPath :: FilePath
} deriving (Eq, Show)

nullTreeEntry :: TreeEntry
nullTreeEntry = TreeEntry OtherMode OtherObjectType (OID mempty) mempty
18 changes: 18 additions & 0 deletions test/Semantic/Spec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module Semantic.Spec (spec) where
import Data.Diff
import Data.Patch
import Semantic.Api hiding (Blob)
import Semantic.Git
import System.Exit

import SpecHelpers
Expand All @@ -24,5 +25,22 @@ spec = parallel $ do
it "renders with the specified renderer" $ do
output <- fmap runBuilder . runTaskOrDie $ parseTermBuilder TermSExpression [methodsBlob]
output `shouldBe` "(Statements\n (Method\n (Empty)\n (Identifier)\n (Statements)))\n"

describe "git ls-tree parsing" $ do
it "parses a git output string" $ do
let input = "100644 tree abcdef\t/this/is/the/path"
let expected = Right $ TreeEntry NormalMode TreeObject (OID "abcdef") "/this/is/the/path"
parseEntry input `shouldBe` expected

it "allows whitespace in the path" $ do
let input = "100644 tree 12345\t/this\n/is\t/the /path\r"
let expected = Right $ TreeEntry NormalMode TreeObject (OID "12345") "/this\n/is\t/the /path\r"
parseEntry input `shouldBe` expected

it "parses many outputs separated by \\NUL" $ do
let input = "100644 tree abcdef\t/this/is/the/path\NUL120000 blob 17776\t/dev/urandom\NUL\n"
let expected = [ TreeEntry NormalMode TreeObject (OID "abcdef") "/this/is/the/path", TreeEntry SymlinkMode BlobObject (OID "17776") "/dev/urandom"]
parseEntries input `shouldBe` expected

where
methodsBlob = makeBlob "def foo\nend\n" "methods.rb" Ruby mempty

0 comments on commit eaf1378

Please sign in to comment.