Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(Quasi)ABTs for syntax types #715

Merged
merged 60 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
7e990ef
Define a VM module.
robrix Mar 15, 2024
186ae4f
Re-export env & store effects.
robrix Mar 15, 2024
94b463e
Move let' & letrec into the VM module.
robrix Mar 15, 2024
9df11b4
Define a module for Python syntax.
robrix Mar 15, 2024
14cdce0
Move the Term type into its own module.
robrix Mar 15, 2024
25485ff
:fire: re-exports.
robrix Mar 15, 2024
fba6ecf
:fire: all the JSON parsing stuff.
robrix Mar 15, 2024
f8a8f06
Initial stab at ABT-style term representations.
robrix Mar 15, 2024
ef985dc
Convert Vec to [].
robrix Mar 15, 2024
67c7779
Define a factored Python type.
robrix Mar 15, 2024
54f32e9
Define a smart constructor for no-ops.
robrix Mar 15, 2024
71073d7
Use inductive naturals.
robrix Mar 15, 2024
138bea9
Equate Vec.
robrix Mar 15, 2024
b798092
Go quantify a kite.
robrix Mar 15, 2024
fc4e7e4
Represent variables in the abstract trees.
robrix Mar 15, 2024
57aed6b
Merge branch 'all-code-dies-not-all-code-truly-lives' into always-be-…
robrix Mar 15, 2024
076e967
Define an Ord instance for Vec.
robrix Mar 15, 2024
09c57ed
Define an Ord instance for Vec.
robrix Mar 15, 2024
3987aa9
Define Foldable instances for Vec.
robrix Mar 15, 2024
22d693c
Simplify the Foldable instances.
robrix Mar 15, 2024
6b25a75
Spacing.
robrix Mar 15, 2024
175e819
Generic subterms.
robrix Mar 15, 2024
11b8caa
An additional smart constructor.
robrix Mar 15, 2024
0a33a89
Define a pattern synonym for no-ops.
robrix Mar 15, 2024
d6263ff
Define a pattern synonym for conditionals.
robrix Mar 15, 2024
45fa9a2
:fire: the function constructors.
robrix Mar 15, 2024
1bfe2bb
Bool.
robrix Mar 15, 2024
fef8e66
String.
robrix Mar 15, 2024
155ecdd
Throw.
robrix Mar 18, 2024
a59ff6e
Import.
robrix Mar 18, 2024
42ab0be
Function.
robrix Mar 18, 2024
dcf7d40
Bidirectional.
robrix Mar 18, 2024
b6a9fae
Call.
robrix Mar 18, 2024
b19baf0
Locate.
robrix Mar 18, 2024
ed23111
Completeness.
robrix Mar 18, 2024
948a5c7
Sequencing.
robrix Mar 18, 2024
e1a6215
HLint.
robrix Mar 18, 2024
fecfa07
Let.
robrix Mar 18, 2024
b9ba190
Port eval over.
robrix Mar 18, 2024
692c3f2
:fire: Term.
robrix Mar 18, 2024
e7c5928
Rename the synonyms to elide the primes.
robrix Mar 18, 2024
cdde997
Spacing.
robrix Mar 18, 2024
be357c0
Define a synonym for Term.
robrix Mar 18, 2024
f96b14d
Tone down the fancy.
robrix Mar 19, 2024
6ea208f
:fire: toList.
robrix Mar 19, 2024
0297116
Dial the fancy even further back.
robrix Mar 19, 2024
4ed026b
Define an Eq1 instance for Python.
robrix Mar 19, 2024
5ccf337
Define an Ord1 instance for Python.
robrix Mar 19, 2024
1df04e8
:fire: extensions.
robrix Mar 19, 2024
6812e35
Terms are free monads.
robrix Mar 19, 2024
ff22c0b
General fold over Terms.
robrix Mar 19, 2024
3861c38
Paramorphic fold.
robrix Mar 19, 2024
050188f
Define subterms paramorphically.
robrix Mar 19, 2024
c746f93
Mendler-style recursion.
robrix Mar 19, 2024
bcca57a
Align.
robrix Mar 19, 2024
07a5740
Define foldTerm using mendlerTerm.
robrix Mar 19, 2024
809ab74
UndecidableInstances instead of QuantifiedConstraints.
robrix Mar 19, 2024
d22bd1e
Mendler-style paramorphism.
robrix Mar 19, 2024
ad4dba6
Define para using mendlerPara.
robrix Mar 19, 2024
1d70d13
Define subterms using mendlerPara.
robrix Mar 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions semantic-analysis/semantic-analysis.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,15 @@ library
Analysis.Project
Analysis.Reference
Analysis.Syntax
Analysis.Syntax.Python
Analysis.VM
build-depends:
, aeson >= 1.4 && < 3
, base >= 4.13 && < 5
, bytestring >= 0.10.8.2 && < 0.13
, containers ^>= 0.6
, filepath
, fused-effects ^>= 1.1
, hashable
, semantic-source ^>= 0.2
, text ^>= 1.2.3.1
, transformers ^>= 0.5
, vector ^>= 0.12.3
1 change: 0 additions & 1 deletion semantic-analysis/src/Analysis/Name.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Analysis.Name
( Name
-- * Constructors
Expand Down
283 changes: 33 additions & 250 deletions semantic-analysis/src/Analysis/Syntax.hs
Original file line number Diff line number Diff line change
@@ -1,270 +1,53 @@
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UndecidableInstances #-}
module Analysis.Syntax
( -- * Terms
( -- * Syntax
Term(..)
, subterms
-- * Abstract interpretation
, eval0
, eval
-- * Macro-expressible syntax
, let'
, letrec
-- * Parsing
, parseFile
, parseGraph
, parseNode
-- * Debugging
, analyzeFile
, parseToTerm
, foldTerm
, paraTerm
, mendlerTerm
, mendlerParaTerm
) where

import qualified Analysis.Carrier.Statement.State as S
import Analysis.Effect.Domain
import Analysis.Effect.Env (Env, bind, lookupEnv)
import Analysis.Effect.Store
import Analysis.File
import Analysis.Name (Name, name)
import Analysis.Reference as Ref
import Control.Applicative (Alternative (..), liftA2, liftA3)
import Control.Carrier.Throw.Either (runThrow)
import Control.Effect.Labelled
import Control.Effect.Reader
import Control.Effect.Throw (Throw, throwError)
import Control.Exception
import Control.Monad (guard)
import Control.Monad.IO.Class
import qualified Data.Aeson as A
import qualified Data.Aeson.Parser as A
import qualified Data.Aeson.Types as A
import qualified Data.ByteString.Lazy as B
import Data.Foldable (fold, foldl')
import Data.Function (fix)
import qualified Data.IntMap as IntMap
import Data.List (sortOn)
import Data.List.NonEmpty (NonEmpty, fromList)
import Data.Maybe (listToMaybe)
import Data.Monoid (First (..))
import qualified Data.Set as Set
import Data.String (IsString (..))
import Data.Text (Text)
import qualified Data.Vector as V
import qualified Source.Source as Source
import Source.Span
import System.FilePath

data Term
= Var Name
| Noop
| Iff Term Term Term
| Bool Bool
| String Text
| Throw Term
| Let Name Term Term
| Term :>> Term
| Import (NonEmpty Text)
| Function Name [Name] Term
| Call Term [Term]
| Locate Span Term
deriving (Eq, Ord, Show)
-- Syntax

infixl 1 :>>
-- | (Currently) untyped term representations.
data Term sig v
= Var v
| Term (sig (Term sig v))

Comment on lines +17 to +20
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A free monad by any other name

subterms :: Term -> Set.Set Term
subterms t = Set.singleton t <> case t of
Var _ -> mempty
Noop -> mempty
Iff c t e -> subterms c <> subterms t <> subterms e
Bool _ -> mempty
String _ -> mempty
Throw t -> subterms t
Let _ v b -> subterms v <> subterms b
a :>> b -> subterms a <> subterms b
Import _ -> mempty
Function _ _ b -> subterms b
Call f as -> subterms f <> foldMap subterms as
Locate _ b -> subterms b
instance (Eq (sig (Term sig v)), Eq v) => Eq (Term sig v) where
Var v1 == Var v2 = v1 == v2
Term s1 == Term s2 = s1 == s2
_ == _ = False

Comment on lines +22 to +25
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The instance head can be cleaned up (and avoid the use of UndecidableInstances) using QuantifiedConstraints. However:

  • for Ord you end up needing to supply quantified constraints for both Eq and Ord, and
  • it was causing spurious CI failures for ghc9.2 and I really can't be bothered

instance (Ord (sig (Term sig v)), Ord v) => Ord (Term sig v) where
compare (Var v1) (Var v2) = compare v1 v2
compare (Var _) _ = LT
compare (Term s1) (Term s2) = compare s1 s2
compare _ _ = GT

-- Abstract interpretation

eval0 :: (Has (Env addr) sig m, HasLabelled Store (Store addr val) sig m, Has (Dom val) sig m, Has (Reader Reference) sig m, Has S.Statement sig m) => Term -> m val
eval0 = fix eval
subterms :: (Ord (sig (Term sig v)), Ord v, Foldable sig) => Term sig v -> Set.Set (Term sig v)
subterms = mendlerParaTerm (Set.singleton . Var) (\ k -> foldMap (uncurry Set.insert . k))

eval
:: (Has (Env addr) sig m, HasLabelled Store (Store addr val) sig m, Has (Dom val) sig m, Has (Reader Reference) sig m, Has S.Statement sig m)
=> (Term -> m val)
-> (Term -> m val)
eval eval = \case
Var n -> lookupEnv n >>= maybe (dvar n) fetch
Noop -> dunit
Iff c t e -> do
c' <- eval c
dif c' (eval t) (eval e)
Bool b -> dbool b
String s -> dstring s
Throw e -> eval e >>= ddie
Let n v b -> do
v' <- eval v
let' n v' (eval b)
t :>> u -> do
t' <- eval t
u' <- eval u
t' >>> u'
Import ns -> S.simport ns >> dunit
Function n ps b -> letrec n (dabs ps (foldr (\ (p, a) m -> let' p a m) (eval b) . zip ps))
Call f as -> do
f' <- eval f
as' <- traverse eval as
dapp f' as'
Locate s t -> local (setSpan s) (eval t)
where
setSpan s r = r{ refSpan = s }


-- Macro-expressible syntax

let' :: (Has (Env addr) sig m, HasLabelled Store (Store addr val) sig m) => Name -> val -> m a -> m a
let' n v m = do
addr <- alloc n
addr .= v
bind n addr m

letrec :: (Has (Env addr) sig m, HasLabelled Store (Store addr val) sig m) => Name -> m val -> m val
letrec n m = do
addr <- alloc n
v <- bind n addr m
addr .= v
pure v


-- Parsing

parseFile :: (Has (Throw String) sig m, MonadIO m) => FilePath -> FilePath -> m (Source.Source, File Term)
parseFile srcPath jsonPath = do
contents <- liftIO (B.readFile jsonPath)
-- FIXME: get this from the JSON itself (cf https://github.com/tree-sitter/tree-sitter-graph/issues/69)
let sourcePath = replaceExtensions jsonPath "py"
sourceContents <- Source.fromUTF8 . B.toStrict <$> liftIO (B.readFile srcPath)
let span = decrSpan (Source.totalSpan sourceContents)
case A.eitherDecodeWith A.json' (A.iparse parseGraph) contents of
Left (_, err) -> throwError err
Right (_, Nothing) -> throwError "no root node found"
Right (_, Just root) -> pure (sourceContents, File (Reference sourcePath span) root)
where
decrSpan (Span (Pos sl sc) (Pos el ec)) = Span (Pos (sl - 1) (sc - 1)) (Pos (el - 1) (ec - 1))

newtype Graph = Graph { terms :: IntMap.IntMap Term }

-- | Parse a @Value@ into an entire graph of terms, as well as a root node, if any exists.
parseGraph :: A.Value -> A.Parser (Graph, Maybe Term)
parseGraph = A.withArray "nodes" $ \ nodes -> do
(untied, First root) <- fold <$> traverse parseNode (V.toList nodes)
-- @untied@ is an intmap, where the keys are graph node IDs and the values are functions from the final graph to the representations of said graph nodes. Likewise, @root@ is a function of the same variety, wrapped in a @Maybe@.
--
-- We define @tied@ as the fixpoint of the former to yield the former as a graph of type @Graph@, and apply the latter to said graph to yield the entry point, if any, from which to evaluate.
let tied = fix (\ tied -> ($ Graph tied) <$> untied)
pure (Graph tied, ($ Graph tied) <$> root)
foldTerm :: Functor sig => (v -> r) -> (sig r -> r) -> (Term sig v -> r)
foldTerm var sig = mendlerTerm var (\ k -> sig . fmap k)

Comment on lines +37 to +38
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here be recursion schemes 🐉

-- | Parse a node from a JSON @Value@ into a pair of a partial graph of unfixed terms and optionally an unfixed term representing the root node.
--
-- The partial graph is represented as an adjacency map relating node IDs to unfixed terms—terms which may make reference to a completed graph to find edges, and which therefore can't be inspected until the full graph is known.
parseNode :: A.Value -> A.Parser (IntMap.IntMap (Graph -> Term), First (Graph -> Term))
parseNode = A.withObject "node" $ \ o -> do
edges <- o A..: fromString "edges"
index <- o A..: fromString "id"
o A..: fromString "attrs" >>= A.withObject "attrs" (\ attrs -> do
ty <- attrs A..: fromString "type"
node <- parseTerm attrs edges ty
pure (IntMap.singleton index node, node <$ First (guard (ty == "module"))))
paraTerm :: Functor sig => (v -> r) -> (sig (Term sig v, r) -> r) -> (Term sig v -> r)
paraTerm var sig = mendlerParaTerm var (\ k -> sig . fmap k)

parseTerm :: A.Object -> [A.Value] -> String -> A.Parser (Graph -> Term)
parseTerm attrs edges = locate attrs . \case
"string" -> const . String <$> attrs A..: fromString "text"
"true" -> pure (const (Bool True))
"false" -> pure (const (Bool False))
"throw" -> fmap Throw <$> maybe empty resolve (listToMaybe edges)
"if" -> liftA3 Iff <$> findEdgeNamed edges "condition" <*> findEdgeNamed edges "consequence" <*> findEdgeNamed edges "alternative" <|> pure (const Noop)
"block" -> children edges
"module" -> children edges
"identifier" -> const . Var . name <$> attrs A..: fromString "text"
"import" -> const . Import . fromList . map snd . sortOn fst <$> traverse (resolveWith (const moduleNameComponent)) edges
"function" -> liftA3 Function . pure . name <$> attrs A..: fromString "name" <*> pure (pure []) <*> findEdgeNamed edges "body"
"call" -> liftA2 Call . const . Var . name <$> attrs A..: fromString "function" <*> (sequenceA <$> traverse resolve edges)
"noop" -> pure (pure Noop)
t -> A.parseFail ("unrecognized type: " <> t <> " attrs: " <> show attrs <> " edges: " <> show edges)

findEdgeNamed :: (Foldable t, A.FromJSON a, Eq a) => t A.Value -> a -> A.Parser (Graph -> Term)
findEdgeNamed edges name = foldMap (resolveWith (\ rep attrs -> attrs A..: fromString "type" >>= (rep <$) . guard . (== name))) edges

-- | Map a list of edges to a list of child nodes.
children :: [A.Value] -> A.Parser (Graph -> Term)
children edges = fmap chain . traverse snd . sortOn fst <$> traverse (resolveWith child) edges
mendlerTerm :: (v -> r) -> (forall r' . (r' -> r) -> sig r'-> r) -> (Term sig v -> r)
mendlerTerm var sig = go
where
child :: (Graph -> Term) -> A.Object -> A.Parser (Int, Graph -> Term)
child term attrs = (,) <$> attrs A..: fromString "index" <*> pure term

chain :: [Term] -> Term
chain [] = Noop
chain (t:ts) = foldl' (:>>) t ts

moduleNameComponent :: A.Object -> A.Parser (Int, Text)
moduleNameComponent attrs = (,) <$> attrs A..: fromString "index" <*> attrs A..: fromString "text"

resolve :: A.Value -> A.Parser (Graph -> Term)
resolve = resolveWith (const . pure)

resolveWith :: ((Graph -> Term) -> A.Object -> A.Parser a) -> A.Value -> A.Parser a
resolveWith f = resolveWith' (f . flip ((IntMap.!) . terms))
go (Var v) = var v
go (Term s) = sig go s

resolveWith' :: (IntMap.Key -> A.Object -> A.Parser a) -> A.Value -> A.Parser a
resolveWith' f = A.withObject "edge" (\ edge -> do
sink <- edge A..: fromString "sink"
attrs <- edge A..: fromString "attrs"
f sink attrs)

locate :: A.Object -> A.Parser (Graph -> Term) -> A.Parser (Graph -> Term)
locate attrs p = do
span <- span
<$> attrs A..:? fromString "start-line"
<*> attrs A..:? fromString "start-col"
<*> attrs A..:? fromString "end-line"
<*> attrs A..:? fromString "end-col"
t <- p
case span of
Nothing -> pure t
Just s -> pure (Locate s <$> t)
mendlerParaTerm :: (v -> r) -> (forall r' . (r' -> (Term sig v, r)) -> sig r'-> r) -> (Term sig v -> r)
mendlerParaTerm var sig = go
where
span sl sc el ec = Span <$> (Pos <$> sl <*> sc) <*> (Pos <$> el <*> ec)


-- Debugging

analyzeFile
:: (Algebra sig m, MonadIO m)
=> FilePath
-> FilePath
-> ( forall term
. Ord term
=> ( forall sig m
. (Has (Env addr) sig m, HasLabelled Store (Store addr val) sig m, Has (Dom val) sig m, Has (Reader Reference) sig m, Has S.Statement sig m)
=> (term -> m val)
-> (term -> m val) )
-> Source.Source
-> File term
-> m b )
-> m b
analyzeFile srcPath jsonPath analyze = do
(src, file) <- parseToTerm srcPath jsonPath
analyze eval src file

parseToTerm :: (Algebra sig m, MonadIO m) => FilePath -> FilePath -> m (Source.Source, File Term)
parseToTerm srcPath jsonPath = do
parsed <- runThrow @String (parseFile srcPath jsonPath)
either (liftIO . throwIO . ErrorCall) pure parsed
go (Var v) = var v
go (Term s) = sig ((,) <*> go) s
Loading
Loading