Skip to content

Commit

Permalink
Added lexer for php lang
Browse files Browse the repository at this point in the history
  • Loading branch information
vmalkhasian committed Jun 28, 2020
1 parent c7e6339 commit b31c623
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 2 deletions.
3 changes: 3 additions & 0 deletions Main.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{-# LANGUAGE FlexibleContexts, OverloadedStrings, ExistentialQuantification #-}
{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses #-}
module Main where

import Lexer
Expand Down Expand Up @@ -53,12 +54,14 @@ langTbl = [
, (ncs , \(ParserProxy p) -> runProgram csharp p)
, (npy , \(ParserProxy p) -> runProgram python p)
, (njs , \(ParserProxy p) -> runProgram js p)
, (nphp , \(ParserProxy p) -> runProgram php p)
]
where
ncpp = ["cpp", "c++"]
ncs = ["c#", "csharp", "cs"]
npy = ["python", "py"]
njs = ["js", "javascript"]
nphp = ["php"]

data ParserProxy = forall p.
( Parser p
Expand Down
3 changes: 2 additions & 1 deletion alpaca-parser-generator.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ cabal-version: 1.12
--
-- see: https://github.com/sol/hpack
--
-- hash: ce63852fddd1891722b3791ee6757ebf29e4809f39fba3da2331dec855a1fdf7
-- hash: ce90a3b3415136568951b5b98465d1bdc3c45476c3c6df83485e3a17bfd8892a

name: alpaca-parser-generator
version: 0.1.2.1
Expand All @@ -29,6 +29,7 @@ executable alpaca
Lexer.CSharp
Lexer.FA
Lexer.JS
Lexer.PHP
Lexer.Python
Lexer.Types
MonadTypes
Expand Down
1 change: 0 additions & 1 deletion examples/polish-notation-calc/nodejs/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
lexer.*
parser.*
__pycache__
2 changes: 2 additions & 0 deletions examples/polish-notation-calc/php/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
lexer.*
parser.*
18 changes: 18 additions & 0 deletions examples/polish-notation-calc/php/syntax.xy
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
number /[0-9]+/ +$text
add /\+/
sub /\-/
mul /\*/
div /\//
pow /\^/
/ +/
%%

S : E %eof { _1 }
;
E : add E E { _2 + _3 }
| sub E E { _2 - _3 }
| mul E E { _2 * _3 }
| div E E { _2 / _3 }
| pow E E { _2 ** _3 }
| number { _1 }
;
5 changes: 5 additions & 0 deletions lib/Lang.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ instance Lang CPP
instance Lang Python
instance Lang CSharp
instance Lang JS
instance Lang PHP

data CPP
data Python
data CSharp
data JS
data PHP

-- | Proxy value standing for the 'CPP' language tag.
cpp :: Proxy CPP
cpp = Proxy
Expand All @@ -24,3 +26,6 @@ csharp = Proxy

-- | Proxy value standing for the 'JS' language tag.
js :: Proxy JS
js = Proxy

-- | Proxy value standing for the 'PHP' language tag.
php :: Proxy PHP
php = Proxy
1 change: 1 addition & 0 deletions lib/Lexer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ import Lexer.CPP()
import Lexer.CSharp()
import Lexer.Python()
import Lexer.JS()
import Lexer.PHP()
import Lang
121 changes: 121 additions & 0 deletions lib/Lexer/PHP.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
{-# OPTIONS_GHC -Wno-orphans #-}
{-# LANGUAGE QuasiQuotes, OverloadedStrings #-}
module Lexer.PHP() where

import qualified Data.List.NonEmpty as NE
import Regex.Parse
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
import Lexer.Types
import Lang
import Utils

-- | PHP back end of the lexer generator.
--
-- Renders the lexer automaton as one generated source file, @Lexer.php@,
-- declaring class @Lexer@ in namespace @Alpaca@. The generated class exposes
-- one @TOKEN_TYPE_*@ constant per token, @tokenToString@, and
-- @getNextToken@, which returns a two-element array @[token type, value]@.
instance LexerWriter PHP where
  -- Arguments, as used below:
  --
  -- * @accSt@    - accepting states: state number paired with an optional
  --                token name and the action producing the token value.
  --                A state without a name is a "skip" state.
  -- * @tokNames@ - token names; they are numbered from 1 (0 is %eof).
  -- * @stList@   - states paired with their outgoing transitions
  --                (character patterns and the target state).
  --
  -- Returns a list of (file name, file contents) pairs.
  writeLexer _ accSt tokNames stList =
    [ ("Lexer.php", [interp|
<?php

namespace Alpaca;

class Lexer
{
    const TOKEN_TYPE_EOF = 0;
#{indent 1 tokDefns}

    private $input;
    private $curChIx;
    private $debug;

    public function __construct(string $input, int $curChIx, bool $debug)
    {
        $this->input = $input;
        $this->curChIx = $curChIx;
        $this->debug = $debug;
    }

    public static function tokenToString(int $token): string
    {
        switch($token) {
            case 0: return '%eof';
#{indent 3 tokToStr}
            default: throw new \\InvalidArgumentException("Unknown token $token");
        }
    }

    public function getNextToken(): array
    {
        $lastAccChIx = $this->curChIx;
        $startChIx = $this->curChIx;
        $accSt = -1;
        $curSt = 0;
        while ($curSt >= 0) {
            if (in_array($curSt, [#{T.intercalate "," $ map (tshow . fst) accSt}])) {
                $lastAccChIx = $this->curChIx;
                $accSt = $curSt;
            }

            if ($this->curChIx >= strlen($this->input))
                break;

            $curCh = $this->input[$this->curChIx];
            $this->curChIx+=1;
            switch($curSt) {
#{indent 4 transTable}
            }
            break;
        }

        $lastReadChIx = $this->curChIx;
        $this->curChIx = $lastAccChIx;
        $text = substr($this->input, $startChIx, $lastAccChIx - $startChIx);
        switch($accSt) {
#{indent 3 returnResult}
        }

        if ($this->curChIx >= strlen($this->input)) {
            if ($this->debug) printf('Got EOF while lexing "%s"', $text);
            return [self::TOKEN_TYPE_EOF, null];
        }
        throw new \\InvalidArgumentException("Unexpected input: " . substr($this->input, $startChIx, $lastReadChIx - $startChIx));
    }
}
|])]
    where
      -- The generated PHP uses four spaces per indentation level.
      indent = indentLang 4

      -- Body of the @switch($accSt)@ in getNextToken: one case per
      -- accepting state.
      returnResult = T.intercalate "\n" (map returnResult1 accSt)

      -- A named accepting state returns its token; an unnamed one skips the
      -- matched text and lexes the next token instead.
      returnResult1 :: (Int, (Maybe Text, Action)) -> Text
      returnResult1 (st, (Just name, act)) = [interp|
        case #{st}:
            if ($this->debug) printf('Lexed token #{name}: "%s"', $text);
            return [self::TOKEN_TYPE_#{T.toUpper name}, #{mkAct act}];
        |]
      returnResult1 (st, (Nothing, _)) = [interp|
        case #{st}:
            if ($this->debug) printf('Skipping state #{tshow st}: "%s"', $text);
            return $this->getNextToken();
        |]

      -- One @case@ of the transition switch; states without outgoing
      -- transitions produce no case at all.
      checkState :: (Int, (a, [(NE.NonEmpty CharPattern, Int)])) -> Maybe Text
      checkState (_, (_, [])) = Nothing
      checkState (curSt, (_, charTrans)) = Just [interp|
        case #{tshow curSt}:
        #{indent 1 $ T.intercalate " else " (map checkChars charTrans)}
            break;
        |]

      -- Body of the @switch($curSt)@ in getNextToken.
      transTable = T.intercalate "\n" $ mapMaybe checkState stList

      -- Token constants and their reverse mapping, numbered from 1 because
      -- 0 is reserved for TOKEN_TYPE_EOF.
      tokDefns = T.intercalate "\n" $ zipWith (\x n -> [interp|const TOKEN_TYPE_#{T.toUpper x} = #{n};|] :: Text) tokNames [1::Word ..]
      tokToStr = T.intercalate "\n" $ zipWith (\x n -> [interp|case #{n}: return '#{x}';|] :: Text) tokNames [1::Word ..]

      -- Token value expression: the user action, or PHP @null@ without one.
      mkAct NoAction = "null"
      mkAct (Action act) = act

      -- One transition: on a matching character move to the target state
      -- and resume the scanning loop. In PHP a bare @continue@ inside
      -- @switch@ acts like @break@, which would fall through to the
      -- @break@ after the switch and stop scanning after one character;
      -- @continue 2@ targets the enclosing @while@.
      checkChars :: (NE.NonEmpty CharPattern, Int) -> Text
      checkChars (charGroup, newSt) = [interp|
        if (#{charCond charGroup}) {
            $curSt = #{newSt};
            continue 2;
        }
        |]

      -- A group of patterns matches when any single pattern matches.
      charCond = T.intercalate " || " . map charCond1 . NE.toList

      -- NOTE(review): tshow presumably renders the Haskell 'show' form of
      -- the character (e.g. '\n'); PHP single-quoted strings only interpret
      -- \\ and \', so escaped characters may not match - confirm.
      charCond1 :: CharPattern -> Text
      charCond1 (CChar c) = [interp|$curCh === #{tshow c}|]
      charCond1 (CRange c1 c2) = [interp|($curCh >= #{tshow c1} && $curCh <= #{tshow c2})|]
      charCond1 CAny = "true"

0 comments on commit b31c623

Please sign in to comment.