Skip to content

Commit

Permalink
Fixed bug with empty string "". Added dynamic setting of resultLimit.
Browse files Browse the repository at this point in the history
  • Loading branch information
ondrap committed Apr 21, 2015
1 parent 792c175 commit ca715d3
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 17 deletions.
3 changes: 0 additions & 3 deletions Data/JsonStream/CLexType.hsc
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@ newtype LexResultType = LexResultType CInt deriving (Show, Eq, Storable)

#include "lexer.h"

-- | Maximum number of lexer results per call to the C lexer, taken at
-- compile time from the RESULT_COUNT constant in the included lexer.h.
resultLimit :: Int
resultLimit = #const RESULT_COUNT

#{enum LexResultType, LexResultType
, resNumber = RES_NUMBER
, resString = RES_STRING
Expand Down
23 changes: 16 additions & 7 deletions Data/JsonStream/CLexer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,14 @@ data Header = Header {
, hdrPosition :: !CInt
, hdrLength :: !CInt
, hdrResultNum :: !CInt
, hdrResultLimit :: !CInt
} deriving (Show)

-- | Header with every field zeroed. Callers are expected to override at
-- least hdrLength and hdrResultLimit via record update before handing
-- the header to the lexer.
defHeader :: Header
defHeader = Header 0 0 0 0 0 0 0

instance Storable Header where
sizeOf _ = 7 * sizeOf (undefined :: CInt)
sizeOf _ = 8 * sizeOf (undefined :: CInt)
alignment _ = sizeOf (undefined :: CInt)
peek ptr = do
state <- peekByteOff ptr 0
Expand All @@ -55,8 +59,8 @@ instance Storable Header where
position <- peekByteOff ptr (3 * sizeOf state)
slength <- peekByteOff ptr (4 * sizeOf state)
sresultnum <- peekByteOff ptr (5 * sizeOf state)
return $ Header state sdata1 sdata2 position slength sresultnum
-- return $ Header state sdata1 sdata2 position slength sresultnum
sresultlimit <- peekByteOff ptr (6 * sizeOf state)
return $ Header state sdata1 sdata2 position slength sresultnum sresultlimit

poke ptr (Header {..}) = do
pokeByteOff ptr 0 hdrCurrentState
Expand All @@ -65,6 +69,7 @@ instance Storable Header where
pokeByteOff ptr (3 * sizeOf hdrCurrentState) hdrPosition
pokeByteOff ptr (4 * sizeOf hdrCurrentState) hdrLength
pokeByteOff ptr (5 * sizeOf hdrCurrentState) hdrResultNum
pokeByteOff ptr (6 * sizeOf hdrCurrentState) hdrResultLimit

peekResultField :: Int -> Int -> ResultPtr -> Int
peekResultField n fieldno fptr = inlinePerformIO $ -- !! Using inlinePerformIO should be safe - we are just reading bytes from memory
Expand All @@ -91,7 +96,7 @@ callLex bs hdr = unsafeDupablePerformIO $ -- Using Dupable PerformIO should be s
poke hdrptr (hdr{hdrResultNum=0, hdrLength=fromIntegral $ BS.length bs})

bsptr <- unsafeUseAsCString bs return
resptr <- mallocForeignPtrBytes (resultLimit * sizeOf (undefined :: CInt) * 4)
resptr <- mallocForeignPtrBytes (fromIntegral (hdrResultLimit hdr) * sizeOf (undefined :: CInt) * 4)
res <- withForeignPtr resptr $ \resptr' ->
lexJson bsptr hdrptr resptr'

Expand Down Expand Up @@ -212,18 +217,22 @@ parseResults (TempData {tmpNumbers=tmpNumbers, tmpBuffer=bs}) (err, hdr, rescoun
PartialResult (StringContent (encodeUtf8 $ T.singleton $ toEnum resAddData)) next
-- -- Partial string, not the end
| resType == resStringPartial ->
if resLength == 0
if resLength == -1
then PartialResult (StringContent (BSW.singleton $ fromIntegral resAddData)) next -- \n\r..
else PartialResult (StringContent textSection) next -- normal string section
| otherwise -> error "Unsupported"

-- | Estimate how many lexer result slots to allocate for one input
-- chunk: one slot per five input bytes, plus one so the estimate is
-- never zero (even for an empty chunk).
estResultLimit :: BS.ByteString -> CInt
estResultLimit chunk = fromIntegral (slots + 1)
  where
    slots = BS.length chunk `div` 5

-- | Produce the next token from the lexer state.
--
-- Three cases, in order:
--   * a previous chunk already failed -> 'TokFailed';
--   * the current buffer still has unconsumed bytes -> lex it in place;
--   * the buffer is exhausted -> 'TokMoreData', asking the caller for
--     the next input chunk.
getNextResult :: TempData -> TokenResult
getNextResult tmp@(TempData {..})
  | tmpError = TokFailed
  | hdrPosition tmpHeader < hdrLength tmpHeader = parseResults tmp (callLex tmpBuffer tmpHeader)
  | otherwise = TokMoreData newdata
  where
    -- Continuation for a fresh chunk: reset the header position/length
    -- to the new chunk and re-derive hdrResultLimit from its size, so
    -- the result buffer allocated in callLex scales with the chunk.
    newdata dta = parseResults newtmp (callLex dta newhdr{hdrResultLimit=estResultLimit dta})
      where
        newtmp = tmp{tmpBuffer=dta}
        newhdr = tmpHeader{hdrPosition=0, hdrLength=fromIntegral $ BS.length dta}
Expand All @@ -232,4 +241,4 @@ getNextResult tmp@(TempData {..})
-- | Start tokenizing an initial chunk of input. The starting header is
-- the zeroed default with its length and result limit derived from the
-- chunk, so lexing begins at position zero.
tokenParser :: BS.ByteString -> TokenResult
tokenParser input = getNextResult initialState
  where
    initialState = TempData input startHeader False []
    startHeader =
      defHeader { hdrLength = fromIntegral (BS.length input)
                , hdrResultLimit = estResultLimit input
                }
3 changes: 2 additions & 1 deletion Data/JsonStream/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ import qualified Data.Text.Lazy as TL
import Data.Text.Lazy.Encoding (decodeUtf8')
import qualified Data.Vector as Vec

import Data.Bits (clearBit, setBit)
import Data.JsonStream.CLexer (tokenParser)
import Data.JsonStream.TokenParser

Expand Down Expand Up @@ -463,6 +462,7 @@ ignoreVal' stval = Parser $ moreData (handleTok stval)
handleLongString level _ (StringContent _) ntok = moreData (handleLongString level) ntok
handleLongString 0 _ StringEnd ntok = Done "" ntok
handleLongString level _ StringEnd ntok = moreData (handleTok level) ntok
handleLongString _ _ el _ = Failed $ "Unexpected element in handleLongStr: " ++ (show el)

handleTok :: Int -> TokenResult -> Element -> TokenResult -> ParseResult a
handleTok 0 _ (JValue _) ntok = Done "" ntok
Expand All @@ -480,6 +480,7 @@ ignoreVal' stval = Parser $ moreData (handleTok stval)
ObjectEnd _ -> moreData (handleTok (level - 1)) ntok
ArrayBegin -> moreData (handleTok (level + 1)) ntok
ObjectBegin -> moreData (handleTok (level + 1)) ntok
StringEnd -> Failed "Internal error - out of order StringEnd"

-- | Gather matches and return them as list.
--
Expand Down
7 changes: 3 additions & 4 deletions c_lib/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,7 @@ int handle_string(const char *input, struct lexer *lexer)
// Emit partial string
res->restype = RES_STRING_PARTIAL;
res->adddata = 0;
if (res->length != 0) // Do not add new result, if length == 0
lexer->result_num++;
lexer->result_num++;

// If we stopped because of backslash, change state, move one forward
if (lexer->position < lexer->length) {
Expand Down Expand Up @@ -251,7 +250,7 @@ static inline void emitchar(char ch, struct lexer *lexer)

res->restype = RES_STRING_PARTIAL;
res->startpos = lexer->position;
res->length = 0;
res->length = -1; // Special value indicating that this is special character
res->adddata = ch;

lexer->result_num++;
Expand Down Expand Up @@ -294,7 +293,7 @@ int lex_json(const char *input, struct lexer *lexer, struct lexer_result *result
&&state_string_uni
};
#define DISPATCH() { \
if (!(lexer->position < lexer->length && lexer->result_num < RESULT_COUNT && res == 0)) \
if (!(lexer->position < lexer->length && lexer->result_num < lexer->result_limit && res == 0)) \
return res; \
goto *dispatch_table[lexer->current_state];\
}
Expand Down
3 changes: 1 addition & 2 deletions c_lib/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
#define RES_STRING_UNI 11
#define RES_NUMBER_SMALL 12

#define RESULT_COUNT 6000

enum states {
STATE_BASE = 0,
STATE_STRING,
Expand Down Expand Up @@ -47,6 +45,7 @@ struct lexer {
int length;

int result_num;
int result_limit;
struct lexer_result *result;
};

Expand Down
8 changes: 8 additions & 0 deletions test/ParserSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,14 @@ errTests = describe "Tests of previous errors" $ do
res = parseLazyByteString parser onechar :: [Int]
res `shouldBe` [123]

-- Regression test: empty JSON strings must survive being split across
-- chunk boundaries.
it "Parses correctly handles empty strings when sliced:" $ do
  let test1 = "[\"\", \"\", true]"
      onechar = BL.fromChunks $ map BS.singleton $ BS.unpack test1
      parser = arrayOf bool
      -- Parse the one-byte-per-chunk stream (not the whole string at
      -- once): the original used `parseByteString parser test1`, which
      -- left `onechar` unused and never exercised the slicing path the
      -- test title promises. Matches the preceding test's pattern.
      res = parseLazyByteString parser onechar :: [Bool]
  res `shouldBe` [True]


-- testLexer (start:rest) = iter rest (tokenParser start)
-- where
-- iter [] (TokMoreData cont) = print "done"
Expand Down

0 comments on commit ca715d3

Please sign in to comment.