-
Notifications
You must be signed in to change notification settings - Fork 1
/
csvzip.hs
54 lines (45 loc) · 2.11 KB
/
csvzip.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
-----------------------------------------------------------------------------
-- |
-- Module : csvzip
-- Copyright : (c) Keith Sheppard 2009-2010
-- License : BSD3
-- Maintainer : [email protected]
-- Stability : experimental
-- Portability : portable
--
-- Joins CSV files by pasting the columns together. Analogous to cbind for
-- those familiar with the R programming language. This utility streams
-- data so it can work on very large files. If the table row lengths don't
-- match then the shorter tables will be padded with empty cells.
--
-----------------------------------------------------------------------------
import System.Environment (getArgs, getProgName)
import Database.TxtSushi.FlatFile (csvFormat, formatTable, parseTable)
import Database.TxtSushi.IOUtil (getContentsFromFileOrStdin, versionStr)
-- | Program entry point. With two or more command-line arguments, each one is
-- read and parsed via 'getAndParseTable', the resulting tables are pasted
-- together column-wise with 'zipAllColumns', and the result is written to
-- standard output in CSV format. With fewer than two arguments the usage
-- message is printed instead.
main :: IO ()
main = getArgs >>= run
  where
    -- pasting columns only makes sense with at least two inputs
    run paths@(_ : _ : _) =
        mapM getAndParseTable paths
            >>= putStr . formatTable csvFormat . zipAllColumns
    run _ = printUsage
-- | Read the contents identified by the given file name and parse them as a
-- CSV table (a list of rows, each row being a list of cell strings).
-- NOTE(review): presumably a name of "-" selects standard input, as the
-- helper's name and the usage text suggest -- confirm in
-- Database.TxtSushi.IOUtil.
getAndParseTable :: String -> IO [[String]]
getAndParseTable fileName = do
    contents <- getContentsFromFileOrStdin fileName
    return (parseTable csvFormat contents)
-- | Zips together the columns of a list of tables: each table's rows are
-- appended to the right of the rows accumulated from the tables before it.
--
-- If the row counts don't match, the table that fell short is padded with
-- empty cells, as many as its last row had cells, so that the columns of
-- the remaining tables stay aligned.
--
-- An empty list of tables yields an empty table (previously this case
-- raised an exception from 'foldl1').
zipAllColumns :: [[[String]]] -> [[String]]
zipAllColumns [] = []
zipAllColumns (firstTable : otherTables) =
    -- Matching on the head proves the list is non-empty, so a plain fold
    -- seeded with the first table replaces the partial 'foldl1'. The fold is
    -- left lazy on purpose so intermediate tables are not forced early.
    foldl (pasteTables [] []) firstTable otherTables
  where
    -- The first two arguments carry the most recently consumed row of the
    -- left and right table; they are only used to size the padding once one
    -- side runs out of rows.
    pasteTables lastLeft lastRight leftRows rightRows =
        case (leftRows, rightRows) of
            (l : ls, r : rs) -> (l ++ r) : pasteTables l r ls rs
            ([], [])         -> []
            -- right table ended first: pad on the right
            (ls, [])         -> zipWith (++) ls (blankRows lastRight)
            -- left table ended first: pad on the left
            ([], rs)         -> zipWith (++) (blankRows lastLeft) rs

    -- An endless supply of rows made of empty cells, each as wide as the
    -- given row; 'zipWith' takes only as many as the longer table needs.
    blankRows lastRow = repeat (replicate (length lastRow) "")
-- | Print two lines to standard output: the program name followed by the
-- version string in parentheses, then a one-line usage summary.
printUsage :: IO ()
printUsage = getProgName >>= \progName ->
    mapM_ putStrLn
        [ progName ++ " (" ++ versionStr ++ ")"
        , "Usage: " ++ progName ++ " csvfile_or_dash csvfile_or_dash ..."
        ]