diff --git a/package.yaml b/package.yaml index ca67d82208..cbb12cd5a1 100644 --- a/package.yaml +++ b/package.yaml @@ -114,7 +114,7 @@ dependencies: - rio-prettyprint >= 0.1.8.0 - split - stm -- tar >= 0.5.1.1 && < 0.6.0.0 +- tar >= 0.6.2.0 - template-haskell - text - time @@ -157,7 +157,6 @@ library: - Build_stack - Paths_stack exposed-modules: - - Codec.Archive.Tar.Utf8 - Control.Concurrent.Execute - Data.Attoparsec.Args - Data.Attoparsec.Combinators diff --git a/src/Codec/Archive/Tar/Utf8.hs b/src/Codec/Archive/Tar/Utf8.hs deleted file mode 100644 index 13f050673b..0000000000 --- a/src/Codec/Archive/Tar/Utf8.hs +++ /dev/null @@ -1,182 +0,0 @@ -module Codec.Archive.Tar.Utf8 - ( module Codec.Archive.Tar - , entryPath - , unpack - ) where - --- | A module that is equivalent to "Codec.Archive.Tar" from the @tar@ package, --- except that @unpack@ assumes that the file paths in an archive are UTF8 --- encoded. - -import Codec.Archive.Tar hiding ( entryPath, unpack ) -import Codec.Archive.Tar.Check ( checkSecurity ) -import Codec.Archive.Tar.Entry ( Entry (..), TarPath, fromLinkTarget ) -import qualified Codec.Archive.Tar.Entry as Tar -import Control.Exception ( Exception, catch, throwIO ) -import Data.Bits ( (.|.), (.&.), shiftL ) -import qualified Data.ByteString.Lazy as LBS -import Data.Char ( chr, ord ) -import Data.Int ( Int64 ) -import Data.Maybe ( fromMaybe ) -import Data.Time.Clock.POSIX ( posixSecondsToUTCTime ) -import System.Directory - ( copyFile, createDirectoryIfMissing, setModificationTime ) -import System.FilePath ( () ) -import qualified System.FilePath as FP -import System.IO.Error ( isPermissionError ) - -type EpochTime = Int64 - --- | Native 'FilePath' of the file or directory within the archive. --- --- Assumes that the 'TarPath' of an 'Entry' is UTF8 encoded. -entryPath :: Entry -> FilePath -entryPath = fromTarPath . entryTarPath - --- | Convert a 'TarPath' to a native 'FilePath'. --- --- The native 'FilePath' will use the native directory separator but it is not --- otherwise checked for validity or sanity. In particular: --- --- * The tar path may be invalid as a native path, eg the file name @\"nul\"@ --- is not valid on Windows. --- --- * The tar path may be an absolute path or may contain @\"..\"@ components. --- For security reasons this should not usually be allowed, but it is your --- responsibility to check for these conditions (eg using 'checkSecurity'). --- --- Assumes that the 'TarPath' is UTF8 encoded. -fromTarPath :: TarPath -> FilePath -fromTarPath tp = decodeIfUtf8Encoded $ Tar.fromTarPath tp - --- | Create local files and directories based on the entries of a tar archive. --- --- This is a portable implementation of unpacking suitable for portable --- archives. It handles 'NormalFile' and 'Directory' entries and has simulated --- support for 'SymbolicLink' and 'HardLink' entries. Links are implemented by --- copying the target file. This therefore works on Windows as well as Unix. --- All other entry types are ignored, that is they are not unpacked and no --- exception is raised. --- --- If the 'Entries' ends in an error then it is raised an an exception. Any --- files or directories that have been unpacked before the error was --- encountered will not be deleted. For this reason you may want to unpack --- into an empty directory so that you can easily clean up if unpacking fails --- part-way. --- --- On its own, this function only checks for security (using 'checkSecurity'). --- You can do other checks by applying checking functions to the 'Entries' that --- you pass to this function. For example: --- --- > unpack dir (checkTarbomb expectedDir entries) --- --- If you care about the priority of the reported errors then you may want to --- use 'checkSecurity' before 'checkTarbomb' or other checks. --- --- Assumes that the 'TarPath' of an `Entry` is UTF8 encoded. -unpack :: Exception e => FilePath -> Entries e -> IO () -unpack baseDir entries = unpackEntries [] (checkSecurity entries) - >>= emulateLinks - - where - -- We're relying here on 'checkSecurity' to make sure we're not scribbling - -- files all over the place. - - unpackEntries _ (Fail err) = either throwIO throwIO err - unpackEntries links Done = return links - unpackEntries links (Next entry es) = case entryContent entry of - NormalFile file _ -> extractFile path file mtime - >> unpackEntries links es - Directory -> extractDir path mtime - >> unpackEntries links es - HardLink link -> (unpackEntries $! saveLink path link links) es - SymbolicLink link -> (unpackEntries $! saveLink path link links) es - _ -> unpackEntries links es --ignore other file types - where - path = entryPath entry - mtime = entryTime entry - - extractFile path content mtime = do - -- Note that tar archives do not make sure each directory is created - -- before files they contain, indeed we may have to create several - -- levels of directory. - createDirectoryIfMissing True absDir - LBS.writeFile absPath content - setModTime absPath mtime - where - absDir = baseDir FP.takeDirectory path - absPath = baseDir path - - extractDir path mtime = do - createDirectoryIfMissing True absPath - setModTime absPath mtime - where - absPath = baseDir path - - saveLink path link links = seq (length path) - $ seq (length link') - $ (path, link'):links - where link' = fromLinkTarget link - - emulateLinks = mapM_ $ \(relPath, relLinkTarget) -> - let absPath = baseDir relPath - absTarget = FP.takeDirectory absPath relLinkTarget - in copyFile absTarget absPath - -setModTime :: FilePath -> EpochTime -> IO () -setModTime path t = - setModificationTime path (posixSecondsToUTCTime (fromIntegral t)) - `catch` \e -> - if isPermissionError e then return () else throwIO e - --- | If the given 'String' can be interpreted as a string of bytes that encodes --- a string using UTF8, then yields the string decoded, otherwise yields the --- given 'String'. - --- Inspired by the utf8-string package. -decodeIfUtf8Encoded :: String -> String -decodeIfUtf8Encoded s = fromMaybe s $ decode s - where - decode :: String -> Maybe String - decode [] = Just "" - decode (c:cs) - | c' < 0x80 = decode' c cs - | c' < 0xc0 = Nothing - | c' < 0xe0 = multi1 - | c' < 0xf0 = multiByte 2 0b1111 0x00000800 - | c' < 0xf8 = multiByte 3 0b0111 0x00010000 - | c' < 0xfc = multiByte 4 0b0011 0x00200000 - | c' < 0xfe = multiByte 5 0b0001 0x04000000 - | otherwise = Nothing - where - c' = ord c - isValidByte b = b <= 0xff && b .&. 0b11000000 == 0b10000000 - combine b1 b2 = (b1 `shiftL` 6) .|. (b2 .&. 0b00111111) - multi1 = case cs of - c1:ds | isValidByte c1' -> - let d = combine (c' .&. 0b00011111) c1' - in if d >= 0x80 - then decode' (chr d) ds - else Nothing - where - c1' = ord c1 - _ -> Nothing - multiByte :: Int -> Int -> Int -> Maybe String - multiByte i mask overlong = aux i cs (c' .&. mask) - where - aux 0 rs acc - | isValidAcc = decode' (chr acc) rs - | otherwise = Nothing - where - isValidAcc = overlong <= acc - && acc <= 0x10ffff - && (acc < 0xd800 || 0xdfff < acc) - && (acc < 0xfffe || 0xffff < acc) - aux n (r : rs) acc | isValidByte r' = aux (n - 1) rs $ combine acc r' - where - r' = ord r - aux _ _ _ = Nothing - decode' :: Char -> String -> Maybe String - decode' x xs = do - xs' <- decode xs - pure $ x : xs' diff --git a/src/Stack/SDist.hs b/src/Stack/SDist.hs index 8add686e22..4f989f8d75 100644 --- a/src/Stack/SDist.hs +++ b/src/Stack/SDist.hs @@ -15,7 +15,7 @@ module Stack.SDist , readLocalPackage ) where -import qualified Codec.Archive.Tar.Utf8 as Tar +import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar.Entry as Tar import qualified Codec.Compression.GZip as GZip import Conduit ( runConduitRes, sourceLazy, sinkFileCautious ) @@ -23,7 +23,6 @@ import Control.Concurrent.Execute ( ActionContext (..), Concurrency (..) ) import Control.Monad.Extra ( whenJust ) import qualified Data.ByteString as S -import qualified Data.ByteString.Char8 as S8 import qualified Data.ByteString.Lazy as L import Data.Char ( toLower ) import Data.Data ( cast ) @@ -252,13 +251,9 @@ getSDistTarball mpvpBounds pkgDir = do -- prone and more predictable to read everything in at once, so that's what -- we're doing for now: let tarPath isDir fp = - case Tar.toTarPath isDir (forceUtf8Enc (pkgIdName FP. fp)) of + case Tar.toTarPath isDir (pkgIdName FP. fp) of Left e -> prettyThrowIO $ ToTarPathException e Right tp -> pure tp - -- convert a String of proper characters to a String of bytes in UTF8 - -- encoding masquerading as characters. This is necessary for tricking the - -- tar package into proper character encoding. - forceUtf8Enc = S8.unpack . T.encodeUtf8 . T.pack packWith f isDir fp = liftIO $ f (pkgFp FP. fp) =<< tarPath isDir fp packDir = packWith Tar.packDirectoryEntry True packFile fp diff --git a/stack-ghc-9.8.2.yaml b/stack-ghc-9.8.2.yaml index 36c62b4535..652732d36f 100644 --- a/stack-ghc-9.8.2.yaml +++ b/stack-ghc-9.8.2.yaml @@ -3,6 +3,13 @@ snapshot: nightly-2024-04-04 # GHC 9.8.2 +extra-deps: +# Dependency of tar >= 0.6.2.0: +- os-string-2.0.2@sha256:32fa47f8345a2c0662fb602fc42e4b674e41ec48079b68bdecb4b6f68032c24e,3259 +# nightly-2024-04-04 provides tar-0.5.1.1, which does not support Unicode +# filenames: +- tar-0.6.2.0@sha256:619828cae098a7b6deeb0316e12f55011101d88f756787ed024ceedb81cf1eba,4576 + docker: enable: false repo: quay.io/benz0li/ghc-musl:9.8.1 diff --git a/stack-ghc-9.8.2.yaml.lock b/stack-ghc-9.8.2.yaml.lock index 8f30156cd4..1ce1544cab 100644 --- a/stack-ghc-9.8.2.yaml.lock +++ b/stack-ghc-9.8.2.yaml.lock @@ -3,7 +3,21 @@ # For more information, please see the documentation at: # https://docs.haskellstack.org/en/stable/lock_files -packages: [] +packages: +- completed: + hackage: os-string-2.0.2@sha256:32fa47f8345a2c0662fb602fc42e4b674e41ec48079b68bdecb4b6f68032c24e,3259 + pantry-tree: + sha256: 5b1dbaf1663005907462032997264b7942befb9fc108491c1fdd5e488ac0817e + size: 2217 + original: + hackage: os-string-2.0.2@sha256:32fa47f8345a2c0662fb602fc42e4b674e41ec48079b68bdecb4b6f68032c24e,3259 +- completed: + hackage: tar-0.6.2.0@sha256:619828cae098a7b6deeb0316e12f55011101d88f756787ed024ceedb81cf1eba,4576 + pantry-tree: + sha256: 1dde50961e9d1a6e6f820d918c0edbbd9673c83afdd300e0aae40a691e8151df + size: 2168 + original: + hackage: tar-0.6.2.0@sha256:619828cae098a7b6deeb0316e12f55011101d88f756787ed024ceedb81cf1eba,4576 snapshots: - completed: sha256: 31177dd01e2b7801b9347776d892d1e808396c21ab3b60f7f8c96856bdbe5f52 diff --git a/stack.cabal b/stack.cabal index a81cbad1a7..00034a6c82 100644 --- a/stack.cabal +++ b/stack.cabal @@ -152,7 +152,6 @@ flag supported-build library exposed-modules: - Codec.Archive.Tar.Utf8 Control.Concurrent.Execute Data.Attoparsec.Args Data.Attoparsec.Combinators @@ -409,7 +408,7 @@ library , rio-prettyprint >=0.1.8.0 , split , stm - , tar >=0.5.1.1 && <0.6.0.0 + , tar >=0.6.2.0 , template-haskell , text , time @@ -532,7 +531,7 @@ executable stack , split , stack , stm - , tar >=0.5.1.1 && <0.6.0.0 + , tar >=0.6.2.0 , template-haskell , text , time @@ -634,7 +633,7 @@ executable stack-integration-test , rio-prettyprint >=0.1.8.0 , split , stm - , tar >=0.5.1.1 && <0.6.0.0 + , tar >=0.6.2.0 , template-haskell , text , time @@ -752,7 +751,7 @@ test-suite stack-unit-test , split , stack , stm - , tar >=0.5.1.1 && <0.6.0.0 + , tar >=0.6.2.0 , template-haskell , text , time diff --git a/stack.yaml b/stack.yaml index 66762a01c8..97d521b109 100644 --- a/stack.yaml +++ b/stack.yaml @@ -7,9 +7,13 @@ extra-deps: # Cabal is pruned because process is a GHC boot package, and has to be specified # again. - Cabal-3.10.1.0@sha256:6d11adf7847d9734e7b02785ff831b5a0d11536bfbcefd6634b2b08411c63c94,12316 +# Dependency of tar >= 0.6.2.0: +- os-string-2.0.2@sha256:32fa47f8345a2c0662fb602fc42e4b674e41ec48079b68bdecb4b6f68032c24e,3259 - pantry-0.10.0@sha256:6f99ee8d7cfeeb0e2513638618acf80c72d018e7f10120048fa120a409b9dcd2,7864 # GHC 9.6.4 comes with process-1.6.17.0, which can segfault on macOS. - process-1.6.18.0@sha256:cd0a3e0376b5a8525983d3131a31e52f9ffefc278ce635eec45a9d3987b8be3e,3025 +# lts-22.7 provides tar-0.5.1.1, which does not support Unicode filenames: +- tar-0.6.2.0@sha256:619828cae098a7b6deeb0316e12f55011101d88f756787ed024ceedb81cf1eba,4576 docker: enable: false diff --git a/stack.yaml.lock b/stack.yaml.lock index 30e9d3c540..a5c58494e1 100644 --- a/stack.yaml.lock +++ b/stack.yaml.lock @@ -11,6 +11,13 @@ packages: size: 9223 original: hackage: Cabal-3.10.1.0@sha256:6d11adf7847d9734e7b02785ff831b5a0d11536bfbcefd6634b2b08411c63c94,12316 +- completed: + hackage: os-string-2.0.2@sha256:32fa47f8345a2c0662fb602fc42e4b674e41ec48079b68bdecb4b6f68032c24e,3259 + pantry-tree: + sha256: 5b1dbaf1663005907462032997264b7942befb9fc108491c1fdd5e488ac0817e + size: 2217 + original: + hackage: os-string-2.0.2@sha256:32fa47f8345a2c0662fb602fc42e4b674e41ec48079b68bdecb4b6f68032c24e,3259 - completed: hackage: pantry-0.10.0@sha256:6f99ee8d7cfeeb0e2513638618acf80c72d018e7f10120048fa120a409b9dcd2,7864 pantry-tree: @@ -25,6 +32,13 @@ packages: size: 1675 original: hackage: process-1.6.18.0@sha256:cd0a3e0376b5a8525983d3131a31e52f9ffefc278ce635eec45a9d3987b8be3e,3025 +- completed: + hackage: tar-0.6.2.0@sha256:619828cae098a7b6deeb0316e12f55011101d88f756787ed024ceedb81cf1eba,4576 + pantry-tree: + sha256: 1dde50961e9d1a6e6f820d918c0edbbd9673c83afdd300e0aae40a691e8151df + size: 2168 + original: + hackage: tar-0.6.2.0@sha256:619828cae098a7b6deeb0316e12f55011101d88f756787ed024ceedb81cf1eba,4576 snapshots: - completed: sha256: 7b975b104cb3dbf0c297dfd01f936a4d2ee523241dd0b1ae960522b833fe3027 diff --git a/tests/integration/tests/6372-sdist-unicode-test/.gitignore b/tests/integration/tests/6372-sdist-unicode-test/.gitignore new file mode 100644 index 0000000000..609770f0c7 --- /dev/null +++ b/tests/integration/tests/6372-sdist-unicode-test/.gitignore @@ -0,0 +1 @@ +stack.yaml.lock diff --git a/tests/integration/tests/6372-sdist-unicode-test/Main.hs b/tests/integration/tests/6372-sdist-unicode-test/Main.hs new file mode 100644 index 0000000000..1d546c1afc --- /dev/null +++ b/tests/integration/tests/6372-sdist-unicode-test/Main.hs @@ -0,0 +1,22 @@ +import StackTest + +import Control.Monad (unless) + +-- | The test fails at runtime on the Windows Server 2022 GitHub-hosted runner +-- only, at the point of outputting a Unicode character, with: +-- +-- : commitAndReleaseBuffer: invalid argument (cannot encode character '\1633') +-- +-- That appears to be similar to +-- https://gitlab.haskell.org/ghc/ghc/-/issues/8118, however: (1) the locale is +-- set to C.UTF-8 and the active code page is 65001; and +-- (2) `GHC.IO.Encoding.setLocaleEncoding utf8` has no effect. +-- +-- Until the origin of the problem is identified, we disable the test on +-- Windows. + +main :: IO () +main = unless isWindows $ do + stack ["clean"] + stack ["build", "--dry-run"] + stack ["sdist", "."] diff --git a/tests/integration/tests/6372-sdist-unicode-test/files/package.yaml b/tests/integration/tests/6372-sdist-unicode-test/files/package.yaml new file mode 100644 index 0000000000..30b809b389 --- /dev/null +++ b/tests/integration/tests/6372-sdist-unicode-test/files/package.yaml @@ -0,0 +1,8 @@ +name: test١٢٣ +version: 0.1.0.0 +license: BSD-3-Clause +description: Test of Unicode characters in file names +dependencies: +- base < 5 +library: + source-dirs: src diff --git "a/tests/integration/tests/6372-sdist-unicode-test/files/src/Lib\316\261\316\262\316\263.hs" "b/tests/integration/tests/6372-sdist-unicode-test/files/src/Lib\316\261\316\262\316\263.hs" new file mode 100644 index 0000000000..62cdaa6007 --- /dev/null +++ "b/tests/integration/tests/6372-sdist-unicode-test/files/src/Lib\316\261\316\262\316\263.hs" @@ -0,0 +1,6 @@ +module Libαβγ + ( someFunc + ) where + +someFunc :: IO () +someFunc = putStrLn "someFunc" diff --git a/tests/integration/tests/6372-sdist-unicode-test/files/stack.yaml b/tests/integration/tests/6372-sdist-unicode-test/files/stack.yaml new file mode 100644 index 0000000000..d3ca44cf59 --- /dev/null +++ b/tests/integration/tests/6372-sdist-unicode-test/files/stack.yaml @@ -0,0 +1 @@ +snapshot: lts-22.7 diff --git "a/tests/integration/tests/6372-sdist-unicode-test/files/test\331\241\331\242\331\243.cabal" "b/tests/integration/tests/6372-sdist-unicode-test/files/test\331\241\331\242\331\243.cabal" new file mode 100644 index 0000000000..8525055843 --- /dev/null +++ "b/tests/integration/tests/6372-sdist-unicode-test/files/test\331\241\331\242\331\243.cabal" @@ -0,0 +1,24 @@ +cabal-version: 2.2 + +-- This file has been generated from package.yaml by hpack version 0.36.0. +-- +-- see: https://github.com/sol/hpack + +name: test١٢٣ +version: 0.1.0.0 +description: Test of Unicode characters in file names +license: BSD-3-Clause +build-type: Simple + +library + exposed-modules: + Libαβγ + other-modules: + Paths_test١٢٣ + autogen-modules: + Paths_test١٢٣ + hs-source-dirs: + src + build-depends: + base <5 + default-language: Haskell2010