module Data.String.UTF8
( encode
, decode
, decodeEmbedErrors
, encodeOne
, decodeOne
, Error
) where
import Data.Char (ord, chr)
import Data.Word (Word8, Word16, Word32)
import Data.Bits (shiftL, shiftR, (.&.), (.|.))
import Data.List (unfoldr)
encodeOne :: Char -> [Word8]
encodeOne :: Char -> [Word8]
encodeOne Char
c
| Int
n forall a. Ord a => a -> a -> Bool
< Int
0x0080 = Word8 -> [Word8]
encodeOne_onebyte Word8
n8
| Int
n forall a. Ord a => a -> a -> Bool
< Int
0x0800 = Word16 -> [Word8]
encodeOne_twobyte Word16
n16
| Int
n forall a. Ord a => a -> a -> Bool
< Int
0xD800 = Word16 -> [Word8]
encodeOne_threebyte Word16
n16
| Int
n forall a. Ord a => a -> a -> Bool
< Int
0xE000 = forall a. HasCallStack => [Char] -> a
error [Char]
"encodeUTF8: ord returned a surrogate value"
| Int
n forall a. Ord a => a -> a -> Bool
< Int
0x10000 = Word16 -> [Word8]
encodeOne_threebyte Word16
n16
| Int
n forall a. Ord a => a -> a -> Bool
< Int
0x10FFFF = Word32 -> [Word8]
encodeOne_fourbyte Word32
n32
| Bool
otherwise = forall a. HasCallStack => [Char] -> a
error [Char]
"encodeUTF8: ord returned a value above 0x10FFFF"
where
n :: Int
n = Char -> Int
ord Char
c :: Int
n8 :: Word8
n8 = forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n :: Word8
n16 :: Word16
n16 = forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n :: Word16
n32 :: Word32
n32 = forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n :: Word32
encode :: [Char] -> [Word8]
encode :: [Char] -> [Word8]
encode = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Char -> [Word8]
encodeOne
encodeOne_onebyte :: Word8 -> [Word8]
encodeOne_onebyte :: Word8 -> [Word8]
encodeOne_onebyte Word8
cp = [Word8
cp]
encodeOne_twobyte :: Word16 -> [Word8]
encodeOne_twobyte :: Word16 -> [Word8]
encodeOne_twobyte Word16
cp = [(Word8
0xC0forall a. Bits a => a -> a -> a
.|.Word8
ys), (Word8
0x80forall a. Bits a => a -> a -> a
.|.Word8
xs)]
where
xs, ys :: Word8
ys :: Word8
ys = forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Bits a => a -> Int -> a
shiftR Word16
cp Int
6)
xs :: Word8
xs = (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
cp) forall a. Bits a => a -> a -> a
.&. Word8
0x3F
encodeOne_threebyte :: Word16 -> [Word8]
encodeOne_threebyte :: Word16 -> [Word8]
encodeOne_threebyte Word16
cp = [(Word8
0xE0forall a. Bits a => a -> a -> a
.|.Word8
zs), (Word8
0x80forall a. Bits a => a -> a -> a
.|.Word8
ys), (Word8
0x80forall a. Bits a => a -> a -> a
.|.Word8
xs)]
where
xs, ys, zs :: Word8
xs :: Word8
xs = (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
cp) forall a. Bits a => a -> a -> a
.&. Word8
0x3F
ys :: Word8
ys = (forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Bits a => a -> Int -> a
shiftR Word16
cp Int
6)) forall a. Bits a => a -> a -> a
.&. Word8
0x3F
zs :: Word8
zs = forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Bits a => a -> Int -> a
shiftR Word16
cp Int
12)
encodeOne_fourbyte :: Word32 -> [Word8]
encodeOne_fourbyte :: Word32 -> [Word8]
encodeOne_fourbyte Word32
cp = [Word8
0xF0forall a. Bits a => a -> a -> a
.|.Word8
us, Word8
0x80forall a. Bits a => a -> a -> a
.|.Word8
zs, Word8
0x80forall a. Bits a => a -> a -> a
.|.Word8
ys, Word8
0x80forall a. Bits a => a -> a -> a
.|.Word8
xs]
where
xs, ys, zs, us :: Word8
xs :: Word8
xs = (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
cp) forall a. Bits a => a -> a -> a
.&. Word8
0x3F
ys :: Word8
ys = (forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Bits a => a -> Int -> a
shiftR Word32
cp Int
6)) forall a. Bits a => a -> a -> a
.&. Word8
0x3F
zs :: Word8
zs = (forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Bits a => a -> Int -> a
shiftR Word32
cp Int
12)) forall a. Bits a => a -> a -> a
.&. Word8
0x3F
us :: Word8
us = forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Bits a => a -> Int -> a
shiftR Word32
cp Int
18)
data Error
= InvalidFirstByte
| InvalidLaterByte Int
| Truncated Int Int
| NonShortest Int Int
| ValueOutOfBounds
| Surrogate
deriving (Int -> Error -> ShowS
[Error] -> ShowS
Error -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [Error] -> ShowS
$cshowList :: [Error] -> ShowS
show :: Error -> [Char]
$cshow :: Error -> [Char]
showsPrec :: Int -> Error -> ShowS
$cshowsPrec :: Int -> Error -> ShowS
Show, Error -> Error -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Error -> Error -> Bool
$c/= :: Error -> Error -> Bool
== :: Error -> Error -> Bool
$c== :: Error -> Error -> Bool
Eq)
first_bits_not_10 :: Word8 -> Bool
first_bits_not_10 :: Word8 -> Bool
first_bits_not_10 Word8
b
| (Word8
bforall a. Bits a => a -> a -> a
.&.Word8
0xC0) forall a. Eq a => a -> a -> Bool
/= Word8
0x80 = Bool
True
| Bool
otherwise = Bool
False
decodeOne :: [Word8] -> (Either Error Char, Int, [Word8])
decodeOne :: [Word8] -> (Either Error Char, Int, [Word8])
decodeOne bs :: [Word8]
bs@(Word8
b1:[Word8]
rest)
| Word8
b1 forall a. Ord a => a -> a -> Bool
< Word8
0x80 = [Word8] -> (Either Error Char, Int, [Word8])
decodeOne_onebyte [Word8]
bs
| Word8
b1 forall a. Ord a => a -> a -> Bool
< Word8
0xC0 = (forall a b. a -> Either a b
Left Error
InvalidFirstByte, Int
1, [Word8]
rest)
| Word8
b1 forall a. Ord a => a -> a -> Bool
< Word8
0xE0 = [Word8] -> (Either Error Char, Int, [Word8])
decodeOne_twobyte [Word8]
bs
| Word8
b1 forall a. Ord a => a -> a -> Bool
< Word8
0xF0 = [Word8] -> (Either Error Char, Int, [Word8])
decodeOne_threebyte [Word8]
bs
| Word8
b1 forall a. Ord a => a -> a -> Bool
< Word8
0xF5 = [Word8] -> (Either Error Char, Int, [Word8])
decodeOne_fourbyte [Word8]
bs
| Bool
otherwise = (forall a b. a -> Either a b
Left Error
ValueOutOfBounds, Int
1, [Word8]
rest)
decodeOne [] = forall a. HasCallStack => [Char] -> a
error [Char]
"UTF8.decodeOne: No input"
decodeOne_onebyte :: [Word8] -> (Either Error Char, Int, [Word8])
decodeOne_onebyte :: [Word8] -> (Either Error Char, Int, [Word8])
decodeOne_onebyte (Word8
b:[Word8]
bs) = (forall a b. b -> Either a b
Right (forall a. Integral a => a -> Char
cpToChar Word8
b), Int
1, [Word8]
bs)
decodeOne_onebyte[] = forall a. HasCallStack => [Char] -> a
error [Char]
"UTF8.decodeOne_onebyte: No input (can't happen)"
cpToChar :: Integral a => a -> Char
cpToChar :: forall a. Integral a => a -> Char
cpToChar = Int -> Char
chr forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (Integral a, Num b) => a -> b
fromIntegral
decodeOne_twobyte :: [Word8] -> (Either Error Char, Int, [Word8])
decodeOne_twobyte :: [Word8] -> (Either Error Char, Int, [Word8])
decodeOne_twobyte (Word8
_:[])
= (forall a b. a -> Either a b
Left (Int -> Int -> Error
Truncated Int
1 Int
2), Int
1, [])
decodeOne_twobyte (Word8
b1:Word8
b2:[Word8]
bs)
| Word8
b1 forall a. Ord a => a -> a -> Bool
< Word8
0xC2 = (forall a b. a -> Either a b
Left (Int -> Int -> Error
NonShortest Int
2 Int
1), Int
2, [Word8]
bs)
| Word8 -> Bool
first_bits_not_10 Word8
b2 = (forall a b. a -> Either a b
Left (Int -> Error
InvalidLaterByte Int
1), Int
1, (Word8
b2forall a. a -> [a] -> [a]
:[Word8]
bs))
| Bool
otherwise = (forall a b. b -> Either a b
Right (forall a. Integral a => a -> Char
cpToChar Word32
result), Int
2, [Word8]
bs)
where
xs, ys, result :: Word32
xs :: Word32
xs = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b2forall a. Bits a => a -> a -> a
.&.Word8
0x3F)
ys :: Word32
ys = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b1forall a. Bits a => a -> a -> a
.&.Word8
0x1F)
result :: Word32
result = forall a. Bits a => a -> Int -> a
shiftL Word32
ys Int
6 forall a. Bits a => a -> a -> a
.|. Word32
xs
decodeOne_twobyte[] = forall a. HasCallStack => [Char] -> a
error [Char]
"UTF8.decodeOne_twobyte: No input (can't happen)"
decodeOne_threebyte :: [Word8] -> (Either Error Char, Int, [Word8])
decodeOne_threebyte :: [Word8] -> (Either Error Char, Int, [Word8])
decodeOne_threebyte (Word8
_:[]) = Int -> (Either Error Char, Int, [Word8])
threebyte_truncated Int
1
decodeOne_threebyte (Word8
_:Word8
_:[]) = Int -> (Either Error Char, Int, [Word8])
threebyte_truncated Int
2
decodeOne_threebyte bs :: [Word8]
bs@(Word8
b1:Word8
b2:Word8
b3:[Word8]
rest)
| Word8 -> Bool
first_bits_not_10 Word8
b2
= (forall a b. a -> Either a b
Left (Int -> Error
InvalidLaterByte Int
1), Int
1, forall a. Int -> [a] -> [a]
drop Int
1 [Word8]
bs)
| Word8 -> Bool
first_bits_not_10 Word8
b3
= (forall a b. a -> Either a b
Left (Int -> Error
InvalidLaterByte Int
2), Int
2, forall a. Int -> [a] -> [a]
drop Int
2 [Word8]
bs)
| Word32
result forall a. Ord a => a -> a -> Bool
< Word32
0x0080
= (forall a b. a -> Either a b
Left (Int -> Int -> Error
NonShortest Int
3 Int
1), Int
3, [Word8]
rest)
| Word32
result forall a. Ord a => a -> a -> Bool
< Word32
0x0800
= (forall a b. a -> Either a b
Left (Int -> Int -> Error
NonShortest Int
3 Int
2), Int
3, [Word8]
rest)
| Word32
result forall a. Ord a => a -> a -> Bool
>= Word32
0xD800 Bool -> Bool -> Bool
&& Word32
result forall a. Ord a => a -> a -> Bool
< Word32
0xE000
= (forall a b. a -> Either a b
Left Error
Surrogate, Int
3, [Word8]
rest)
| Bool
otherwise
= (forall a b. b -> Either a b
Right (forall a. Integral a => a -> Char
cpToChar Word32
result), Int
3, [Word8]
rest)
where
xs, ys, zs, result :: Word32
xs :: Word32
xs = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b3forall a. Bits a => a -> a -> a
.&.Word8
0x3F)
ys :: Word32
ys = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b2forall a. Bits a => a -> a -> a
.&.Word8
0x3F)
zs :: Word32
zs = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b1forall a. Bits a => a -> a -> a
.&.Word8
0x0F)
result :: Word32
result = forall a. Bits a => a -> Int -> a
shiftL Word32
zs Int
12 forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL Word32
ys Int
6 forall a. Bits a => a -> a -> a
.|. Word32
xs
decodeOne_threebyte[]
= forall a. HasCallStack => [Char] -> a
error [Char]
"UTF8.decodeOne_threebyte: No input (can't happen)"
threebyte_truncated :: Int -> (Either Error Char, Int, [Word8])
threebyte_truncated :: Int -> (Either Error Char, Int, [Word8])
threebyte_truncated Int
n = (forall a b. a -> Either a b
Left (Int -> Int -> Error
Truncated Int
n Int
3), Int
n, [])
decodeOne_fourbyte :: [Word8] -> (Either Error Char, Int, [Word8])
decodeOne_fourbyte :: [Word8] -> (Either Error Char, Int, [Word8])
decodeOne_fourbyte (Word8
_:[]) = Int -> (Either Error Char, Int, [Word8])
fourbyte_truncated Int
1
decodeOne_fourbyte (Word8
_:Word8
_:[]) = Int -> (Either Error Char, Int, [Word8])
fourbyte_truncated Int
2
decodeOne_fourbyte (Word8
_:Word8
_:Word8
_:[]) = Int -> (Either Error Char, Int, [Word8])
fourbyte_truncated Int
3
decodeOne_fourbyte bs :: [Word8]
bs@(Word8
b1:Word8
b2:Word8
b3:Word8
b4:[Word8]
rest)
| Word8 -> Bool
first_bits_not_10 Word8
b2
= (forall a b. a -> Either a b
Left (Int -> Error
InvalidLaterByte Int
1), Int
1, forall a. Int -> [a] -> [a]
drop Int
1 [Word8]
bs)
| Word8 -> Bool
first_bits_not_10 Word8
b3
= (forall a b. a -> Either a b
Left (Int -> Error
InvalidLaterByte Int
2), Int
2, forall a. Int -> [a] -> [a]
drop Int
2 [Word8]
bs)
| Word8 -> Bool
first_bits_not_10 Word8
b4
= (forall a b. a -> Either a b
Left (Int -> Error
InvalidLaterByte Int
3), Int
3, forall a. Int -> [a] -> [a]
drop Int
3 [Word8]
bs)
| Word32
result forall a. Ord a => a -> a -> Bool
< Word32
0x0080
= (forall a b. a -> Either a b
Left (Int -> Int -> Error
NonShortest Int
4 Int
1), Int
4, [Word8]
rest)
| Word32
result forall a. Ord a => a -> a -> Bool
< Word32
0x0800
= (forall a b. a -> Either a b
Left (Int -> Int -> Error
NonShortest Int
4 Int
2), Int
4, [Word8]
rest)
| Word32
result forall a. Ord a => a -> a -> Bool
< Word32
0x10000
= (forall a b. a -> Either a b
Left (Int -> Int -> Error
NonShortest Int
4 Int
3), Int
4, [Word8]
rest)
| Word32
result forall a. Ord a => a -> a -> Bool
> Word32
0x10FFFF
= (forall a b. a -> Either a b
Left Error
ValueOutOfBounds, Int
4, [Word8]
rest)
| Bool
otherwise
= (forall a b. b -> Either a b
Right (forall a. Integral a => a -> Char
cpToChar Word32
result), Int
4, [Word8]
rest)
where
xs, ys, zs, us, result :: Word32
xs :: Word32
xs = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b4 forall a. Bits a => a -> a -> a
.&. Word8
0x3F)
ys :: Word32
ys = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b3 forall a. Bits a => a -> a -> a
.&. Word8
0x3F)
zs :: Word32
zs = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b2 forall a. Bits a => a -> a -> a
.&. Word8
0x3F)
us :: Word32
us = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b1 forall a. Bits a => a -> a -> a
.&. Word8
0x07)
result :: Word32
result = Word32
xs forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL Word32
ys Int
6 forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL Word32
zs Int
12 forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL Word32
us Int
18
decodeOne_fourbyte[]
= forall a. HasCallStack => [Char] -> a
error [Char]
"UTF8.decodeOne_fourbyte: No input (can't happen)"
fourbyte_truncated :: Int -> (Either Error Char, Int, [Word8])
fourbyte_truncated :: Int -> (Either Error Char, Int, [Word8])
fourbyte_truncated Int
n = (forall a b. a -> Either a b
Left (Int -> Int -> Error
Truncated Int
n Int
4), Int
n, [])
decode :: [Word8] -> ([Char], [(Error,Int)])
decode :: [Word8] -> ([Char], [(Error, Int)])
decode = forall a b. (a, b) -> (b, a)
swap forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. [Either a b] -> ([a], [b])
partitionEither forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> [Either (Error, Int) Char]
decodeEmbedErrors
decodeEmbedErrors :: [Word8] -> [Either (Error,Int) Char]
decodeEmbedErrors :: [Word8] -> [Either (Error, Int) Char]
decodeEmbedErrors =
forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr (\(Int
pos,[Word8]
xs) ->
forall a. Bool -> a -> Maybe a
toMaybe
(Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Word8]
xs)
(let (Either Error Char
c,Int
n,[Word8]
rest) = [Word8] -> (Either Error Char, Int, [Word8])
decodeOne [Word8]
xs
in (forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (\Error
err -> forall a b. a -> Either a b
Left (Error
err,Int
pos)) forall a b. b -> Either a b
Right Either Error Char
c,
(Int
posforall a. Num a => a -> a -> a
+Int
n,[Word8]
rest)))) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
(,) Int
0
swap :: (a,b) -> (b,a)
swap :: forall a b. (a, b) -> (b, a)
swap (a
x,b
y) = (b
y,a
x)
{-# INLINE swap #-}
partitionEither :: [Either a b] -> ([a], [b])
partitionEither :: forall a b. [Either a b] -> ([a], [b])
partitionEither =
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\Either a b
x ~([a]
ls,[b]
rs) -> forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (\a
l -> (a
lforall a. a -> [a] -> [a]
:[a]
ls,[b]
rs)) (\b
r -> ([a]
ls,b
rforall a. a -> [a] -> [a]
:[b]
rs)) Either a b
x) ([],[])
{-# INLINE partitionEither #-}
toMaybe :: Bool -> a -> Maybe a
toMaybe :: forall a. Bool -> a -> Maybe a
toMaybe Bool
False a
_ = forall a. Maybe a
Nothing
toMaybe Bool
True a
x = forall a. a -> Maybe a
Just a
x
{-# INLINE toMaybe #-}