module Data.String.Unicode
(
Unicode,
UString,
UTF8Char,
UTF8String,
UStringWithErrors,
DecodingFct,
DecodingFctEmbedErrors,
utf8ToUnicode
, utf8ToUnicodeEmbedErrors
, latin1ToUnicode
, ucs2ToUnicode
, ucs2BigEndianToUnicode
, ucs2LittleEndianToUnicode
, utf16beToUnicode
, utf16leToUnicode
, unicodeCharToUtf8
, unicodeToUtf8
, unicodeToXmlEntity
, unicodeToLatin1
, unicodeRemoveNoneAscii
, unicodeRemoveNoneLatin1
, intToCharRef
, intToCharRefHex
, intToHexString
, getDecodingFct
, getDecodingFctEmbedErrors
, getOutputEncodingFct
, normalizeNL
, guessEncoding
, getOutputEncodingFct'
, unicodeCharToUtf8'
, unicodeCharToXmlEntity'
, unicodeCharToLatin1'
)
where
import Data.Char (toUpper)
import Data.Char.IsoLatinTables
import Data.Char.Properties.XMLCharProps (isXml1ByteChar,
isXmlLatin1Char)
import Data.String.EncodingNames
import Data.String.UTF8Decoding (decodeUtf8,
decodeUtf8EmbedErrors)
-- | A single Unicode character, represented by the built-in 'Char' type.
type Unicode = Char
-- | A string of Unicode characters.
type UString = [Unicode]
-- | One unit of a UTF-8 encoded string, carried in a 'Char'.
type UTF8Char = Char
-- | A UTF-8 encoded string, carried in an ordinary 'String'.
type UTF8String = String
-- | A decoding function: maps an encoded input string to the decoded
-- Unicode string paired with a list of error messages.
type DecodingFct = String -> (UString, [String])
-- | A decoded string in which every element is either a decoded character
-- ('Right') or an error message ('Left').
type UStringWithErrors = [Either String Char]
-- | A decoding function that embeds its error messages into the result
-- instead of collecting them in a separate list.
type DecodingFctEmbedErrors = String -> UStringWithErrors
-- | Encode a Unicode string as UTF-8 by encoding every character with
-- 'unicodeCharToUtf8' and concatenating the results.
unicodeToUtf8 :: UString -> UTF8String
unicodeToUtf8 = concatMap unicodeCharToUtf8
-- | Encode a single Unicode character as a UTF-8 sequence.
--
-- Each element of the result is a 'Char' holding one UTF-8 byte value.
-- The code point ranges select the sequence length:
--
-- * @0x00 .. 0x7F@              -> 1 byte
-- * @0x80 .. 0x7FF@             -> 2 bytes
-- * @0x800 .. 0xFFFF@           -> 3 bytes
-- * @0x10000 .. 0x1FFFFF@       -> 4 bytes
-- * @0x200000 .. 0x3FFFFFF@     -> 5 bytes
-- * @0x4000000 .. 0x7FFFFFFF@   -> 6 bytes
--
-- Any other value raises an 'error'.
--
-- NOTE(review): a 'Char' cannot exceed @0x10FFFF@, so the 5- and 6-byte
-- branches and the error branch look unreachable; they are kept so the
-- function still describes the complete historic encoding scheme.
unicodeCharToUtf8 :: Unicode -> UTF8String
unicodeCharToUtf8 c
  | i >= 0          && i <= 0x0000007F =
      [ toEnum i ]
  | i >= 0x00000080 && i <= 0x000007FF =
      [ lead 0xC0 0x40, cont 0x1 ]
  | i >= 0x00000800 && i <= 0x0000FFFF =
      [ lead 0xE0 0x1000, cont 0x40, cont 0x1 ]
  | i >= 0x00010000 && i <= 0x001FFFFF =
      [ lead 0xF0 0x40000, cont 0x1000, cont 0x40, cont 0x1 ]
  | i >= 0x00200000 && i <= 0x03FFFFFF =
      [ lead 0xF8 0x1000000, cont 0x40000, cont 0x1000, cont 0x40, cont 0x1 ]
  | i >= 0x04000000 && i <= 0x7FFFFFFF =
      [ lead 0xFC 0x40000000, cont 0x1000000, cont 0x40000, cont 0x1000
      , cont 0x40, cont 0x1 ]
  | otherwise =
      error ("unicodeCharToUtf8: illegal integer argument " ++ show i)
  where
    -- Code point of the character being encoded.
    i :: Int
    i = fromEnum c

    -- Leading byte: the length tag plus the most significant payload bits.
    lead :: Int -> Int -> Char
    lead tag divisor = toEnum (tag + i `div` divisor)

    -- Continuation byte: 0x80 plus the six payload bits found at 'divisor'.
    cont :: Int -> Char
    cont divisor = toEnum (0x80 + (i `div` divisor) `mod` 0x40)
-- | Convert an ISO Latin-1 string to Unicode.
--
-- This is the identity: the input characters are returned unchanged.
latin1ToUnicode :: String -> UString
latin1ToUnicode = id
-- | Convert a string to Unicode using a translation table of
-- @(source, destination)@ character pairs.
--
-- Every input character is looked up in the table; a character without an
-- entry is passed through unchanged. The search stops as soon as a table
-- key greater than the character is met.
--
-- NOTE(review): the early exit on 'LT' presumably relies on the table being
-- sorted by ascending source character - confirm against the table modules.
latinToUnicode :: [(Char, Char)] -> String -> UString
latinToUnicode tt = map charToUni
  where
    charToUni :: Char -> Char
    charToUni c = foldr step c tt
      where
        step (src, dst) r =
          case compare c src of
            EQ -> dst  -- entry found
            LT -> c    -- already past the place where c would be: no entry
            GT -> r    -- keep searching in the rest of the table
-- | Decode a US-ASCII string.
--
-- Runs 'decodeAsciiEmbedErrors' and separates its result into the pair
-- (decoded characters, error messages).
decodeAscii :: DecodingFct
decodeAscii = swap . partitionEither . decodeAsciiEmbedErrors
-- | Decode a US-ASCII string, embedding errors in the result.
--
-- Every character below @'\\x80'@ is returned as 'Right'; every other
-- character is replaced by a 'Left' error message that names the zero-based
-- input position and the offending character.
decodeAsciiEmbedErrors :: String -> UStringWithErrors
decodeAsciiEmbedErrors str = zipWith check str [(0 :: Int) ..]
  where
    check :: Char -> Int -> Either String Char
    check c pos
      | c < '\x80' = Right c
      | otherwise  = Left (toErrStr c pos)

    toErrStr :: Char -> Int -> String
    toErrStr errChr pos =
      " at input position " ++ show pos ++ ": none ASCII char " ++ show errChr
-- | Decode a big-endian UCS-2 string.
--
-- Two consecutive input elements (high part first) are combined into one
-- Unicode character. A single trailing element of an odd-length input is
-- silently dropped.
--
-- NOTE(review): presumably every input 'Char' holds one byte (0..255);
-- larger values could push the combined value outside the 'Char' range.
ucs2BigEndianToUnicode :: String -> UString
ucs2BigEndianToUnicode (b : l : r) =
  toEnum (fromEnum b * 256 + fromEnum l) : ucs2BigEndianToUnicode r
ucs2BigEndianToUnicode [] = []
ucs2BigEndianToUnicode _  = []  -- odd trailing element: ignored
-- | Decode a little-endian UCS-2 string.
--
-- Two consecutive input elements (low part first) are combined into one
-- Unicode character. A single trailing element of an odd-length input is
-- silently dropped.
--
-- NOTE(review): presumably every input 'Char' holds one byte (0..255);
-- larger values could push the combined value outside the 'Char' range.
ucs2LittleEndianToUnicode :: String -> UString
ucs2LittleEndianToUnicode (l : b : r) =
  toEnum (fromEnum b * 256 + fromEnum l) : ucs2LittleEndianToUnicode r
ucs2LittleEndianToUnicode []  = []
ucs2LittleEndianToUnicode [_] = []  -- odd trailing element: ignored
-- | Decode a UCS-2 string, choosing the byte order from a leading byte
-- order mark.
--
-- @FE FF@ selects big endian, @FF FE@ selects little endian; the mark itself
-- is removed. Input without a mark is decoded as big endian.
ucs2ToUnicode :: String -> UString
ucs2ToUnicode ('\xFE' : '\xFF' : s) = ucs2BigEndianToUnicode s
ucs2ToUnicode ('\xFF' : '\xFE' : s) = ucs2LittleEndianToUnicode s
ucs2ToUnicode s                     = ucs2BigEndianToUnicode s
-- | Decode a UTF-8 string with 'decodeUtf8'.
--
-- A leading byte order mark (@EF BB BF@) is removed before decoding.
utf8ToUnicode :: DecodingFct
utf8ToUnicode ('\xEF' : '\xBB' : '\xBF' : s) = decodeUtf8 s
utf8ToUnicode s                             = decodeUtf8 s
-- | Decode a UTF-8 string with 'decodeUtf8EmbedErrors', keeping errors
-- embedded in the result.
--
-- A leading byte order mark (@EF BB BF@) is removed before decoding.
utf8ToUnicodeEmbedErrors :: DecodingFctEmbedErrors
utf8ToUnicodeEmbedErrors ('\xEF' : '\xBB' : '\xBF' : s) = decodeUtf8EmbedErrors s
utf8ToUnicodeEmbedErrors s                             = decodeUtf8EmbedErrors s
-- | Decode a big-endian UTF-16 string.
--
-- A leading byte order mark (@FE FF@) is removed. The rest is decoded with
-- 'ucs2BigEndianToUnicode', i.e. every 16-bit unit becomes one character;
-- surrogate pairs are NOT combined into a single code point.
utf16beToUnicode :: String -> UString
utf16beToUnicode ('\xFE' : '\xFF' : s) = ucs2BigEndianToUnicode s
utf16beToUnicode s                     = ucs2BigEndianToUnicode s
-- | Decode a little-endian UTF-16 string.
--
-- A leading byte order mark (@FF FE@) is removed. The rest is decoded with
-- 'ucs2LittleEndianToUnicode', i.e. every 16-bit unit becomes one character;
-- surrogate pairs are NOT combined into a single code point.
utf16leToUnicode :: String -> UString
utf16leToUnicode ('\xFF' : '\xFE' : s) = ucs2LittleEndianToUnicode s
utf16leToUnicode s                     = ucs2LittleEndianToUnicode s
-- | Replace every character rejected by 'isXml1ByteChar' with a decimal
-- numeric character reference (@&#nnn;@); all other characters are kept.
unicodeToXmlEntity :: UString -> String
unicodeToXmlEntity = escape isXml1ByteChar (intToCharRef . fromEnum)
-- | Replace every character rejected by 'isXmlLatin1Char' with a decimal
-- numeric character reference (@&#nnn;@); all other characters are kept.
unicodeToLatin1 :: UString -> String
unicodeToLatin1 = escape isXmlLatin1Char (intToCharRef . fromEnum)
-- | Generic escaping: characters satisfying @check@ are copied unchanged,
-- every other character is replaced by the string produced by @esc@.
escape :: (Unicode -> Bool) -> (Unicode -> String) -> UString -> String
escape check esc = concatMap step
  where
    step uc
      | check uc  = [uc]
      | otherwise = esc uc
-- | Drop every character that does not satisfy 'isXml1ByteChar'.
unicodeRemoveNoneAscii :: UString -> String
unicodeRemoveNoneAscii = filter isXml1ByteChar
-- | Drop every character that does not satisfy 'isXmlLatin1Char'.
unicodeRemoveNoneLatin1 :: UString -> String
unicodeRemoveNoneLatin1 = filter isXmlLatin1Char
-- | Build a decimal numeric character reference, e.g. @intToCharRef 65@
-- yields @"&#65;"@. The number is rendered with 'show'.
intToCharRef :: Int -> String
intToCharRef i = "&#" ++ show i ++ ";"
-- | Build a hexadecimal numeric character reference (@&#xHH;@).
--
-- The digits come from 'intToHexString' and are padded with one leading
-- @'0'@ when their count is odd, so the reference always has an even number
-- of hex digits.
intToCharRefHex :: Int -> String
intToCharRefHex i = "&#x" ++ padded ++ ";"
  where
    digits :: String
    digits = intToHexString i

    padded :: String
    padded
      | odd (length digits) = '0' : digits
      | otherwise           = digits
-- | Render a non-negative 'Int' as an upper-case hexadecimal string without
-- leading zeros (@0@ is rendered as @"0"@).
--
-- Raises an 'error' for a negative argument.
intToHexString :: Int -> String
intToHexString i
  | i == 0    = "0"
  | i > 0     = digits i ""
  | otherwise = error ("intToHexString: negative argument " ++ show i)
  where
    -- Emit the digits least significant first onto an accumulator, which
    -- yields the most significant digit at the head without repeated (++).
    digits :: Int -> String -> String
    digits 0 acc = acc
    digits n acc = digits (n `div` 16) (fourBitsToChar (n `mod` 16) : acc)
-- | Map a value in the range @0 .. 15@ to its upper-case hexadecimal digit.
--
-- The lookup uses list indexing, so an argument outside @0 .. 15@ fails at
-- runtime; callers must reduce the value first (e.g. with @`mod` 16@).
fourBitsToChar :: Int -> Char
fourBitsToChar i = "0123456789ABCDEF" !! i
{-# INLINE fourBitsToChar #-}
-- | Normalize line endings: both the sequence @\\r\\n@ and a lone @\\r@ are
-- replaced by a single @\\n@; every other character is kept.
normalizeNL :: String -> String
normalizeNL ('\r' : '\n' : rest) = '\n' : normalizeNL rest
normalizeNL ('\r' : rest)        = '\n' : normalizeNL rest
normalizeNL (c : rest)           = c : normalizeNL rest
normalizeNL []                   = []
-- | Table mapping encoding names to decoding functions.
--
-- Decoders that cannot report errors are lifted with @liftDecFct@, which
-- pairs their result with an empty error list. The empty name @""@ maps to
-- the identity decoder.
--
-- NOTE(review): 'getDecodingFct' upper-cases the requested name before the
-- lookup, so the name constants are presumably upper case - confirm in
-- "Data.String.EncodingNames".
decodingTable :: [(String, DecodingFct)]
decodingTable =
  [ (utf8,          utf8ToUnicode)
  , (isoLatin1,     liftDecFct latin1ToUnicode)
  , (usAscii,       decodeAscii)
  , (ucs2,          liftDecFct ucs2ToUnicode)
  , (utf16,         liftDecFct ucs2ToUnicode)
  , (utf16be,       liftDecFct utf16beToUnicode)
  , (utf16le,       liftDecFct utf16leToUnicode)
  , (iso8859_2,     liftDecFct (latinToUnicode iso_8859_2))
  , (iso8859_3,     liftDecFct (latinToUnicode iso_8859_3))
  , (iso8859_4,     liftDecFct (latinToUnicode iso_8859_4))
  , (iso8859_5,     liftDecFct (latinToUnicode iso_8859_5))
  , (iso8859_6,     liftDecFct (latinToUnicode iso_8859_6))
  , (iso8859_7,     liftDecFct (latinToUnicode iso_8859_7))
  , (iso8859_8,     liftDecFct (latinToUnicode iso_8859_8))
  , (iso8859_9,     liftDecFct (latinToUnicode iso_8859_9))
  , (iso8859_10,    liftDecFct (latinToUnicode iso_8859_10))
  , (iso8859_11,    liftDecFct (latinToUnicode iso_8859_11))
  , (iso8859_13,    liftDecFct (latinToUnicode iso_8859_13))
  , (iso8859_14,    liftDecFct (latinToUnicode iso_8859_14))
  , (iso8859_15,    liftDecFct (latinToUnicode iso_8859_15))
  , (iso8859_16,    liftDecFct (latinToUnicode iso_8859_16))
  , (unicodeString, liftDecFct id)
  , ("",            liftDecFct id)
  ]
  where
    -- Turn an error-free decoder into a 'DecodingFct' with no errors.
    liftDecFct :: (String -> UString) -> DecodingFct
    liftDecFct df s = (df s, [])
-- | Look up the decoding function for an encoding name.
--
-- The name is converted to upper case before the lookup in
-- 'decodingTable'; 'Nothing' is returned for an unknown name.
getDecodingFct :: String -> Maybe DecodingFct
getDecodingFct enc = lookup (map toUpper enc) decodingTable
-- | Table mapping encoding names to decoding functions that embed their
-- errors in the result.
--
-- Decoders that cannot report errors are lifted with @liftDecFct@, which
-- wraps every decoded character in 'Right'. The empty name @""@ maps to the
-- identity decoder.
decodingTableEmbedErrors :: [(String, DecodingFctEmbedErrors)]
decodingTableEmbedErrors =
  [ (utf8,          utf8ToUnicodeEmbedErrors)
  , (isoLatin1,     liftDecFct latin1ToUnicode)
  , (usAscii,       decodeAsciiEmbedErrors)
  , (ucs2,          liftDecFct ucs2ToUnicode)
  , (utf16,         liftDecFct ucs2ToUnicode)
  , (utf16be,       liftDecFct utf16beToUnicode)
  , (utf16le,       liftDecFct utf16leToUnicode)
  , (iso8859_2,     liftDecFct (latinToUnicode iso_8859_2))
  , (iso8859_3,     liftDecFct (latinToUnicode iso_8859_3))
  , (iso8859_4,     liftDecFct (latinToUnicode iso_8859_4))
  , (iso8859_5,     liftDecFct (latinToUnicode iso_8859_5))
  , (iso8859_6,     liftDecFct (latinToUnicode iso_8859_6))
  , (iso8859_7,     liftDecFct (latinToUnicode iso_8859_7))
  , (iso8859_8,     liftDecFct (latinToUnicode iso_8859_8))
  , (iso8859_9,     liftDecFct (latinToUnicode iso_8859_9))
  , (iso8859_10,    liftDecFct (latinToUnicode iso_8859_10))
  , (iso8859_11,    liftDecFct (latinToUnicode iso_8859_11))
  , (iso8859_13,    liftDecFct (latinToUnicode iso_8859_13))
  , (iso8859_14,    liftDecFct (latinToUnicode iso_8859_14))
  , (iso8859_15,    liftDecFct (latinToUnicode iso_8859_15))
  , (iso8859_16,    liftDecFct (latinToUnicode iso_8859_16))
  , (unicodeString, liftDecFct id)
  , ("",            liftDecFct id)
  ]
  where
    -- Turn an error-free decoder into one with (no) embedded errors.
    liftDecFct :: (String -> UString) -> DecodingFctEmbedErrors
    liftDecFct df = map Right . df
-- | Look up the error-embedding decoding function for an encoding name.
--
-- The name is converted to upper case before the lookup in
-- 'decodingTableEmbedErrors'; 'Nothing' is returned for an unknown name.
getDecodingFctEmbedErrors :: String -> Maybe DecodingFctEmbedErrors
getDecodingFctEmbedErrors enc = lookup (map toUpper enc) decodingTableEmbedErrors
-- | Table mapping encoding names to output encoding functions.
--
-- The empty name @""@ selects UTF-8.
outputEncodingTable :: [(String, UString -> String)]
outputEncodingTable =
  [ (utf8,          unicodeToUtf8)
  , (isoLatin1,     unicodeToLatin1)
  , (usAscii,       unicodeToXmlEntity)
  , (unicodeString, id)
  , ("",            unicodeToUtf8)
  ]
-- | Look up the output encoding function for an encoding name.
--
-- The name is converted to upper case before the lookup in
-- 'outputEncodingTable'; 'Nothing' is returned for an unknown name.
--
-- The result type is written as @UString -> String@ (Unicode in, encoded
-- text out) to agree with 'outputEncodingTable'. Both aliases denote
-- @[Char]@, so this is the same type as the former @String -> UString@.
getOutputEncodingFct :: String -> Maybe (UString -> String)
getOutputEncodingFct enc = lookup (map toUpper enc) outputEncodingTable
-- | Guess the encoding of a document from its first bytes.
--
-- The prefix is checked for a byte order mark first and then for the byte
-- patterns of a leading @'<'@ (and @"<?"@ for the 16-bit encodings) in
-- various octet orders. The empty string is returned when nothing matches.
--
-- The clause order matters: the 4-byte patterns must precede the 2-byte
-- byte order marks they start with.
--
-- The octet order @FE FF 00 00@ is reported as @"UCS-4-3412"@, the same
-- name used for that order without a byte order mark (@00 3C 00 00@).
guessEncoding :: String -> String
-- with byte order mark
guessEncoding ('\xFF' : '\xFE' : '\x00' : '\x00' : _) = "UCS-4LE"
guessEncoding ('\xFF' : '\xFE' : _)                   = "UTF-16LE"
guessEncoding ('\xFE' : '\xFF' : '\x00' : '\x00' : _) = "UCS-4-3412"
guessEncoding ('\xFE' : '\xFF' : _)                   = "UTF-16BE"
guessEncoding ('\xEF' : '\xBB' : '\xBF' : _)          = utf8
guessEncoding ('\x00' : '\x00' : '\xFE' : '\xFF' : _) = "UCS-4BE"
guessEncoding ('\x00' : '\x00' : '\xFF' : '\xFE' : _) = "UCS-4-2143"
-- without byte order mark
guessEncoding ('\x00' : '\x00' : '\x00' : '\x3C' : _) = "UCS-4BE"
guessEncoding ('\x3C' : '\x00' : '\x00' : '\x00' : _) = "UCS-4LE"
guessEncoding ('\x00' : '\x00' : '\x3C' : '\x00' : _) = "UCS-4-2143"
guessEncoding ('\x00' : '\x3C' : '\x00' : '\x00' : _) = "UCS-4-3412"
guessEncoding ('\x00' : '\x3C' : '\x00' : '\x3F' : _) = "UTF-16BE"
guessEncoding ('\x3C' : '\x00' : '\x3F' : '\x00' : _) = "UTF-16LE"
guessEncoding ('\x4C' : '\x6F' : '\xA7' : '\x94' : _) = "EBCDIC"
-- no guess possible
guessEncoding _                                       = ""
-- | Exchange the two components of a pair.
swap :: (a, b) -> (b, a)
swap (x, y) = (y, x)
{-# INLINE swap #-}
-- | Split a list of 'Either' values into the list of all 'Left' contents
-- and the list of all 'Right' contents, keeping the original order.
--
-- The accumulator is matched with a lazy (irrefutable) pattern, so both
-- result lists are produced incrementally.
partitionEither :: [Either a b] -> ([a], [b])
partitionEither = foldr step ([], [])
  where
    step x ~(ls, rs) =
      either (\l -> (l : ls, rs)) (\r -> (ls, r : rs)) x
{-# INLINE partitionEither #-}
-- | A function from 'String' to 'String'; used below for encoders that
-- prepend their output to the rest of a string.
type StringFct = String -> String
-- | Table mapping encoding names to per-character output encoders that
-- prepend the encoded character to a string.
--
-- The empty name @""@ selects UTF-8.
outputEncodingTable' :: [(String, Char -> StringFct)]
outputEncodingTable' =
  [ (utf8,      unicodeCharToUtf8')
  , (isoLatin1, unicodeCharToLatin1')
  , (usAscii,   unicodeCharToXmlEntity')
  , ("",        unicodeCharToUtf8')
  ]
-- | Look up the per-character output encoder for an encoding name.
--
-- The name is converted to upper case before the lookup in
-- 'outputEncodingTable''; 'Nothing' is returned for an unknown name.
getOutputEncodingFct' :: String -> Maybe (Char -> StringFct)
getOutputEncodingFct' enc = lookup (map toUpper enc) outputEncodingTable'
-- | Encode a single Unicode character as UTF-8 and prepend the resulting
-- bytes to a string.
--
-- The result is a function that conses the UTF-8 bytes (one byte value per
-- 'Char') in front of its argument. The code point ranges select the
-- sequence length exactly as in 'unicodeCharToUtf8': 1 byte up to @0x7F@,
-- 2 up to @0x7FF@, 3 up to @0xFFFF@, 4 up to @0x1FFFFF@, 5 up to
-- @0x3FFFFFF@ and 6 up to @0x7FFFFFFF@. Any other value raises an 'error'.
--
-- NOTE(review): a 'Char' cannot exceed @0x10FFFF@, so the 5- and 6-byte
-- branches and the error branch look unreachable; they are kept so the
-- function still describes the complete historic encoding scheme.
unicodeCharToUtf8' :: Char -> StringFct
unicodeCharToUtf8' c
  | i >= 0          && i <= 0x0000007F =
      (c :)
  | i >= 0x00000080 && i <= 0x000007FF =
      lead 0xC0 0x40 . cont 0x1
  | i >= 0x00000800 && i <= 0x0000FFFF =
      lead 0xE0 0x1000 . cont 0x40 . cont 0x1
  | i >= 0x00010000 && i <= 0x001FFFFF =
      lead 0xF0 0x40000 . cont 0x1000 . cont 0x40 . cont 0x1
  | i >= 0x00200000 && i <= 0x03FFFFFF =
      lead 0xF8 0x1000000 . cont 0x40000 . cont 0x1000 . cont 0x40 . cont 0x1
  | i >= 0x04000000 && i <= 0x7FFFFFFF =
      lead 0xFC 0x40000000 . cont 0x1000000 . cont 0x40000 . cont 0x1000
        . cont 0x40 . cont 0x1
  | otherwise =
      error ("unicodeCharToUtf8: illegal integer argument " ++ show i)
  where
    -- Code point of the character being encoded.
    i :: Int
    i = fromEnum c

    -- Prepend the leading byte: length tag plus most significant bits.
    lead :: Int -> Int -> StringFct
    lead tag divisor = (toEnum (tag + i `div` divisor) :)

    -- Prepend a continuation byte: 0x80 plus the six bits at 'divisor'.
    cont :: Int -> StringFct
    cont divisor = (toEnum (0x80 + (i `div` divisor) `mod` 0x40) :)
-- | Prepend a character to a string, replacing it with a decimal numeric
-- character reference (@&#nnn;@) when 'isXml1ByteChar' rejects it.
unicodeCharToXmlEntity' :: Char -> StringFct
unicodeCharToXmlEntity' c
  | isXml1ByteChar c = (c :)
  | otherwise        = (intToCharRef (fromEnum c) ++)
-- | Prepend a character to a string, replacing it with a decimal numeric
-- character reference (@&#nnn;@) when 'isXmlLatin1Char' rejects it.
unicodeCharToLatin1' :: Char -> StringFct
unicodeCharToLatin1' c
  | isXmlLatin1Char c = (c :)
  | otherwise         = (intToCharRef (fromEnum c) ++)