never executed always true always false
1 module Parse.Literal (literal) where
2
3 import Prelude hiding (exponent)
4 import Data.Char (digitToInt, isSpace)
5 import Text.Parsec ((<|>), (<?>), digit, hexDigit, lookAhead, many1, option, string, try, char, notFollowedBy, choice, anyChar, satisfy, manyTill, many, between, skipMany, skipMany1)
6 import Text.Parsec.Char (octDigit, space, upper)
7 import Parse.Helpers (processAs, escaped, expecting, sandwich, betwixt)
8 import Parse.IParser
9
10 import AST.V0_16
11
12
13 literal :: IParser LiteralValue
14 literal =
15 num <|> (uncurry Str <$> str) <|> (Chr <$> chr)
16
17
18 num :: IParser LiteralValue
19 num =
20 toLiteral <$> (rawNumber <?> "a number")
21
22
23 toLiteral :: String -> LiteralValue
24 toLiteral n
25 | 'x' `elem` n = IntNum (read n) HexadecimalInt
26 | any (`elem` ("eE" :: String)) n = FloatNum (read n) ExponentFloat
27 | any (`elem` ("." :: String)) n = FloatNum (read n) DecimalFloat
28 | otherwise = IntNum (read n) DecimalInt
29
30
31 rawNumber :: IParser String
32 rawNumber =
33 concat <$> sequence
34 [ option "" minus
35 , base16 <|> base10
36 ]
37
38
39 base16 :: IParser String
40 base16 =
41 do _ <- try (string "0x")
42 digits <- many1 hexDigit
43 return ("0x" ++ digits)
44
45
46 base10 :: IParser String
47 base10 =
48 concat <$> sequence
49 [ many1 digit
50 , option "" decimals
51 , option "" exponent
52 ]
53
54
55 minus :: IParser String
56 minus =
57 try $ do
58 _ <- string "-"
59 _ <- lookAhead digit
60 return "-"
61
62
63 decimals :: IParser String
64 decimals =
65 do _ <- try $ lookAhead (string "." >> digit)
66 _ <- string "."
67 n <- many1 digit
68 return ('.' : n)
69
70
71 exponent :: IParser String
72 exponent =
73 do _ <- string "e" <|> string "E"
74 op <- option "" (string "+" <|> string "-")
75 n <- many1 digit
76 return ('e' : op ++ n)
77
78
79 str :: IParser (String, StringRepresentation)
80 str =
81 expecting "a string" $
82 do (s, representation) <- choice [ multiStr, singleStr ]
83 result <- processAs stringLiteral . sandwich '\"' $ concat s
84 return (result, representation)
85 where
86 rawString quote insides =
87 quote >> manyTill insides quote
88
89 multiStr =
90 do result <- rawString (try (string "\"\"\"")) multilineStringChar
91 return (result, TripleQuotedString)
92 singleStr =
93 do result <- rawString (char '"') stringChar
94 return (result, SingleQuotedString)
95
96 stringChar :: IParser String
97 stringChar = choice [ newlineChar, escaped '\"', (:[]) <$> satisfy (/= '\"') ]
98
99 multilineStringChar :: IParser String
100 multilineStringChar =
101 do noEnd
102 choice [ newlineChar, escaped '\"', expandQuote <$> anyChar ]
103 where
104 noEnd = notFollowedBy (string "\"\"\"")
105 expandQuote c = if c == '\"' then "\\\"" else [c]
106
107 newlineChar :: IParser String
108 newlineChar =
109 choice
110 [ char '\n' >> return "\\n"
111 , char '\r' >> choice
112 [ char '\n' >> return "\\n"
113 , return "\\r"
114 ]
115 ]
116
117
118 chr :: IParser Char
119 chr =
120 betwixt '\'' '\'' character <?> "a character"
121 where
122 nonQuote = satisfy (/='\'')
123
124 character =
125 do c <- choice
126 [ escaped '\''
127 , (:) <$> char '\\' <*> many1 nonQuote
128 , (:[]) <$> nonQuote
129 ]
130
131 processAs charLiteral $ sandwich '\'' c
132
133
134 --
135 -- Stuff forked from Text.Parsec.Token
136 --
137
138 charLiteral :: IParser Char
139 charLiteral = lexeme (between (char '\'')
140 (char '\'' <?> "end of character")
141 characterChar )
142 <?> "character"
143
144 characterChar :: IParser Char
145 characterChar = charLetter <|> charEscape
146 <?> "literal character"
147
148 charEscape :: IParser Char
149 charEscape = do{ _ <- char '\\'; escapeCode }
150 charLetter :: IParser Char
151 charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026') || (c == '\t'))
152
153
154 stringLiteral :: IParser String
155 stringLiteral = lexeme (
156 do{ str <- between (char '"')
157 (char '"' <?> "end of string")
158 (many stringChar)
159 ; return (foldr (maybe id (:)) "" str)
160 }
161 <?> "literal string")
162
163 stringChar :: IParser (Maybe Char)
164 stringChar = do{ c <- stringLetter; return (Just c) }
165 <|> stringEscape
166 <?> "string character"
167
168 stringLetter :: IParser Char
169 stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026') || (c == '\t'))
170
171 stringEscape :: IParser (Maybe Char)
172 stringEscape = do{ _ <- char '\\'
173 ; do{ _ <- escapeGap ; return Nothing }
174 <|> do{ _ <- escapeEmpty; return Nothing }
175 <|> do{ esc <- escapeCode; return (Just esc) }
176 }
177
178 escapeEmpty :: IParser Char
179 escapeEmpty = char '&'
180 escapeGap :: IParser Char
181 escapeGap = do{ _ <- many1 space
182 ; char '\\' <?> "end of string gap"
183 }
184
185
186
187 -- escape codes
188 escapeCode :: IParser Char
189 escapeCode = charEsc <|> charNum <|> charAscii <|> charControl
190 <?> "escape code"
191
192 charControl :: IParser Char
193 charControl = do{ _ <- char '^'
194 ; code <- upper
195 ; return (toEnum (fromEnum code - fromEnum 'A' + 1))
196 }
197
198 charNum :: IParser Char
199 charNum = do{ code <- decimal
200 <|> do{ _ <- char 'o'; number 8 octDigit }
201 <|> do{ _ <- char 'x'; number 16 hexDigit }
202 <|> do{ _ <- char 'u'; between (char '{') (char '}') (number 16 hexDigit) }
203 ; if code > 0x10FFFF
204 then fail "invalid escape sequence"
205 else return (toEnum (fromInteger code))
206 }
207
208 charEsc :: IParser Char
209 charEsc = choice (map parseEsc escMap)
210 where
211 parseEsc :: (Char, a) -> IParser a
212 parseEsc (c,code) = do{ _ <- char c; return code }
213
214 charAscii :: IParser Char
215 charAscii = choice (map parseAscii asciiMap)
216 where
217 parseAscii :: (String, a) -> IParser a
218 parseAscii (asc,code) = try (do{ _ <- string asc; return code })
219
220
221 -- escape code tables
222 escMap :: [(Char, Char)]
223 escMap = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'")
224 asciiMap :: [(String, Char)]
225 asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
226
227 ascii2codes :: [String]
228 ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
229 "FS","GS","RS","US","SP"]
230 ascii3codes :: [String]
231 ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
232 "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
233 "CAN","SUB","ESC","DEL"]
234
235 ascii2 :: [Char]
236 ascii2 = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI',
237 '\EM','\FS','\GS','\RS','\US','\SP']
238 ascii3 :: [Char]
239 ascii3 = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK',
240 '\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK',
241 '\SYN','\ETB','\CAN','\SUB','\ESC','\DEL']
242
243
244 decimal :: IParser Integer
245 decimal = number 10 digit
246
247 number :: Integer -> IParser Char -> IParser Integer
248 number base baseDigit
249 = do{ digits <- many1 baseDigit
250 ; let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits
251 ; seq n (return n)
252 }
253
254
255
256 lexeme :: IParser a -> IParser a
257 lexeme p
258 = do{ x <- p; whiteSpace; return x }
259
260
261 --whiteSpace
262 whiteSpace :: IParser ()
263 whiteSpace = skipMany (simpleSpace <?> "")
264
265 simpleSpace :: IParser ()
266 simpleSpace =
267 skipMany1 (satisfy isSpace)