never executed always true always false
    1 module Parse.Literal (literal) where
    2 
    3 import Prelude hiding (exponent)
    4 import Data.Char (digitToInt, isSpace)
    5 import Text.Parsec ((<|>), (<?>), digit, hexDigit, lookAhead, many1, option, string, try, char, notFollowedBy, choice, anyChar, satisfy, manyTill, many, between, skipMany, skipMany1)
    6 import Text.Parsec.Char (octDigit, space, upper)
    7 import Parse.Helpers (processAs, escaped, expecting, sandwich, betwixt)
    8 import Parse.IParser
    9 
   10 import AST.V0_16
   11 
   12 
   13 literal :: IParser LiteralValue
   14 literal =
   15   num <|> (uncurry Str <$> str) <|> (Chr <$> chr)
   16 
   17 
   18 num :: IParser LiteralValue
   19 num =
   20   toLiteral <$> (rawNumber <?> "a number")
   21 
   22 
   23 toLiteral :: String -> LiteralValue
   24 toLiteral n
   25   | 'x' `elem` n         = IntNum (read n) HexadecimalInt
   26   | any (`elem` ("eE" :: String)) n = FloatNum (read n) ExponentFloat
   27   | any (`elem` ("." :: String)) n = FloatNum (read n) DecimalFloat
   28   | otherwise            = IntNum (read n) DecimalInt
   29 
   30 
   31 rawNumber :: IParser String
   32 rawNumber =
   33   concat <$> sequence
   34     [ option "" minus
   35     , base16 <|> base10
   36     ]
   37 
   38 
   39 base16 :: IParser String
   40 base16 =
   41   do  _ <- try (string "0x")
   42       digits <- many1 hexDigit
   43       return ("0x" ++ digits)
   44 
   45 
   46 base10 :: IParser String
   47 base10 =
   48   concat <$> sequence
   49     [ many1 digit
   50     , option "" decimals
   51     , option "" exponent
   52     ]
   53 
   54 
   55 minus :: IParser String
   56 minus =
   57   try $ do
   58     _ <- string "-"
   59     _ <- lookAhead digit
   60     return "-"
   61 
   62 
   63 decimals :: IParser String
   64 decimals =
   65   do  _ <- try $ lookAhead (string "." >> digit)
   66       _ <- string "."
   67       n <- many1 digit
   68       return ('.' : n)
   69 
   70 
   71 exponent :: IParser String
   72 exponent =
   73   do  _ <- string "e" <|> string "E"
   74       op <- option "" (string "+" <|> string "-")
   75       n <- many1 digit
   76       return ('e' : op ++ n)
   77 
   78 
   79 str :: IParser (String, StringRepresentation)
   80 str =
   81   expecting "a string" $
   82   do  (s, representation) <- choice [ multiStr, singleStr ]
   83       result <- processAs stringLiteral . sandwich '\"' $ concat s
   84       return (result, representation)
   85   where
   86     rawString quote insides =
   87         quote >> manyTill insides quote
   88 
   89     multiStr  =
   90         do  result <- rawString (try (string "\"\"\"")) multilineStringChar
   91             return (result, TripleQuotedString)
   92     singleStr =
   93         do  result <- rawString (char '"') stringChar
   94             return (result, SingleQuotedString)
   95 
   96     stringChar :: IParser String
   97     stringChar = choice [ newlineChar, escaped '\"', (:[]) <$> satisfy (/= '\"') ]
   98 
   99     multilineStringChar :: IParser String
  100     multilineStringChar =
  101         do noEnd
  102            choice [ newlineChar, escaped '\"', expandQuote <$> anyChar ]
  103         where
  104           noEnd = notFollowedBy (string "\"\"\"")
  105           expandQuote c = if c == '\"' then "\\\"" else [c]
  106 
  107     newlineChar :: IParser String
  108     newlineChar =
  109         choice
  110             [ char '\n' >> return "\\n"
  111             , char '\r' >> choice
  112                 [ char '\n' >> return "\\n"
  113                 , return "\\r"
  114                 ]
  115             ]
  116 
  117 
  118 chr :: IParser Char
  119 chr =
  120     betwixt '\'' '\'' character <?> "a character"
  121   where
  122     nonQuote = satisfy (/='\'')
  123 
  124     character =
  125       do  c <- choice
  126                 [ escaped '\''
  127                 , (:) <$> char '\\' <*> many1 nonQuote
  128                 , (:[]) <$> nonQuote
  129                 ]
  130 
  131           processAs charLiteral $ sandwich '\'' c
  132 
  133 
  134 --
  135 -- Stuff forked from Text.Parsec.Token
  136 --
  137 
  138 charLiteral :: IParser Char
  139 charLiteral     = lexeme (between (char '\'')
  140                                   (char '\'' <?> "end of character")
  141                                   characterChar )
  142                 <?> "character"
  143 
  144 characterChar :: IParser Char
  145 characterChar   = charLetter <|> charEscape
  146                 <?> "literal character"
  147 
  148 charEscape :: IParser Char
  149 charEscape      = do{ _ <- char '\\'; escapeCode }
  150 charLetter :: IParser Char
  151 charLetter      = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026') || (c == '\t'))
  152 
  153 
  154 stringLiteral :: IParser String
  155 stringLiteral   = lexeme (
  156                   do{ str <- between (char '"')
  157                                       (char '"' <?> "end of string")
  158                                       (many stringChar)
  159                     ; return (foldr (maybe id (:)) "" str)
  160                     }
  161                   <?> "literal string")
  162 
  163 stringChar :: IParser (Maybe Char)
  164 stringChar      =   do{ c <- stringLetter; return (Just c) }
  165                 <|> stringEscape
  166                 <?> "string character"
  167 
  168 stringLetter :: IParser Char
  169 stringLetter    = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026') || (c == '\t'))
  170 
  171 stringEscape :: IParser (Maybe Char)
  172 stringEscape    = do{ _ <- char '\\'
  173                     ;     do{ _ <- escapeGap  ; return Nothing }
  174                       <|> do{ _ <- escapeEmpty; return Nothing }
  175                       <|> do{ esc <- escapeCode; return (Just esc) }
  176                     }
  177 
  178 escapeEmpty :: IParser Char
  179 escapeEmpty     = char '&'
  180 escapeGap :: IParser Char
  181 escapeGap       = do{ _ <- many1 space
  182                     ; char '\\' <?> "end of string gap"
  183                     }
  184 
  185 
  186 
  187 -- escape codes
  188 escapeCode :: IParser Char
  189 escapeCode      = charEsc <|> charNum <|> charAscii <|> charControl
  190                 <?> "escape code"
  191 
  192 charControl :: IParser Char
  193 charControl     = do{ _ <- char '^'
  194                     ; code <- upper
  195                     ; return (toEnum (fromEnum code - fromEnum 'A' + 1))
  196                     }
  197 
  198 charNum :: IParser Char
  199 charNum         = do{ code <- decimal
  200                               <|> do{ _ <- char 'o'; number 8 octDigit }
  201                               <|> do{ _ <- char 'x'; number 16 hexDigit }
  202                               <|> do{ _ <- char 'u'; between (char '{') (char '}') (number 16 hexDigit) }
  203                     ; if code > 0x10FFFF
  204                       then fail "invalid escape sequence"
  205                       else return (toEnum (fromInteger code))
  206                     }
  207 
  208 charEsc :: IParser Char
  209 charEsc         = choice (map parseEsc escMap)
  210                 where
  211                   parseEsc :: (Char, a) -> IParser a
  212                   parseEsc (c,code)     = do{ _ <- char c; return code }
  213 
  214 charAscii :: IParser Char
  215 charAscii       = choice (map parseAscii asciiMap)
  216                 where
  217                   parseAscii :: (String, a) -> IParser a
  218                   parseAscii (asc,code) = try (do{ _ <- string asc; return code })
  219 
  220 
  221 -- escape code tables
  222 escMap :: [(Char, Char)]
  223 escMap          = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'")
  224 asciiMap :: [(String, Char)]
  225 asciiMap        = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
  226 
  227 ascii2codes :: [String]
  228 ascii2codes     = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
  229                     "FS","GS","RS","US","SP"]
  230 ascii3codes :: [String]
  231 ascii3codes     = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
  232                     "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
  233                     "CAN","SUB","ESC","DEL"]
  234 
  235 ascii2 :: [Char]
  236 ascii2          = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI',
  237                     '\EM','\FS','\GS','\RS','\US','\SP']
  238 ascii3 :: [Char]
  239 ascii3          = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK',
  240                     '\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK',
  241                     '\SYN','\ETB','\CAN','\SUB','\ESC','\DEL']
  242 
  243 
  244 decimal :: IParser Integer
  245 decimal         = number 10 digit
  246 
  247 number :: Integer -> IParser Char -> IParser Integer
  248 number base baseDigit
  249     = do{ digits <- many1 baseDigit
  250         ; let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits
  251         ; seq n (return n)
  252         }
  253 
  254 
  255 
  256 lexeme :: IParser a -> IParser a
  257 lexeme p
  258     = do{ x <- p; whiteSpace; return x  }
  259 
  260 
  261 --whiteSpace
  262 whiteSpace :: IParser ()
  263 whiteSpace = skipMany (simpleSpace <?> "")
  264 
  265 simpleSpace :: IParser ()
  266 simpleSpace =
  267     skipMany1 (satisfy isSpace)