never executed always true always false
1 {-# LANGUAGE OverloadedStrings #-}
2 module Cheapskate.Util (
3 joinLines
4 , tabFilter
5 , isWhitespace
6 , isEscapable
7 , normalizeReference
8 , Scanner
9 , scanIndentSpace
10 , scanNonindentSpace
11 , scanSpacesToColumn
12 , scanChar
13 , scanBlankline
14 , scanSpaces
15 , scanSpnl
16 , nfb
17 , nfbChar
18 , upToCountChars
19 ) where
20
21 import Data.Text (Text)
22 import qualified Data.Text as T
23 import Data.Char
24 import Control.Applicative ()
25 import Cheapskate.ParserCombinators
26
27 -- Utility functions.
28
29 -- Like T.unlines but does not add a final newline.
30 -- Concatenates lines with newlines between.
31 joinLines :: [Text] -> Text
32 joinLines = T.intercalate "\n"
33
34 -- Convert tabs to spaces using a 4-space tab stop.
35 tabFilter :: Text -> Text
36 tabFilter = T.concat . pad . T.split (== '\t')
37 where pad [] = []
38 pad [t] = [t]
39 pad (t:ts) = let tl = T.length t
40 n = tl + 4 - (tl `mod` 4)
41 in T.justifyLeft n ' ' t : pad ts
42
43 -- These are the whitespace characters that are significant in
44 -- parsing markdown. We can treat \160 (nonbreaking space) etc.
45 -- as regular characters. This function should be considerably
46 -- faster than the unicode-aware isSpace from Data.Char.
47 isWhitespace :: Char -> Bool
48 isWhitespace ' ' = True
49 isWhitespace '\t' = True
50 isWhitespace '\n' = True
51 isWhitespace '\r' = True
52 isWhitespace _ = False
53
54 -- The original Markdown only allowed certain symbols
55 -- to be backslash-escaped. It was hard to remember
56 -- which ones could be, so we now allow any ascii punctuation mark or
57 -- symbol to be escaped, whether or not it has a use in Markdown.
58 isEscapable :: Char -> Bool
59 isEscapable c = isAscii c && (isSymbol c || isPunctuation c)
60
61 -- Link references are case sensitive and ignore line breaks
62 -- and repeated spaces.
63 -- So, [APPLES are good] == [Apples are good] ==
64 -- [Apples
65 -- are good].
66 normalizeReference :: Text -> Text
67 normalizeReference = T.toCaseFold . T.concat . T.split isWhitespace
68
69 -- Scanners are implemented here as attoparsec parsers,
70 -- which consume input and capture nothing. They could easily
71 -- be implemented as regexes in other languages, or hand-coded.
72 -- With the exception of scanSpnl, they are all intended to
73 -- operate on a single line of input (so endOfInput = endOfLine).
74 type Scanner = Parser ()
75
76 -- Scan four spaces.
77 scanIndentSpace :: Scanner
78 scanIndentSpace = () <$ count 4 (skip (==' '))
79
80 scanSpacesToColumn :: Int -> Scanner
81 scanSpacesToColumn col = do
82 currentCol <- column <$> getPosition
83 case col - currentCol of
84 n | n >= 1 -> () <$ (count n (skip (==' ')))
85 | otherwise -> return ()
86
87 -- Scan 0-3 spaces.
88 scanNonindentSpace :: Scanner
89 scanNonindentSpace = () <$ upToCountChars 3 (==' ')
90
91 -- Scan a specified character.
92 scanChar :: Char -> Scanner
93 scanChar c = skip (== c) >> return ()
94
95 -- Scan a blankline.
96 scanBlankline :: Scanner
97 scanBlankline = scanSpaces *> endOfInput
98
99 -- Scan 0 or more spaces
100 scanSpaces :: Scanner
101 scanSpaces = skipWhile (==' ')
102
103 -- Scan 0 or more spaces, and optionally a newline
104 -- and more spaces.
105 scanSpnl :: Scanner
106 scanSpnl = scanSpaces *> option () (char '\n' *> scanSpaces)
107
108 -- Not followed by: Succeed without consuming input if the specified
109 -- scanner would not succeed.
110 nfb :: Parser a -> Scanner
111 nfb = notFollowedBy
112
113 -- Succeed if not followed by a character. Consumes no input.
114 nfbChar :: Char -> Scanner
115 nfbChar c = nfb (skip (==c))
116
117 upToCountChars :: Int -> (Char -> Bool) -> Parser Text
118 upToCountChars cnt f =
119 scan 0 (\n c -> if n < cnt && f c then Just (n+1) else Nothing)