Class declared in MODPATH/userguide/vendor/markdown/markdown.php on line 227.
$block_gamut: array(5) ( "doHeaders" => integer 10 "doHorizontalRules" => integer 20 "doLists" => integer 40 "doCodeBlocks" => integer 50 "doBlockQuotes" => integer 60 )
$document_gamut: array(2) ( "stripLinkDefinitions" => integer 20 "runBasicBlockGamut" => integer 30 )
$em_relist: array(3) ( "" => string(50) "(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)" "*" => string(22) "(?<=\S)(?<!\*)\*(?!\*)" "_" => string(19) "(?<=\S)(?<!_)_(?!_)" )
$em_strong_prepared_relist: NULL
$em_strong_relist: array(3) ( "" => string(56) "(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)" "***" => string(26) "(?<=\S)(?<!\*)\*\*\*(?!\*)" "___" => string(21) "(?<=\S)(?<!_)___(?!_)" )
$empty_element_suffix: string(3) " />"
$escape_chars: string(16) "\`*_{}[]()>#+-.!"
$escape_chars_re: NULL
$html_hashes: array(0)
$in_anchor: bool FALSE
$list_level: integer 0
$nested_brackets_depth: integer 6
$nested_brackets_re: NULL
$nested_url_parenthesis_depth: integer 4
$nested_url_parenthesis_re: NULL
$no_entities: bool FALSE
$no_markup: bool FALSE
$predef_titles: array(0)
$predef_urls: array(0)
$span_gamut: array(7) ( "parseSpan" => integer -30 "doImages" => integer 10 "doAnchors" => integer 20 "doAutoLinks" => integer 30 "encodeAmpsAndAngles" => integer 40 "doItalicsAndBold" => integer 50 "doHardBreaks" => integer 60 )
$strong_relist: array(3) ( "" => string(53) "(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)" "**" => string(24) "(?<=\S)(?<!\*)\*\*(?!\*)" "__" => string(20) "(?<=\S)(?<!_)__(?!_)" )
$tab_width: integer 4
$titles: array(0)
$urls: array(0)
$utf8_strlen: string(9) "mb_strlen"
function __construct()
{
    #
    # Constructor. Initializes the derived member variables: the detab
    # string-length helper, the prepared emphasis expressions, the nested
    # bracket/parenthesis expressions and the escape-character class.
    #
    $this->_initDetab();
    $this->prepareItalicsAndBold();

    # Balanced square brackets, nested up to `nested_brackets_depth` levels.
    $this->nested_brackets_re =
        str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth) .
        str_repeat('\])*', $this->nested_brackets_depth);

    # Balanced parentheses inside URLs, nested up to
    # `nested_url_parenthesis_depth` levels.
    $this->nested_url_parenthesis_re =
        str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth) .
        str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

    # Character class matching any character that can be backslash-escaped.
    $this->escape_chars_re = '[' . preg_quote($this->escape_chars) . ']';

    # Sort document, block, and span gamut in ascending priority order.
    asort($this->document_gamut);
    asort($this->block_gamut);
    asort($this->span_gamut);
}
function _detab_callback($matches)
{
    #
    # preg callback used by detab(): expands the tabs of one matched line.
    # $matches[0] is the whole line; the return value is that line with
    # every tab replaced by the number of spaces needed to reach the next
    # multiple of `tab_width`.
    #
    $measure  = $this->utf8_strlen; # name of (or callable for) the length function
    $segments = explode("\t", $matches[0]);

    # The text before the first tab is kept verbatim; every following
    # segment is preceded by padding up to the next tab stop.
    $expanded = array_shift($segments);
    foreach ($segments as $segment) {
        $padding   = $this->tab_width - ($measure($expanded, 'UTF-8') % $this->tab_width);
        $expanded .= str_repeat(" ", $padding) . $segment;
    }

    return $expanded;
}
function _doAnchors_inline_callback($matches)
{
    #
    # preg callback for inline links: [link text](url "optional title").
    # Groups read: $2 = link text, $3 = URL written between angle brackets,
    # $4 = bare URL, $7 = title (absent when no title was given).
    # Returns the hash token standing for the generated <a> element.
    #
    $url = $matches[3] == '' ? $matches[4] : $matches[3];
    $url = $this->encodeAttribute($url);

    $result = "<a href=\"$url\"";
    if (isset($matches[7])) {
        $title   = $this->encodeAttribute($matches[7]);
        $result .= " title=\"$title\"";
    }

    # Run the span gamut exactly once, as the reference-style callback does.
    # A second pass re-encodes the already encoded text: in no-entities mode
    # "&" would come out as "&amp;amp;".
    $link_text = $this->runSpanGamut($matches[2]);
    $result   .= ">$link_text</a>";

    return $this->hashPart($result);
}
function _doAnchors_reference_callback($matches)
{
    #
    # preg callback for reference-style links: [text][id], [text][] and the
    # [text] shortcut. Groups read: $1 = whole match, $2 = link text,
    # $3 = id (may be missing or empty).
    # Returns a hash token for the <a> element, or the untouched match when
    # no URL is registered under the id.
    #
    $original = $matches[1];
    $label    = $matches[2];

    # Shortcut links like [this][] or [this] use the link text as id.
    $id = (isset($matches[3]) && $matches[3] != "") ? $matches[3] : $label;

    # Lower-case the id and turn embedded newlines into spaces.
    $id = preg_replace('{[ ]?\n}', ' ', strtolower($id));

    if (!isset($this->urls[$id])) {
        return $original;
    }

    $href = $this->encodeAttribute($this->urls[$id]);
    $html = "<a href=\"$href\"";
    if (isset($this->titles[$id])) {
        $tip   = $this->encodeAttribute($this->titles[$id]);
        $html .= " title=\"$tip\"";
    }

    $label = $this->runSpanGamut($label);
    $html .= ">$label</a>";

    return $this->hashPart($html);
}
function _doAutoLinks_email_callback($matches)
{
    #
    # preg callback for <address@domain> autolinks. $matches[1] is the
    # address; it is turned into an obfuscated mailto link and hashed.
    #
    return $this->hashPart($this->encodeEmailAddress($matches[1]));
}
function _doAutoLinks_url_callback($matches)
{
    #
    # preg callback for <http://...> autolinks. $matches[1] is the URL,
    # used both as href and as link text. Returns the hash token.
    #
    $href = $this->encodeAttribute($matches[1]);

    return $this->hashPart("<a href=\"$href\">$href</a>");
}
function _doBlockQuotes_callback($matches)
{
    #
    # preg callback for doBlockQuotes(). $matches[1] is the quoted block,
    # still carrying its ">" markers. Returns the hashed <blockquote>
    # element surrounded by newlines.
    #
    # Trim one level of quoting and empty whitespace-only lines.
    $quoted = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

    # Recurse: the content is a Markdown document of its own.
    $quoted = $this->runBlockGamut($quoted);

    # Indent the result, then undo the indentation inside <pre> blocks
    # where leading spaces would be significant.
    $quoted = preg_replace('/^/m', " ", $quoted);
    $quoted = preg_replace_callback(
        '{(\s*<pre>.+?</pre>)}sx',
        array($this, '_DoBlockQuotes_callback2'),
        $quoted
    );

    $token = $this->hashBlock("<blockquote>\n$quoted\n</blockquote>");

    return "\n" . $token . "\n\n";
}
function _doBlockQuotes_callback2($matches)
{
    #
    # preg callback used by _doBlockQuotes_callback(): strips the leading
    # indentation from every line of a <pre> block ($matches[1]).
    #
    return preg_replace('/^ /m', '', $matches[1]);
}
function _doCodeBlocks_callback($matches)
{
    #
    # preg callback for doCodeBlocks(). $matches[1] is the indented code
    # block. Returns the hashed <pre><code> element surrounded by blank
    # lines.
    #
    $code = htmlspecialchars($this->outdent($matches[1]), ENT_NOQUOTES);

    # Trim leading and trailing newlines.
    $code = preg_replace('/\A\n+|\n+\z/', '', $code);

    $token = $this->hashBlock("<pre><code>$code\n</code></pre>");

    return "\n\n" . $token . "\n\n";
}
function _doHardBreaks_callback($matches)
{
    #
    # preg callback for doHardBreaks(): replaces the matched trailing
    # spaces + newline by a hashed <br> tag followed by a newline.
    #
    $tag = "<br" . $this->empty_element_suffix . "\n";

    return $this->hashPart($tag);
}
function _doHeaders_callback_atx($matches)
{
    #
    # preg callback for atx-style headers ("## Title").
    # $matches[1] is the run of "#" (its length is the header level),
    # $matches[2] the header text. Returns the hashed <hN> element.
    #
    $depth   = strlen($matches[1]);
    $heading = "<h$depth>" . $this->runSpanGamut($matches[2]) . "</h$depth>";
    $token   = $this->hashBlock($heading);

    return "\n" . $token . "\n\n";
}
function _doHeaders_callback_setext($matches)
{
    #
    # preg callback for Setext-style headers (text underlined with "=" or
    # "-"). $matches[1] is the header text, $matches[2] the underline.
    # Returns the hashed <h1>/<h2> element, or the untouched match when the
    # "header" is really an empty list item.
    #
    # Terrible hack to check we haven't found an empty list item.
    if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) {
        return $matches[0];
    }

    # Square-bracket offset: the curly-brace form ($str{0}) no longer
    # exists in PHP 8.
    $level = $matches[2][0] == '=' ? 1 : 2;
    $block = "<h$level>" . $this->runSpanGamut($matches[1]) . "</h$level>";

    return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doImages_inline_callback($matches)
{
    #
    # preg callback for inline images: ![alt text](url "optional title").
    # Groups read: $2 = alt text, $3 = URL written between angle brackets,
    # $4 = bare URL, $7 = title (absent when no title was given).
    # Returns the hash token standing for the <img> tag.
    #
    $alt = $this->encodeAttribute($matches[2]);
    $src = $this->encodeAttribute($matches[3] == '' ? $matches[4] : $matches[3]);

    $tag = "<img src=\"$src\" alt=\"$alt\"";
    if (isset($matches[7])) {
        $tip  = $this->encodeAttribute($matches[7]);
        $tag .= " title=\"$tip\"";
    }
    $tag .= $this->empty_element_suffix;

    return $this->hashPart($tag);
}
function _doImages_reference_callback($matches)
{
    #
    # preg callback for reference-style images: ![alt text][id].
    # Groups read: $1 = whole match, $2 = alt text, $3 = id (may be empty).
    # Returns a hash token for the <img> tag, or the untouched match when
    # no URL is registered under the id.
    #
    $original = $matches[1];
    $alt      = $matches[2];

    $id = strtolower($matches[3]);
    if ($id == "") {
        # Shortcut form ![this][]: the alt text doubles as id.
        $id = strtolower($alt);
    }

    $alt = $this->encodeAttribute($alt);

    # If there's no such link ID, leave the text intact.
    if (!isset($this->urls[$id])) {
        return $original;
    }

    $src = $this->encodeAttribute($this->urls[$id]);
    $tag = "<img src=\"$src\" alt=\"$alt\"";
    if (isset($this->titles[$id])) {
        $tip  = $this->encodeAttribute($this->titles[$id]);
        $tag .= " title=\"$tip\"";
    }
    $tag .= $this->empty_element_suffix;

    return $this->hashPart($tag);
}
function _doLists_callback($matches)
{
    #
    # preg callback for doLists(). $matches[1] is the whole list,
    # $matches[3] the marker of its first item. Returns the hashed
    # <ul>/<ol> element surrounded by newlines.
    #
    # Patterns matching list item bullets and number markers.
    $bullet_re = '[*+-]';
    $number_re = '\d+[.]';

    # The first marker decides the kind of list and which markers its
    # items may use.
    $is_bulleted = preg_match("/$bullet_re/", $matches[3]);
    $list_type   = $is_bulleted ? "ul" : "ol";
    $marker_re   = $is_bulleted ? $bullet_re : $number_re;

    $items = $this->processListItems($matches[1] . "\n", $marker_re);
    $token = $this->hashBlock("<$list_type>\n" . $items . "</$list_type>");

    return "\n" . $token . "\n\n";
}
function _hashHTMLBlocks_callback($matches)
{
    #
    # preg callback for hashHTMLBlocks(): stores the matched HTML block
    # ($matches[1]) in the hash table and returns its token isolated by
    # blank lines.
    #
    $token = $this->hashBlock($matches[1]);

    return "\n\n" . $token . "\n\n";
}
function _initDetab()
{
    #
    # Check for the availability of the function named by the
    # `utf8_strlen` property (initially `mb_strlen`). If it cannot be
    # called, replace the property with a fallback that loosely counts the
    # number of UTF-8 characters with a regular expression.
    #
    # is_callable() also accepts a closure, so calling this method a
    # second time is harmless.
    if (is_callable($this->utf8_strlen)) {
        return;
    }

    # A closure replaces create_function(), which no longer exists in
    # PHP 8. Callers pass an extra encoding argument, which is ignored.
    $this->utf8_strlen = function ($text) {
        return preg_match_all(
            '/[\\x00-\\xBF]|[\\xC0-\\xFF][\\x80-\\xBF]*/',
            $text,
            $m
        );
    };
}
function _processListItems_callback($matches)
{
    #
    # preg callback for processListItems(). Groups read: $1 = blank line
    # before the item, $2 = leading whitespace, $3 = marker and its space,
    # $4 = item text, $5 = blank line after the item.
    # Returns the item wrapped in <li>.
    #
    $body          = $matches[4];
    $blank_before  = isset($matches[1]) ? $matches[1] : null;
    $indent        = isset($matches[2]) ? $matches[2] : null;
    $marker        = $matches[3];
    $blank_after   = isset($matches[5]) ? $matches[5] : null;

    $is_block_item = $blank_before || $blank_after || preg_match('/\n{2,}/', $body);

    if ($is_block_item) {
        # Replace the marker with the equivalent whitespace indentation,
        # then process the item as block-level content.
        $body = $indent . str_repeat(' ', strlen($marker)) . $body;
        $body = $this->runBlockGamut($this->outdent($body) . "\n");
    } else {
        # Tight item: recurse for sub-lists, then treat as span content.
        $body = $this->doLists($this->outdent($body));
        $body = preg_replace('/\n+$/', '', $body);
        $body = $this->runSpanGamut($body);
    }

    return "<li>" . $body . "</li>\n";
}
function _stripLinkDefinitions_callback($matches)
{
    #
    # preg callback for stripLinkDefinitions(). Records the URL
    # ($matches[2]) and title ($matches[3], may be missing) under the
    # lower-cased id ($matches[1]) and removes the definition from the text.
    #
    $id = strtolower($matches[1]);

    $this->urls[$id]   = $matches[2];
    $this->titles[$id] = isset($matches[3]) ? $matches[3] : null;

    # The whole definition block is replaced by nothing.
    return '';
}
function _unhash_callback($matches)
{
    #
    # preg callback for unhash(): looks up the matched token ($matches[0])
    # in the hash table and returns the text stored under it.
    #
    $token = $matches[0];

    return $this->html_hashes[$token];
}
function detab($text)
{
    #
    # Replace tabs with the appropriate amount of space.
    #
    # Only lines containing a tab are visited; each one is rebuilt by
    # _detab_callback(), which pads every tab-delimited segment up to the
    # next tab stop.
    #
    return preg_replace_callback(
        '/^.*\t.*$/m',
        array($this, '_detab_callback'),
        $text
    );
}
function doAnchors($text)
{
    #
    # Turn Markdown link shortcuts into XHTML <a> tags.
    #
    # Links cannot nest: while a link's own text is being processed
    # `in_anchor` is set and this method returns the text untouched.
    #
    if ($this->in_anchor) {
        return $text;
    }
    $this->in_anchor = true;

    #
    # First, handle reference-style links: [link text] [id]
    #
    $text = preg_replace_callback('{
        (                   # wrap whole match in $1
          \[
            (' . $this->nested_brackets_re . ') # link text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)           # id = $3
          \]
        )
        }xs',
        array($this, '_doAnchors_reference_callback'), $text);

    #
    # Next, inline-style links: [link text](url "optional title")
    #
    $text = preg_replace_callback('{
        (                   # wrap whole match in $1
          \[
            (' . $this->nested_brackets_re . ') # link text = $2
          \]
          \(                # literal paren
            [ ]*
            (?:
                <(\S*)>     # href = $3
            |
                (' . $this->nested_url_parenthesis_re . ')  # href = $4
            )
            [ ]*
            (               # $5
              ([\'"])       # quote char = $6
              (.*?)         # Title = $7
              \6            # matching quote
              [ ]*          # ignore any spaces between closing quote and )
            )?              # title is optional
          \)
        )
        }xs',
        array($this, '_DoAnchors_inline_callback'), $text);

    #
    # Last, handle reference-style shortcuts: [link text]
    # These must come last in case you've also got [link test][1]
    # or [link test](/foo)
    #
    $text = preg_replace_callback('{
        (                   # wrap whole match in $1
          \[
            ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
          \]
        )
        }xs',
        array($this, '_doAnchors_reference_callback'), $text);

    $this->in_anchor = false;
    return $text;
}
function doAutoLinks($text)
{
    #
    # Turn <http://example.com/> and <address@example.com> into links.
    #
    # URLs: <scheme:...> for http, https, ftp and dict.
    $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
        array($this, '_doAutoLinks_url_callback'), $text);

    # Email addresses: <address@domain.foo>
    $text = preg_replace_callback('{
        <
        (?:mailto:)?
        (
            [-.\w\x80-\xFF]+
            \@
            [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
        )
        >
        }xi',
        array($this, '_doAutoLinks_email_callback'), $text);

    return $text;
}
function doBlockQuotes($text)
{
    #
    # Turn runs of lines quoted with ">" into <blockquote> elements.
    # Each run is handed to _doBlockQuotes_callback().
    #
    $text = preg_replace_callback('/
          (                     # Wrap whole match in $1
            (?>
              ^[ ]*>[ ]?        # ">" at the start of a line
                .+\n            # rest of the first line
              (.+\n)*           # subsequent consecutive lines
              \n*               # blanks
            )+
          )
        /xm',
        array($this, '_doBlockQuotes_callback'), $text);

    return $text;
}
function doCodeBlocks($text)
{
    #
    # Process Markdown `<pre><code>` blocks.
    #
    $text = preg_replace_callback('{
            (?:\n\n|\A\n?)
            (               # $1 = the code block -- one or more lines, starting with a space/tab
              (?>
                [ ]{' . $this->tab_width . '}  # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,' . $this->tab_width . '}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
        }xm',
        array($this, '_doCodeBlocks_callback'), $text);

    return $text;
}
function doHardBreaks($text)
{
    #
    # Hard line breaks: two or more spaces at the end of a line become a
    # <br> tag (built by _doHardBreaks_callback()).
    #
    $handler = array($this, '_doHardBreaks_callback');

    return preg_replace_callback('/ {2,}\n/', $handler, $text);
}
function doHeaders($text)
{
    #
    # Turn Setext-style and atx-style headers into <hN> elements.
    #
    # Setext-style headers:
    #     Header 1
    #     ========
    #
    #     Header 2
    #     --------
    #
    $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
        array($this, '_doHeaders_callback_setext'), $text);

    # atx-style headers:
    #   # Header 1
    #   ## Header 2
    #   ## Header 2 with closing hashes ##
    #   ...
    #   ###### Header 6
    #
    $text = preg_replace_callback('{
            ^(\#{1,6})  # $1 = string of #\'s
            [ ]*
            (.+?)       # $2 = Header text
            [ ]*
            \#*         # optional closing #\'s (not counted)
            \n+
        }xm',
        array($this, '_doHeaders_callback_atx'), $text);

    return $text;
}
function doHorizontalRules($text)
{
    #
    # Do Horizontal Rules: a line made of three or more "-", "*" or "_",
    # optionally separated by spaces, becomes a hashed <hr> tag.
    #
    # The replacement token is built before matching, so one hash entry
    # is created on every call, whether or not a rule is found.
    return preg_replace(
        '{
            ^[ ]{0,3}   # Leading space
            ([-*_])     # $1: First marker
            (?>         # Repeated marker group
                [ ]{0,2}    # Zero, one, or two spaces.
                \1          # Marker character
            ){2,}       # Group repeated at least twice
            [ ]*        # Tailing spaces
            $           # End of line.
        }mx',
        "\n" . $this->hashBlock("<hr$this->empty_element_suffix") . "\n",
        $text
    );
}
function doImages($text)
{
    #
    # Turn Markdown image shortcuts into <img> tags.
    #
    #
    # First, handle reference-style labeled images: ![alt text][id]
    #
    $text = preg_replace_callback('{
        (               # wrap whole match in $1
          !\[
            (' . $this->nested_brackets_re . ')     # alt text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]

        )
        }xs',
        array($this, '_doImages_reference_callback'), $text);

    #
    # Next, handle inline images: ![alt text](url "optional title")
    # Don't forget: encode * and _
    #
    $text = preg_replace_callback('{
        (               # wrap whole match in $1
          !\[
            (' . $this->nested_brackets_re . ')     # alt text = $2
          \]
          \s?           # One optional whitespace character
          \(            # literal paren
            [ ]*
            (?:
                <(\S*)> # src url = $3
            |
                (' . $this->nested_url_parenthesis_re . ')  # src url = $4
            )
            [ ]*
            (           # $5
              ([\'"])   # quote char = $6
              (.*?)     # title = $7
              \6        # matching quote
              [ ]*
            )?          # title is optional
          \)
        )
        }xs',
        array($this, '_doImages_inline_callback'), $text);

    return $text;
}
function doItalicsAndBold($text)
{
    #
    # Turn *emphasis*, **strong** and ***both*** (also with underscores)
    # into <em>/<strong> elements.
    #
    # The text is split one emphasis token at a time. $token_stack holds
    # the markers currently open and $text_stack the text collected since
    # each was opened; index 0 is always the innermost level. $em and
    # $strong hold the marker that opened the current <em>/<strong>, or ''
    # when none is open.
    #
    $token_stack = array('');
    $text_stack = array('');
    $em = '';
    $strong = '';
    $tree_char_em = false;

    while (1) {
        #
        # Get prepared regular expression for searching emphasis tokens
        # in current context.
        #
        $token_re = $this->em_strong_prepared_relist["$em$strong"];

        #
        # Each loop iteration searches for the next emphasis token.
        #
        $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
        $text_stack[0] .= $parts[0];
        $token =& $parts[1];
        $text =& $parts[2];

        if (empty($token)) {
            # Reached end of text span: empty stack without emitting
            # any more emphasis.
            # Statement order matters here: each array_shift() runs before
            # the element it is appended to is looked up.
            while ($token_stack[0]) {
                $text_stack[1] .= array_shift($token_stack);
                $text_stack[0] .= array_shift($text_stack);
            }
            break;
        }

        $token_len = strlen($token);
        if ($tree_char_em) {
            # Reached closing marker while inside a three-char emphasis.
            if ($token_len == 3) {
                # Three-char closing marker, close em and strong.
                array_shift($token_stack);
                $span = array_shift($text_stack);
                $span = $this->runSpanGamut($span);
                $span = "<strong><em>$span</em></strong>";
                $text_stack[0] .= $this->hashPart($span);
                $em = '';
                $strong = '';
            } else {
                # Other closing marker: close one em or strong and
                # change current token state to match the other.
                # ($token[0]: the curly-brace offset form no longer
                # exists in PHP 8.)
                $token_stack[0] = str_repeat($token[0], 3 - $token_len);
                $tag = $token_len == 2 ? "strong" : "em";
                $span = $text_stack[0];
                $span = $this->runSpanGamut($span);
                $span = "<$tag>$span</$tag>";
                $text_stack[0] = $this->hashPart($span);
                $$tag = ''; # $$tag stands for $em or $strong
            }
            $tree_char_em = false;
        } else if ($token_len == 3) {
            if ($em) {
                # Reached closing marker for both em and strong:
                # close the two innermost levels, whichever order they
                # were opened in.
                for ($i = 0; $i < 2; ++$i) {
                    $shifted_token = array_shift($token_stack);
                    $tag = strlen($shifted_token) == 2 ? "strong" : "em";
                    $span = array_shift($text_stack);
                    $span = $this->runSpanGamut($span);
                    $span = "<$tag>$span</$tag>";
                    $text_stack[0] .= $this->hashPart($span);
                    $$tag = ''; # $$tag stands for $em or $strong
                }
            } else {
                # Reached opening three-char emphasis marker. Push on token
                # stack; will be handled by the special condition above.
                $em = $token[0];
                $strong = "$em$em";
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $tree_char_em = true;
            }
        } else if ($token_len == 2) {
            if ($strong) {
                # Unwind any dangling emphasis marker:
                if (strlen($token_stack[0]) == 1) {
                    $text_stack[1] .= array_shift($token_stack);
                    $text_stack[0] .= array_shift($text_stack);
                }
                # Closing strong marker:
                array_shift($token_stack);
                $span = array_shift($text_stack);
                $span = $this->runSpanGamut($span);
                $span = "<strong>$span</strong>";
                $text_stack[0] .= $this->hashPart($span);
                $strong = '';
            } else {
                # Opening strong marker.
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $strong = $token;
            }
        } else {
            # Here $token_len == 1
            if ($em) {
                if (strlen($token_stack[0]) == 1) {
                    # Closing emphasis marker:
                    array_shift($token_stack);
                    $span = array_shift($text_stack);
                    $span = $this->runSpanGamut($span);
                    $span = "<em>$span</em>";
                    $text_stack[0] .= $this->hashPart($span);
                    $em = '';
                } else {
                    # A strong is open inside the em: keep the marker
                    # as literal text.
                    $text_stack[0] .= $token;
                }
            } else {
                # Opening emphasis marker.
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $em = $token;
            }
        }
    }
    return $text_stack[0];
}
function doLists($text)
{
    #
    # Form HTML ordered (numbered) and unordered (bulleted) lists.
    #
    $less_than_tab = $this->tab_width - 1;

    # Re-usable patterns to match list item bullets and number markers:
    $marker_ul_re = '[*+-]';
    $marker_ol_re = '\d+[.]';

    $markers_relist = array($marker_ul_re, $marker_ol_re);

    foreach ($markers_relist as $marker_re) {
        # Re-usable pattern to match any entire ul or ol list:
        $whole_list_re = '
            (                               # $1 = whole list
              (                             # $2
                [ ]{0,' . $less_than_tab . '}
                (' . $marker_re . ')        # $3 = first list item marker
                [ ]+
              )
              (?s:.+?)
              (                             # $4
                  \z
                |
                  \n{2,}
                  (?=\S)
                  (?!                       # Negative lookahead for another list item marker
                    [ ]*
                    ' . $marker_re . '[ ]+
                  )
              )
            )
        '; // mx

        # We use a different prefix before nested lists than top-level lists.
        # See extended comment in processListItems().
        if ($this->list_level) {
            $text = preg_replace_callback('{
                    ^
                    ' . $whole_list_re . '
                }mx',
                array($this, '_doLists_callback'), $text);
        } else {
            $text = preg_replace_callback('{
                    (?:(?<=\n)\n|\A\n?) # Must eat the newline
                    ' . $whole_list_re . '
                }mx',
                array($this, '_doLists_callback'), $text);
        }
    }

    return $text;
}
function encodeAmpsAndAngles($text)
{
    #
    # Smart processing for ampersands and angle brackets that need to
    # be encoded. Valid character entities are left alone unless the
    # no-entities mode is set.
    #
    if ($this->no_entities) {
        # Every ampersand is literal text in this mode.
        $text = str_replace('&', '&amp;', $text);
    } else {
        # Ampersand-encoding based entirely on Nat Irons's Amputator
        # MT plugin: <http://bumppo.net/projects/amputator/>
        # Only ampersands that do not start an entity are encoded.
        $text = preg_replace(
            '/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
            '&amp;',
            $text
        );
    }

    # Encode remaining <'s
    $text = str_replace('<', '&lt;', $text);

    return $text;
}
function encodeAttribute($text)
{
    #
    # Encode text for a double-quoted HTML attribute. This function
    # is *not* suitable for attributes enclosed in single quotes.
    #
    $text = $this->encodeAmpsAndAngles($text);

    # A raw double quote would end the attribute early.
    $text = str_replace('"', '&quot;', $text);

    return $text;
}
function encodeEmailAddress($addr)
{
    #
    # Input: an email address, e.g. "foo@example.com"
    #
    # Output: the email address as a mailto link, with each character
    # of the address encoded as either a decimal or hex entity, in
    # the hopes of foiling most address harvesting spam bots. E.g.:
    #
    #   <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
    #      &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
    #      &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
    #      &#101;&#46;&#x63;&#111;&#x6d;</a></p>
    #
    # Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
    # With some optimizations by Milian Wolff.
    #
    $addr = "mailto:" . $addr;

    # Split between every pair of adjacent bytes.
    $chars = preg_split('/(?<!^)(?!$)/', $addr);
    $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.

    foreach ($chars as $key => $char) {
        $ord = ord($char);
        # Ignore non-ascii chars.
        if ($ord < 128) {
            $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
            # roughly 10% raw, 45% hex, 45% dec
            # '@' *must* be encoded. I insist.
            if ($r > 90 && $char != '@') {
                /* leave the character as is */
            } else if ($r < 45) {
                $chars[$key] = '&#x' . dechex($ord) . ';';
            } else {
                $chars[$key] = '&#' . $ord . ';';
            }
        }
    }

    $addr = implode('', $chars);
    $text = implode('', array_slice($chars, 7)); # text without `mailto:`
    $addr = "<a href=\"$addr\">$text</a>";

    return $addr;
}
function formParagraphs($text)
{
    #
    # Params:
    #   $text - string to process with html <p> tags
    #
    # Returns the text with every paragraph wrapped in <p> and every
    # block-level hash token replaced by the block it stands for.
    #
    # Strip leading and trailing lines:
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);

    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    #
    # Wrap <p> tags and unhashify HTML blocks
    #
    foreach ($grafs as $key => $value) {
        if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
            # Is a paragraph.
            $value = $this->runSpanGamut($value);
            $value = preg_replace('/^([ ]*)/', "<p>", $value);
            $value .= "</p>";
            $grafs[$key] = $this->unhash($value);
        } else {
            # Is a block: swap the token for the stored block.
            $block = $this->html_hashes[$value];
            $graf = $block;

            # A <div markdown="1"> block has its content processed
            # as Markdown.
            if (preg_match('{
                \A
                (                           # $1 = <div> tag
                  <div  \s+
                  [^>]*
                  \b
                  markdown\s*=\s*  ([\'"])  # $2 = attr quote char
                  1
                  \2
                  [^>]*
                  >
                )
                (                           # $3 = contents
                .*
                )
                (</div>)                    # $4 = closing tag
                \z
                }xs', $block, $matches))
            {
                list(, $div_open, , $div_content, $div_close) = $matches;

                # We can't call Markdown(), because that resets the hash;
                # that initialization code should be pulled into its own sub, though.
                $div_content = $this->hashHTMLBlocks($div_content);

                # Run document gamut methods on the content.
                foreach ($this->document_gamut as $method => $priority) {
                    $div_content = $this->$method($div_content);
                }

                # Drop the markdown attribute from the opening tag.
                $div_open = preg_replace(
                    '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);

                $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
            }
            $grafs[$key] = $graf;
        }
    }

    return implode("\n\n", $grafs);
}
function handleSpanToken($token, &$str)
{
    #
    # Handle $token provided by parseSpan by determining its nature and
    # returning the corresponding value that should replace it.
    #
    # $str is the text following the token. It is passed by reference
    # because a code span consumes text up to its closing marker.
    #
    # Square-bracket offsets: the curly-brace form ($token{0}) no longer
    # exists in PHP 8.
    switch ($token[0]) {
        case "\\":
            # Backslash escape: emit the escaped character as an entity.
            return $this->hashPart("&#" . ord($token[1]) . ";");
        case "`":
            # Search for end marker in remaining text.
            if (preg_match('/^(.*?[^`])' . preg_quote($token) . '(?!`)(.*)$/sm',
                $str, $matches))
            {
                $str = $matches[2];
                $codespan = $this->makeCodeSpan($matches[1]);
                return $this->hashPart($codespan);
            }
            return $token; // return as text since no ending marker found.
        default:
            # Anything else is stored untouched.
            return $this->hashPart($token);
    }
}
function hashBlock($text)
{
    #
    # Shortcut function for hashPart with block-level boundaries.
    # Returns the token that replaces $text.
    #
    return $this->hashPart($text, 'B');
}
function hashHTMLBlocks($text)
{
    #
    # Replace block-level HTML found in $text by hash tokens so the
    # Markdown transformations leave it alone. Returns $text unchanged
    # when the no-markup mode is set.
    #
    if ($this->no_markup) {
        return $text;
    }

    $less_than_tab = $this->tab_width - 1;

    # Hashify HTML blocks:
    # We only want to do this for block-level HTML tags, such as headers,
    # lists, and tables. That's because we still want to wrap <p>s around
    # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    # phrase emphasis, and spans. The list of tags we're looking for is
    # hard-coded:
    #
    # *  List "a" is made of tags which can be both inline or block-level.
    #    These will be treated block-level when the start tag is alone on
    #    its line, otherwise they're not matched here and will be taken as
    #    inline later.
    # *  List "b" is made of tags which are always block-level;
    #
    $block_tags_a_re = 'ins|del';
    $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|' .
                       'script|noscript|form|fieldset|iframe|math';

    # Regular expression for the content of a block tag.
    $nested_tags_level = 4;
    $attr = '
        (?>             # optional tag attributes
          \s            # starts with whitespace
          (?>
            [^>"/]+     # text outside quotes
          |
            /+(?!>)     # slash not followed by ">"
          |
            "[^"]*"     # text inside double quotes (tolerate ">")
          |
            \'[^\']*\'  # text inside single quotes (tolerate ">")
          )*
        )?
        ';
    $content =
        str_repeat('
            (?>
              [^<]+         # content without tag
            |
              <\2           # nested opening tag
                ' . $attr . '   # attributes
                (?>
                  />
                |
                  >', $nested_tags_level) . # end of opening tag
                  '.*?' .                   # last level nested tag content
        str_repeat('
                  </\2\s*>  # closing nested tag
                )
              |
                <(?!/\2\s*> # other tags with a different name
              )
            )*',
            $nested_tags_level);
    $content2 = str_replace('\2', '\3', $content);

    # First, look for nested blocks, e.g.:
    #   <div>
    #       <div>
    #       tags for inner block must be indented.
    #       </div>
    #   </div>
    #
    # The outermost tags must start at the left margin for this to match, and
    # the inner nested divs must be indented.
    # We need to do this before the next, more liberal match, because the next
    # match will start at the first `<div>` and stop at the first `</div>`.
    #
    # NOTE(review): the last branch below back-references \2, but its own
    # capture "([?%])" looks like the fifth group of this pattern, so the
    # branch may never match as written. Confirm before changing it.
    $text = preg_replace_callback('{(?>
        (?>
            (?<=\n\n)       # Starting after a blank line
            |               # or
            \A\n?           # the beginning of the doc
        )
        (                       # save in $1

          # Match from `\n<tag>` to `</tag>\n`, handling nested tags
          # in between.

                    [ ]{0,' . $less_than_tab . '}
                    <(' . $block_tags_b_re . ')# start tag = $2
                    ' . $attr . '>          # attributes followed by > and \n
                    ' . $content . '        # content, support nesting
                    </\2>               # the matching end tag
                    [ ]*                # trailing spaces/tabs
                    (?=\n+|\Z)  # followed by a newline or end of document

        | # Special version for tags of group a.

                    [ ]{0,' . $less_than_tab . '}
                    <(' . $block_tags_a_re . ')# start tag = $3
                    ' . $attr . '>[ ]*\n    # attributes followed by >
                    ' . $content2 . '       # content, support nesting
                    </\3>               # the matching end tag
                    [ ]*                # trailing spaces/tabs
                    (?=\n+|\Z)  # followed by a newline or end of document

        | # Special case just for <hr />. It was easier to make a special
          # case than to make the other regex more complicated.

                    [ ]{0,' . $less_than_tab . '}
                    <(hr)               # start tag = $2
                    ' . $attr . '           # attributes
                    /?>                 # the matching end tag
                    [ ]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document

        | # Special case for standalone HTML comments:

                [ ]{0,' . $less_than_tab . '}
                (?s:
                    <!-- .*? -->
                )
                [ ]*
                (?=\n{2,}|\Z)       # followed by a blank line or end of document

        | # PHP and ASP-style processor instructions (<? and <%)

                [ ]{0,' . $less_than_tab . '}
                (?s:
                    <([?%])         # $2
                    .*?
                    \2>
                )
                [ ]*
                (?=\n{2,}|\Z)       # followed by a blank line or end of document

        )
        )}Sxmi',
        array($this, '_hashHTMLBlocks_callback'),
        $text);

    return $text;
}
function hashPart($text, $boundary = 'X')
{
    #
    # Store $text in the hash table and return the unique token that
    # stands for it in the text stream; unhash() reverts the token.
    #
    # $boundary is the character placed on both sides of the token. By
    # convention "B" marks block elements that must not be wrapped in
    # paragraph tags, ":" marks elements that act as word separators and
    # "X" is used in the general case.
    #
    # Tokens are numbered by a counter that lives for the whole request.
    static $serial = 0;

    # Tokens already present in $text are swapped back first, so a single
    # unhash() at the end is enough.
    $restored = $this->unhash($text);

    $serial++;
    $token = $boundary . "\x1A" . $serial . $boundary;
    $this->html_hashes[$token] = $restored;

    return $token; # String that will replace the tag.
}
function makeCodeSpan($code)
{
    #
    # Create a code span markup for $code. Called from handleSpanToken.
    # Returns the hash token standing for the <code> element.
    #
    $code = htmlspecialchars(trim($code), ENT_NOQUOTES);

    return $this->hashPart("<code>$code</code>");
}
function outdent($text)
{
    #
    # Remove one level of line-leading tabs or spaces: one tab, or up to
    # `tab_width` spaces, at the start of every line.
    #
    return preg_replace('/^(\t|[ ]{1,' . $this->tab_width . '})/m', '', $text);
}
function parseSpan($str)
{
    #
    # Take the string $str and parse it into tokens, hashing embedded HTML,
    # escaped characters and handling code spans.
    #
    $output = '';

    # The HTML alternatives are left out in no-markup mode.
    $span_re = '{
            (
                \\\\' . $this->escape_chars_re . '
            |
                (?<![`\\\\])
                `+                      # code span marker
        ' . ($this->no_markup ? '' : '
            |
                <!--    .*?     -->     # comment
            |
                <\?.*?\?> | <%.*?%>     # processing instruction
            |
                <[/!$]?[-a-zA-Z0-9:]+   # regular tags
                (?>
                    \s
                    (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                )?
                >
        ') . '
            )
            }xs';

    while (1) {
        #
        # Each loop iteration searches for either the next tag, the next
        # opening code span marker, or the next escaped character.
        # Each token is then passed to handleSpanToken.
        #
        $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

        # Text preceding the token is copied as is.
        if ($parts[0] != "") {
            $output .= $parts[0];
        }

        # Check if we reach the end.
        if (isset($parts[1])) {
            # handleSpanToken may consume part of $parts[2] (code spans).
            $output .= $this->handleSpanToken($parts[1], $parts[2]);
            $str = $parts[2];
        } else {
            break;
        }
    }

    return $output;
}
function prepareItalicsAndBold()
{
    #
    # Prepare regular expressions for searching emphasis tokens in any
    # context. One expression is stored in `em_strong_prepared_relist`
    # for every combination of open em marker and open strong marker,
    # keyed by their concatenation.
    #
    foreach ($this->em_relist as $em => $em_re) {
        foreach ($this->strong_relist as $strong => $strong_re) {
            # Construct list of allowed token expressions.
            $token_relist = array();
            if (isset($this->em_strong_relist["$em$strong"])) {
                $token_relist[] = $this->em_strong_relist["$em$strong"];
            }
            $token_relist[] = $em_re;
            $token_relist[] = $strong_re;

            # Construct master expression from list.
            $token_re = '{(' . implode('|', $token_relist) . ')}';
            $this->em_strong_prepared_relist["$em$strong"] = $token_re;
        }
    }
}
function processListItems($list_str, $marker_any_re)
{
    #
    # Process the contents of a single ordered or unordered list, splitting it
    # into individual list items.
    #
    # The $this->list_level global keeps track of when we're inside a list.
    # Each time we enter a list, we increment it; when we leave a list,
    # we decrement. If it's zero, we're not in a list anymore.
    #
    # We do this because when we're not inside a list, we want to treat
    # something like this:
    #
    #       I recommend upgrading to version
    #       8. Oops, now this line is treated
    #       as a sub-list.
    #
    # As a single paragraph, despite the fact that the second line starts
    # with a digit-period-space sequence.
    #
    # Whereas when we're inside a list (or sub-list), that line will be
    # treated as the start of a sub-list. What a kludge, huh? This is
    # an aspect of Markdown's syntax that's hard to parse perfectly
    # without resorting to mind-reading. Perhaps the solution is to
    # change the syntax rules such that sub-lists must start with a
    # starting cardinal number; e.g. "1." or "a.".
    #
    $this->list_level++;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $list_str = preg_replace_callback('{
        (\n)?                           # leading line = $1
        (^[ ]*)                         # leading whitespace = $2
        (' . $marker_any_re . '         # list marker and space = $3
            (?:[ ]+|(?=\n)) # space only required if item is not empty
        )
        ((?s:.*?))                      # list item text   = $4
        (?:(\n+(?=\n))|\n)              # tailing blank line = $5
        (?= \n* (\z | \2 (' . $marker_any_re . ') (?:[ ]+|(?=\n))))
        }xm',
        array($this, '_processListItems_callback'), $list_str);

    $this->list_level--;
    return $list_str;
}
function runBasicBlockGamut($text)
{
    #
    # Run block gamut transformations, without hashing HTML blocks. This is
    # useful when HTML blocks are known to be already hashed, like in the first
    # whole-document pass.
    #
    # The methods named by the keys of `block_gamut` are applied in the
    # array's current order.
    foreach ($this->block_gamut as $method => $priority) {
        $text = $this->$method($text);
    }

    # Finally form paragraph and restore hashed blocks.
    $text = $this->formParagraphs($text);

    return $text;
}
function runBlockGamut($text)
{
    #
    # Run block gamut transformations.
    #
    # Raw HTML has to be hashed again for each block, not only once at the
    # start of the document: hashed blocks can sit inside list items where
    # they were indented, and an indented block would have been taken for
    # a code block by an earlier hashHTMLBlocks() pass.
    #
    $hashed = $this->hashHTMLBlocks($text);

    return $this->runBasicBlockGamut($hashed);
}
function runSpanGamut($text)
{
    #
    # Run span gamut transformations: the methods named by the keys of
    # `span_gamut` are applied to $text in the array's current order.
    #
    foreach ($this->span_gamut as $method => $priority) {
        $text = $this->$method($text);
    }

    return $text;
}
function setup()
{
    #
    # Called before the transformation process starts to setup parser
    # states.
    #
    # Clear global hashes.
    $this->urls = $this->predef_urls;
    $this->titles = $this->predef_titles;
    $this->html_hashes = array();

    # Reset the property, not a local variable: assigning to a bare
    # $in_anchor leaves a stale flag from an interrupted earlier run in
    # place, and doAnchors() then skips every link.
    $this->in_anchor = false;
}
function stripLinkDefinitions($text)
{
    #
    # Strips link definitions from text, stores the URLs and titles in
    # hash references.
    #
    $less_than_tab = $this->tab_width - 1;

    # Link defs are in the form: ^[id]: url "optional title"
    $text = preg_replace_callback('{
                        ^[ ]{0,' . $less_than_tab . '}\[(.+)\][ ]?: # id = $1
                          [ ]*
                          \n?               # maybe *one* newline
                          [ ]*
                        <?(\S+?)>?          # url = $2
                          [ ]*
                          \n?               # maybe one newline
                          [ ]*
                        (?:
                            (?<=\s)         # lookbehind for whitespace
                            ["(]
                            (.*?)           # title = $3
                            [")]
                            [ ]*
                        )?  # title is optional
                        (?:\n+|\Z)
        }xm',
        array($this, '_stripLinkDefinitions_callback'),
        $text);

    return $text;
}
function teardown()
{
    #
    # Called after the transformation process to clear any variable
    # which may be taking up memory unnecessarily.
    #
    $this->urls = array();
    $this->titles = array();
    $this->html_hashes = array();
}
function transform($text)
{
    #
    # Main function. Performs some preprocessing on the input text
    # and pass it through the document gamut.
    # Returns the transformed text followed by a newline.
    #
    $this->setup();

    # Remove UTF-8 BOM and marker character in input, if present.
    # (\x1A is the character used inside hash tokens.)
    $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

    # Standardize line endings:
    #   DOS to Unix and Mac to Unix
    $text = preg_replace('{\r\n?}', "\n", $text);

    # Make sure $text ends with a couple of newlines:
    $text .= "\n\n";

    # Convert all tabs to spaces.
    $text = $this->detab($text);

    # Turn block-level HTML blocks into hash entries
    $text = $this->hashHTMLBlocks($text);

    # Strip any lines consisting only of spaces and tabs.
    # This makes subsequent regexen easier to write, because we can
    # match consecutive blank lines with /\n+/ instead of something
    # contorted like /[ ]*\n+/ .
    $text = preg_replace('/^[ ]+$/m', '', $text);

    # Run document gamut methods.
    foreach ($this->document_gamut as $method => $priority) {
        $text = $this->$method($text);
    }

    $this->teardown();

    return $text . "\n";
}
function unhash($text)
{
    #
    # Swap back in all the tags hashed by hashPart()/hashBlock().
    # A token is a boundary character, \x1A, a number, and the same
    # boundary character again.
    #
    return preg_replace_callback(
        '/(.)\x1A[0-9]+\1/',
        array($this, '_unhash_callback'),
        $text
    );
}