PpubMarkdown.lang 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <!--
  3. Author: Jean-Philippe Fleury & Billy Barrow
  4. Copyright (C) 2011 Jean-Philippe Fleury <contact@jpfleury.net>
  5. Copyright (C) 2024 Billy Barrow
  6. GtkSourceView is free software; you can redistribute it and/or
  7. modify it under the terms of the GNU Lesser General Public
  8. License as published by the Free Software Foundation; either
  9. version 2.1 of the License, or (at your option) any later version.
  10. GtkSourceView is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. Lesser General Public License for more details.
  14. You should have received a copy of the GNU Lesser General Public License
  15. along with this library; if not, see <http://www.gnu.org/licenses/>.
  16. -->
  17. <!-- Note: this language definition file adds support for Markdown syntax,
  18. described in the following websites:
  19. * (fr) <http://michelf.com/projets/php-markdown/syntaxe/>
  20. * (en) <http://daringfireball.net/projects/markdown/syntax> -->
  21. <language id="ppub-markdown" name="Ppub Markdown" version="2.0" _section="Markup">
  <metadata>
    <!-- File association: MIME type, filename globs and default suffix. -->
    <property name="mimetypes">text/x-markdown</property>
    <property name="globs">*.markdown;*.md;*.mkd</property>
    <property name="suggested-suffix">.md</property>
    <!-- Block comments use the HTML comment delimiters. -->
    <property name="block-comment-start">&lt;!--</property>
    <property name="block-comment-end">--&gt;</property>
  </metadata>
  <styles>
    <!-- Block structure. -->
    <style id="header" name="Header" map-to="def:heading"/>
    <style id="horizontal-rule" name="Horizontal Rule" map-to="def:thematic-break"/>
    <style id="list-marker" name="List Marker" map-to="def:list-marker"/>

    <!-- Code. -->
    <style id="code-span" name="Code Span" map-to="def:inline-code"/>
    <style id="code-block" name="Code Block" map-to="def:preformatted-section"/>

    <!-- Quotations. -->
    <style id="blockquote-marker" name="Blockquote Marker" map-to="def:shebang"/>

    <!-- Links and images. -->
    <style id="url" name="URL" map-to="def:link-destination"/>
    <style id="link-text" name="Link Text" map-to="def:link-text"/>
    <style id="label" name="Label" map-to="def:preprocessor"/>
    <style id="attribute-value" name="Attribute Value" map-to="def:constant"/>
    <style id="image-marker" name="Image Marker" map-to="def:link-symbol"/>

    <!-- Inline emphasis, escapes and manual line breaks. -->
    <style id="emphasis" name="Emphasis" map-to="def:emphasis"/>
    <style id="strong-emphasis" name="Strong Emphasis" map-to="def:strong-emphasis"/>
    <style id="backslash-escape" name="Backslash Escape" map-to="def:special-char"/>
    <style id="line-break" name="Line Break" map-to="def:note"/>
  </styles>
  46. <definitions>
  47. <!-- Examples:
  48. # Header 1 #
  49. ## Header 2
  50. ### Header 3 ###
  51. -->
  <!-- ATX level-1 heading: a line starting with one hash and a space,
       followed by at least one character. Tagged with context class "h1". -->
  <context id="atx-header-1" style-ref="header" class="h1">
    <match>^# .+</match>
  </context>
  <!-- ATX level-2 heading: a line starting with two hashes and a space,
       followed by at least one character. Tagged with context class "h2". -->
  <context id="atx-header-2" style-ref="header" class="h2">
    <match>^## .+</match>
  </context>
  <!-- ATX level-3 heading: a line starting with three hashes and a space,
       followed by at least one character. Tagged with context class "h3". -->
  <context id="atx-header-3" style-ref="header" class="h3">
    <match>^### .+</match>
  </context>
  <!-- ATX level-4 heading: a line starting with four hashes and a space,
       followed by at least one character. Tagged with context class "h4". -->
  <context id="atx-header-4" style-ref="header" class="h4">
    <match>^#### .+</match>
  </context>
  <!-- ATX level-5 heading: a line starting with five hashes and a space,
       followed by at least one character. Tagged with context class "h5". -->
  <context id="atx-header-5" style-ref="header" class="h5">
    <match>^##### .+</match>
  </context>
  <!-- ATX level-6 heading: a line starting with six hashes and a space,
       followed by at least one character. Tagged with context class "h6". -->
  <context id="atx-header-6" style-ref="header" class="h6">
    <match>^###### .+</match>
  </context>
  70. <!-- Examples:
  71. Header 1
  72. ========
  73. Header 2
  74. -
  75. -->
  76. <!-- Note: line break can't be used in regex, so only underline is matched. -->
  <!-- Setext level-1 underline: a line made only of "=" characters,
       optionally followed by spaces or tabs. Only the underline itself is
       matched, not the heading text on the line above. -->
  <context id="setext-header-1" style-ref="header" class="setext-h1">
    <match>^(=+)[ \t]*$</match>
  </context>
  <!-- Setext level-2 underline: a line made only of hyphens, optionally
       followed by spaces or tabs. Only the underline itself is matched.
       The context class is "setext-h2" so that this underline can be told
       apart from the level-1 ("=") underline, which uses "setext-h1".
       NOTE(review): confirm that any code querying context classes expects
       "setext-h2" for this context. -->
  <context id="setext-header-2" style-ref="header" class="setext-h2">
    <match>^(-+)[ \t]*$</match>
  </context>
  83. <!-- Examples:
  84. - - -
  85. ** ** ** ** **
  86. _____
  87. -->
  <!-- Thematic break: three or more hyphens, underscores or asterisks on a
       line of their own, each optionally followed by up to two spaces. -->
  <context id="horizontal-rule" style-ref="horizontal-rule" class="no-format">
    <match extended="true">
      ^[ ]{0,3}            # Maximum 3 spaces at the beginning of the line.
      (
        (-[ ]{0,2}){3,} |  # 3 or more hyphens, with 2 spaces maximum between each hyphen.
        (_[ ]{0,2}){3,} |  # Idem, but with underscores.
        (\*[ ]{0,2}){3,}   # Idem, but with asterisks.
      )
      [ \t]*$              # Optional trailing spaces or tabs.
    </match>
  </context>
  99. <!-- Note about following list and code block contexts: according to the
  100. Markdown syntax, to write several paragraphs in a list item, we have
  101. to indent each paragraph. Example:
  102. - Item A (paragraph 1).
  103. Item A (paragraph 2).
  104. Item A (paragraph 3).
  105. - Item B.
  106. So there is a conflict in terms of syntax highlighting between an
  107. indented paragraph inside a list item (4 spaces or 1 tab) and an
  108. indented line of code outside a list (also 4 spaces or 1 tab). In this
  109. language file, since a full context analysis can't be done (because
  110. line break can't be used in regex), the choice was made to highlight
  111. code block only from 2 levels of indentation. -->
  112. <!-- Example (unordered list):
  113. * Item
  114. + Item
  115. - Item
  116. Example (ordered list):
  117. 1. Item
  118. 2. Item
  119. 3. Item
  120. -->
  <!-- List item marker: a bullet character or a number with a period, at
       most 3 spaces into the line and followed by whitespace. Only the
       marker and the surrounding whitespace are matched. -->
  <context id="list" style-ref="list-marker" class="list">
    <match extended="true">
      ^[ ]{0,3}     # Maximum 3 spaces at the beginning of the line.
      (
        \*|\+|-|    # Asterisk, plus or hyphen for unordered list.
        [0-9]+\.    # Number followed by period for ordered list.
      )
      [ \t]+        # Must be followed by at least 1 space or 1 tab.
    </match>
  </context>
  131. <!-- Example:
  132. <em>HTML code</em> displayed <strong>literally</strong>.
  133. -->
  <!-- Indented code line: at least 8 spaces or 2 tabs of indentation.
       Only the text after the indentation (sub-pattern 2) gets the
       code-block style. -->
  <context id="code-block" class="pre no-format no-spell-check">
    <match>^( {8,}|\t{2,})([^ \t]+.*)</match>
    <include>
      <context style-ref="code-block" sub-pattern="2"/>
    </include>
  </context>
  140. <!-- Note about following code span contexts: within a paragraph, text
  141. wrapped with backticks indicates a code span. Markdown allows using
  142. one or more backticks to wrap text, provided that the number is identical
  143. on both sides, and the same number of consecutive backticks is not
  144. present within the text. The current language file supports code span
  145. highlighting with up to 2 backticks surrounding text. -->
  146. <!-- Examples:
  147. Here's a literal HTML tag: `<p>`.
  148. `Here's a code span containing ``backticks``.`
  149. -->
  <!-- Code span delimited by exactly one backtick on each side. Runs of two
       or more backticks may appear inside the span. -->
  <context id="1-backtick-code-span" style-ref="code-span" class="pre no-format no-spell-check">
    <match>(?&lt;!`)`[^`]+(`{2,}[^`]+)*`(?!`)</match>
  </context>
  153. <!-- Examples:
  154. Here's a literal HTML tag: ``<p>``.
  155. ``The grave accent (`) is used in Markdown to indicate a code span.``
  156. ``Here's another code span containing ```backticks```.``
  157. -->
  <!-- Code span delimited by exactly two backticks on each side. A single
       backtick, or a run of three or more, may appear inside the span. -->
  <context id="2-backticks-code-span" style-ref="code-span" class="pre no-format no-spell-check">
    <match>(?&lt;!`)``[^`]+((`|`{3,})[^`]+)*``(?!`)</match>
  </context>
  <!-- Fenced code block: opens on a line that starts with three backticks
       (any text may follow on that line) and closes on a line holding only
       three backticks. Spaces or tabs after the closing fence are accepted;
       otherwise a closing fence with trailing whitespace would not end the
       context and the code styling would run past the intended block. -->
  <context id="3-backticks-code-span" style-ref="code-block" class="pre no-format no-spell-check">
    <start>^```.*$</start>
    <end>^```[ \t]*$</end>
  </context>
  165. <!-- Example:
  166. > Quoted text.
  167. > Quoted text with `code span`.
  168. >> Blockquote **nested**.
  169. -->
  170. <!-- Note: blockquote can contain block-level and inline Markdown elements,
  171. but the current language file only highlights inline ones (emphasis,
  172. link, etc.). -->
  <!-- Blockquote line: up to 3 leading spaces, a ">" that is followed by at
       least one character, and optional further ">" markers. The markers
       (sub-pattern 1 of the start match) get the blockquote-marker style.
       The context ends at the end of the line, and the inline contexts
       listed below are recognised inside it. -->
  <context id="blockquote" end-at-line-end="true" class="quot">
    <start>^( {0,3}&gt;(?=.)( {0,4}&gt;)*)</start>
    <include>
      <context where="start" sub-pattern="1" style-ref="blockquote-marker"/>

      <!-- Code. -->
      <context ref="1-backtick-code-span"/>
      <context ref="2-backticks-code-span"/>
      <context ref="3-backticks-code-span"/>

      <!-- Links and images. -->
      <context ref="automatic-link"/>
      <context ref="inline-link"/>
      <context ref="reference-link"/>
      <context ref="inline-image"/>
      <context ref="reference-image"/>

      <!-- Emphasis. -->
      <context ref="underscores-emphasis"/>
      <context ref="asterisks-emphasis"/>
      <context ref="underscores-strong-emphasis"/>
      <context ref="asterisks-strong-emphasis"/>

      <!-- Escapes and manual line breaks. -->
      <context ref="backslash-escape"/>
      <context ref="line-break"/>
    </include>
  </context>
  193. <!-- Examples:
  194. <user@example.com>
  195. <http://www.example.com/>
  196. -->
  197. <!-- Note: regular expressions are based on the function `_DoAutoLinks` from
  198. Markdown.pl (see <http://daringfireball.net/projects/markdown/>). -->
  <!-- Automatic link: an e-mail address or an http, https or ftp URL wrapped
       in angle brackets. Matching is case-insensitive. The address or URL
       between the brackets (sub-pattern 1) gets the url style. -->
  <context id="automatic-link" class="link no-spell-check no-format">
    <match extended="true" case-sensitive="false">
      &lt;
      (((mailto:)?[a-z0-9.-]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+) |  # E-mail.
      ((https?|ftp):[^'">\s]+))                                     # URL.
      &gt;
    </match>
    <include>
      <context style-ref="url" sub-pattern="1"/>
    </include>
  </context>
  210. <!-- Examples:
  211. [link text](http://www.example.com/)
  212. [link text](<http://www.example.com/>)
  213. [link text]( /folder/page.html "Title" )
  214. -->
  <!-- Inline link: bracketed link text followed by a parenthesised URL
       (optionally wrapped in angle brackets) and an optional quoted title.
       Sub-patterns: 1 = link text, 3 = URL inside angle brackets,
       4 = bare URL, 6 = quoted title. -->
  <context id="inline-link" class="link no-format">
    <match extended="true">
      \[(.*?)\]          # Link text.
      \(                 # Literal opening parenthesis.
      [ \t]*             # Optional spaces or tabs after the opening parenthesis.
      (&lt;(.*?)&gt; |   # URL with brackets.
      (.*?))             # URL without brackets.
      ([ \t]+(".*?"))?   # Optional title.
      [ \t]*             # Optional spaces or tabs before the closing parenthesis.
      \)                 # Literal closing parenthesis.
    </match>
    <include>
      <context style-ref="link-text" sub-pattern="1"/>
      <context style-ref="url" class="no-spell-check" sub-pattern="3"/>
      <context style-ref="url" class="no-spell-check" sub-pattern="4"/>
      <context style-ref="attribute-value" sub-pattern="6"/>
    </include>
  </context>
  233. <!-- Examples:
  234. [link text]
  235. [link text][]
  236. [link text][link label]
  237. [link text] [link label]
  238. -->
  239. <!-- Note: some assertions are used to differentiate reference link from
  240. link label. -->
  <!-- Reference link: bracketed link text, optionally followed by a
       bracketed label (with at most one space or tab in between).
       Sub-patterns: 1 = link text, 3 = label.
       Two assertions keep link definitions out of this context:
       * the lookbehind rejects a bracket that sits 1, 2 or 3 spaces after
         the start of the line (each alternative has a fixed length, and the
         spaces are written as bracketed classes with counts so that the
         three indent widths stay distinct even if whitespace in this file
         is reformatted);
       * the lookahead rejects a closing bracket followed by a colon. -->
  <context id="reference-link" class="link no-format">
    <match>(?&lt;!^[ ]|^[ ]{2}|^[ ]{3})\[(.*?)\]([ \t]?\[(.*?)\])?(?!:)</match>
    <include>
      <context style-ref="link-text" sub-pattern="1"/>
      <context style-ref="label" class="no-spell-check" sub-pattern="3"/>
    </include>
  </context>
  248. <!-- Examples:
  249. [link label]: /folder/page.html
  250. [link label]: <http://www.example.com/>
  251. [link label]: http://www.example.com/ "Title"
  252. -->
  <!-- Link definition: a bracketed label and a colon at most 3 spaces into
       the line, then a URL (optionally wrapped in angle brackets) and an
       optional quoted title, up to the end of the line.
       Sub-patterns: 1 = label, 3 = URL inside angle brackets,
       4 = bare URL, 6 = quoted title. -->
  <context id="link-definition" class="link no-format">
    <match extended="true">
      ^[ ]{0,3}               # Maximum 3 spaces at the beginning of the line.
      \[(.+?)\]:              # Link label and colon.
      [ \t]*                  # Optional spaces or tabs.
      (&lt;([^ \t]+?)&gt; |   # URL with brackets.
      ([^ \t]+?))             # URL without brackets.
      ([ \t]+(".*?"))?        # Optional title.
      [ \t]*$                 # Optional trailing spaces or tabs.
    </match>
    <include>
      <context style-ref="label" class="no-spell-check" sub-pattern="1"/>
      <context style-ref="url" class="no-spell-check" sub-pattern="3"/>
      <context style-ref="url" class="no-spell-check" sub-pattern="4"/>
      <context style-ref="attribute-value" sub-pattern="6"/>
    </include>
  </context>
  270. <!-- Examples:
  271. ![alt text](http://www.example.com/image.jpg)
  272. ![alt text]( <http://www.example.com/image.jpg> )
  273. ![alt text] (/path/to/image.jpg "Title")
  274. -->
  <!-- Inline image: "!" followed by bracketed alternate text, an optional
       space, and a parenthesised path or URL (optionally wrapped in angle
       brackets) with an optional quoted title.
       Sub-patterns: 1 = "!" marker, 2 = alternate text, 4 = URL inside
       angle brackets, 5 = bare URL, 6 = whitespace plus title,
       7 = quoted title only. -->
  <context id="inline-image" class="image no-format">
    <match extended="true">
      (!)                      # Leading ! sign.
      \[(.*?)\][ ]?            # Alternate text for the image (and optional space).
      \(                       # Literal parenthesis.
      [ \t]*                   # Optional spaces or tabs after the opening parenthesis.
      (&lt;([^ \t]*?)&gt; |    # Image path or URL with brackets.
      ([^ \t]*?))              # Image path or URL without brackets.
      ([ \t]+(".*?"))?         # Optional title.
      [ \t]*                   # Optional spaces or tabs before the closing parenthesis.
      \)                       # Literal parenthesis.
    </match>
    <include>
      <context style-ref="image-marker" sub-pattern="1"/>
      <context style-ref="attribute-value" sub-pattern="2"/>
      <context style-ref="url" class="no-spell-check" sub-pattern="4"/>
      <context style-ref="url" class="no-spell-check" sub-pattern="5"/>
      <!-- Sub-pattern 7, not 6: group 6 also covers the whitespace in front
           of the title, which should not receive the title style. -->
      <context style-ref="attribute-value" sub-pattern="7"/>
    </include>
  </context>
  295. <!-- Examples:
  296. ![alt text][image label]
  297. ![alt text] [image label]
  298. -->
  <!-- Reference image: "!" followed by bracketed alternate text, an optional
       space and a bracketed label.
       Sub-patterns: 1 = "!" marker, 2 = alternate text, 3 = label. -->
  <context id="reference-image" class="image no-format">
    <match>(!)\[(.*?)\] ?\[(.*?)\]</match>
    <include>
      <context style-ref="image-marker" sub-pattern="1"/>
      <context style-ref="attribute-value" sub-pattern="2"/>
      <context style-ref="label" class="no-spell-check" sub-pattern="3"/>
    </include>
  </context>
  307. <!-- Examples:
  308. Lorem _ipsum dolor_ sit amet.
  309. Here's an _emphasized text containing an underscore (\_)_.
  310. -->
  <!-- Emphasis with single underscores. The opening underscore must not be
       next to another underscore nor followed by a space or tab; the closing
       one must not be preceded by a backslash, underscore, space or tab, nor
       followed by another underscore. -->
  <context id="underscores-emphasis" class="i" style-ref="emphasis">
    <match>(?&lt;!_)_[^_ \t].*?(?&lt;!\\|_| |\t)_(?!_)</match>
  </context>
  314. <!-- Examples:
  315. Lorem *ipsum dolor* sit amet.
  316. Here's an *emphasized text containing an asterisk (\*)*.
  317. -->
  <!-- Emphasis with single asterisks. The opening asterisk must not be next
       to another asterisk nor followed by a space or tab; the closing one
       must not be preceded by a backslash, asterisk, space or tab, nor
       followed by another asterisk. -->
  <context id="asterisks-emphasis" class="i" style-ref="emphasis">
    <match>(?&lt;!\*)\*[^\* \t].*?(?&lt;!\\|\*| |\t)\*(?!\*)</match>
  </context>
  321. <!-- Examples:
  322. Lorem __ipsum dolor__ sit amet.
  323. Here's a __strongly emphasized text containing an underscore (\_)__.
  324. -->
  <!-- Strong emphasis with double underscores. The opening pair must not be
       followed by an underscore, space or tab; the closing pair must not be
       preceded by a backslash, underscore, space or tab. -->
  <context id="underscores-strong-emphasis" class="b" style-ref="strong-emphasis">
    <match>__[^_ \t].*?(?&lt;!\\|_| |\t)__</match>
  </context>
  328. <!-- Examples:
  329. Lorem **ipsum dolor** sit amet.
  330. Here's a **strongly emphasized text containing an asterisk (\*).**
  331. -->
  <!-- Strong emphasis with double asterisks. The opening pair must not be
       followed by an asterisk, space or tab; the closing pair must not be
       preceded by a backslash, asterisk, space or tab. -->
  <context id="asterisks-strong-emphasis" class="b" style-ref="strong-emphasis">
    <match>\*\*[^\* \t].*?(?&lt;!\\|\*| |\t)\*\*</match>
  </context>
  <!-- Backslash escape: a backslash followed by one of the characters
       \ ` * _ { } [ ] ( ) # + . ! or a hyphen.
       The hyphen is placed last in the character class so that it is a
       literal; written between "+" and "." it would form a range that also
       matches a comma. -->
  <context id="backslash-escape" class="escape" style-ref="backslash-escape">
    <match>\\[\\`*_{}\[\]()#+.!-]</match>
  </context>
  338. <!-- Note: a manual line break should be followed by a line containing text,
  339. but since line break can't be used in regex, only trailing spaces or tabs
  340. are matched. -->
  <!-- Manual line break: two or more spaces or tabs at the end of a line,
       directly after a non-blank character. Only the trailing whitespace
       (sub-pattern 1) gets the line-break style. -->
  <context id="line-break">
    <match>(?&lt;=[^ \t])([ \t]{2,})$</match>
    <include>
      <context style-ref="line-break" sub-pattern="1"/>
    </include>
  </context>
  <!-- Groups every Markdown context defined in this file under a single id
       so that the whole set can be referenced at once. The order of the
       references is kept as is. -->
  <context id="markdown-syntax">
    <include>
      <!-- Headings. -->
      <context ref="atx-header-1"/>
      <context ref="atx-header-2"/>
      <context ref="atx-header-3"/>
      <context ref="atx-header-4"/>
      <context ref="atx-header-5"/>
      <context ref="atx-header-6"/>
      <context ref="setext-header-1"/>
      <context ref="setext-header-2"/>

      <!-- Rules and lists. -->
      <context ref="horizontal-rule"/>
      <context ref="list"/>

      <!-- Code. -->
      <context ref="code-block"/>
      <context ref="1-backtick-code-span"/>
      <context ref="2-backticks-code-span"/>
      <context ref="3-backticks-code-span"/>

      <!-- Quotations. -->
      <context ref="blockquote"/>

      <!-- Links and images. -->
      <context ref="automatic-link"/>
      <context ref="inline-link"/>
      <context ref="reference-link"/>
      <context ref="link-definition"/>
      <context ref="inline-image"/>
      <context ref="reference-image"/>

      <!-- Emphasis. -->
      <context ref="underscores-emphasis"/>
      <context ref="asterisks-emphasis"/>
      <context ref="underscores-strong-emphasis"/>
      <context ref="asterisks-strong-emphasis"/>

      <!-- Escapes and manual line breaks. -->
      <context ref="backslash-escape"/>
      <context ref="line-break"/>
    </include>
  </context>
  <!-- Substitute the Markdown context set for the hook context
       "embedded-lang-hook-content" of the "html" language definition. -->
  <replace ref="markdown-syntax" id="html:embedded-lang-hook-content"/>
  <!-- Main context of the language (its id matches the language id):
       the Markdown context set first, then the HTML language's main
       context. -->
  <context id="ppub-markdown">
    <include>
      <context ref="markdown-syntax"/>

      <!-- Note: even if it's highlighted, Markdown syntax within HTML blocks
           (e.g., `<div>`) is not processed. -->
      <context ref="html:html"/>
    </include>
  </context>
  387. </definitions>
  388. </language>