463 lines
8.5 KiB
Go
Raw Normal View History

2024-03-31 21:01:47 +02:00
package markdown
2024-03-31 23:08:01 +02:00
import (
"html"
"strings"
2024-04-02 11:50:37 +02:00
"unsafe"
2024-03-31 23:08:01 +02:00
)
// headerStart and headerEnd hold the opening and closing HTML tags for the
// six heading levels. Both slices are indexed by level-1, so index 0 is the
// <h1> pair and index 5 is the <h6> pair.
var (
headerStart = []string{"<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>"}
headerEnd = []string{"</h1>", "</h2>", "</h3>", "</h4>", "</h5>", "</h6>"}
)
2024-04-02 10:44:47 +02:00
// renderer carries the output buffer and the block-level state that is
// tracked while markdown is converted to HTML line by line.
type renderer struct {
	// out accumulates the generated HTML.
	out []byte

	// Counters for the block elements that are currently open.
	paragraphLevel, quoteLevel, listLevel, olistLevel, tableLevel int

	// codeLines is the number of lines written into the current code block.
	codeLines int

	// tableHeaderWritten reports whether the header separator row of the
	// current table has been seen, i.e. whether cells are <td> or <th>.
	tableHeaderWritten bool

	// inCodeBlock reports whether the current line is inside a code fence.
	inCodeBlock bool
}
2024-03-31 21:01:47 +02:00
// Render creates HTML from the supplied markdown text.
func Render(markdown string) string {
2024-03-31 23:08:01 +02:00
var (
2024-04-01 18:10:52 +02:00
r renderer
2024-03-31 23:08:01 +02:00
i = 0
lineStart = 0
)
2024-04-02 10:44:47 +02:00
r.out = make([]byte, 0, nextPowerOf2(uint32(len(markdown)+4)))
2024-03-31 23:08:01 +02:00
for {
if i > len(markdown) {
2024-04-01 20:04:20 +02:00
r.closeAll()
2024-04-01 18:10:52 +02:00
for range r.quoteLevel {
2024-04-02 10:44:47 +02:00
r.WriteString("</blockquote>")
2024-04-01 18:10:52 +02:00
}
2024-04-02 11:50:37 +02:00
return unsafe.String(unsafe.SliceData(r.out), len(r.out))
2024-03-31 23:08:01 +02:00
}
if i != len(markdown) && markdown[i] != '\n' {
i++
continue
}
line := markdown[lineStart:i]
lineStart = i + 1
i++
2024-04-01 18:10:52 +02:00
r.processLine(line)
}
}
2024-03-31 23:08:01 +02:00
2024-04-01 18:10:52 +02:00
func (r *renderer) processLine(line string) {
2024-04-01 21:27:10 +02:00
if r.inCodeBlock {
if strings.HasPrefix(line, "```") {
2024-04-02 10:44:47 +02:00
r.WriteString("</code></pre>")
2024-04-01 21:27:10 +02:00
r.inCodeBlock = false
r.codeLines = 0
} else {
if r.codeLines != 0 {
2024-04-02 10:44:47 +02:00
r.WriteByte('\n')
2024-04-01 21:27:10 +02:00
}
2024-04-02 10:44:47 +02:00
r.WriteString(html.EscapeString(line))
2024-04-01 21:27:10 +02:00
r.codeLines++
}
return
}
2024-04-01 18:10:52 +02:00
newQuoteLevel := 0
2024-03-31 23:08:01 +02:00
2024-04-01 18:10:52 +02:00
for strings.HasPrefix(line, ">") {
line = strings.TrimSpace(line[1:])
newQuoteLevel++
}
2024-03-31 23:08:01 +02:00
2024-04-01 18:10:52 +02:00
if newQuoteLevel > r.quoteLevel {
r.closeParagraphs()
2024-03-31 23:08:01 +02:00
2024-04-01 18:10:52 +02:00
for range newQuoteLevel - r.quoteLevel {
2024-04-02 10:44:47 +02:00
r.WriteString("<blockquote>")
2024-03-31 23:08:01 +02:00
}
2024-04-01 18:10:52 +02:00
} else if newQuoteLevel < r.quoteLevel {
r.closeParagraphs()
for range r.quoteLevel - newQuoteLevel {
2024-04-02 10:44:47 +02:00
r.WriteString("</blockquote>")
2024-04-01 18:10:52 +02:00
}
}
r.quoteLevel = newQuoteLevel
2024-04-01 20:04:20 +02:00
if len(line) == 0 {
r.closeAll()
return
}
switch line[0] {
case '#':
r.closeAll()
2024-04-01 18:10:52 +02:00
space := strings.IndexByte(line, ' ')
if space > 0 && space <= 6 {
2024-04-02 10:44:47 +02:00
r.WriteString(headerStart[space-1])
2024-04-01 18:10:52 +02:00
r.writeText(line[space+1:])
2024-04-02 10:44:47 +02:00
r.WriteString(headerEnd[space-1])
2024-04-01 18:10:52 +02:00
}
return
2024-04-01 20:04:20 +02:00
case '-', '*':
2024-04-02 19:25:42 +02:00
if strings.HasPrefix(line, "---") {
r.WriteString("<hr>")
return
}
2024-04-02 19:48:35 +02:00
if len(line) > 1 && line[1] == ' ' {
line = line[2:]
2024-04-01 20:04:20 +02:00
2024-04-02 19:48:35 +02:00
if r.listLevel == 0 {
r.WriteString("<ul>")
r.listLevel++
}
2024-04-01 20:04:20 +02:00
2024-04-02 19:48:35 +02:00
r.WriteString("<li>")
r.writeText(line)
r.WriteString("</li>")
return
}
2024-04-01 20:59:32 +02:00
2024-04-01 21:27:10 +02:00
case '`':
if strings.HasPrefix(line, "```") {
language := line[3:]
if !r.inCodeBlock {
if language != "" {
2024-04-02 10:44:47 +02:00
r.WriteString("<pre><code class=\"language-")
r.WriteString(html.EscapeString(language))
r.WriteString("\">")
2024-04-01 21:27:10 +02:00
} else {
2024-04-02 10:44:47 +02:00
r.WriteString("<pre><code>")
2024-04-01 21:27:10 +02:00
}
r.inCodeBlock = true
}
return
}
2024-04-01 20:59:32 +02:00
case '|':
line = line[1:]
if r.tableLevel == 0 {
2024-04-02 10:44:47 +02:00
r.WriteString("<table><thead>")
2024-04-01 20:59:32 +02:00
r.tableLevel++
}
column := 0
for {
pipe := strings.IndexByte(line, '|')
if pipe == -1 {
2024-04-02 10:44:47 +02:00
r.WriteString("</tr>")
2024-04-01 20:59:32 +02:00
return
}
content := strings.TrimSpace(line[:pipe])
if strings.HasPrefix(content, "---") {
2024-04-02 10:44:47 +02:00
r.WriteString("</thead><tbody>")
2024-04-01 20:59:32 +02:00
r.tableHeaderWritten = true
return
}
if column == 0 {
2024-04-02 10:44:47 +02:00
r.WriteString("<tr>")
2024-04-01 20:59:32 +02:00
}
if r.tableHeaderWritten {
2024-04-02 10:44:47 +02:00
r.WriteString("<td>")
2024-04-01 20:59:32 +02:00
r.writeText(content)
2024-04-02 10:44:47 +02:00
r.WriteString("</td>")
2024-04-01 20:59:32 +02:00
} else {
2024-04-02 10:44:47 +02:00
r.WriteString("<th>")
2024-04-01 20:59:32 +02:00
r.writeText(content)
2024-04-02 10:44:47 +02:00
r.WriteString("</th>")
2024-04-01 20:59:32 +02:00
}
line = line[pipe+1:]
column++
}
2024-04-01 18:10:52 +02:00
}
2024-04-03 09:23:52 +02:00
pos := 0
for pos < len(line) && line[pos] >= '0' && line[pos] <= '9' {
pos++
if pos < len(line) && (line[pos] == '.' || line[pos] == ')') {
line = strings.TrimSpace(line[pos+1:])
if r.olistLevel == 0 {
r.WriteString("<ol>")
r.olistLevel++
}
r.WriteString("<li>")
r.writeText(line)
r.WriteString("</li>")
return
}
}
2024-04-01 18:10:52 +02:00
if r.paragraphLevel == 0 {
2024-04-02 10:44:47 +02:00
r.WriteString("<p>")
2024-04-01 18:10:52 +02:00
r.paragraphLevel++
r.writeText(line)
return
}
2024-04-02 10:44:47 +02:00
r.WriteByte(' ')
2024-04-01 18:10:52 +02:00
r.writeText(line)
}
2024-04-01 20:04:20 +02:00
// closeAll closes all open tags.
func (r *renderer) closeAll() {
r.closeLists()
r.closeParagraphs()
2024-04-01 20:59:32 +02:00
r.closeTables()
2024-04-01 20:04:20 +02:00
}
2024-04-01 18:10:52 +02:00
// closeParagraphs closes open paragraphs.
func (r *renderer) closeParagraphs() {
for range r.paragraphLevel {
2024-04-02 10:44:47 +02:00
r.WriteString("</p>")
2024-03-31 23:08:01 +02:00
}
2024-04-01 18:10:52 +02:00
r.paragraphLevel = 0
2024-03-31 21:01:47 +02:00
}
2024-04-01 11:54:14 +02:00
2024-04-01 20:04:20 +02:00
// closeLists closes open lists.
func (r *renderer) closeLists() {
for range r.listLevel {
2024-04-02 10:44:47 +02:00
r.WriteString("</ul>")
2024-04-01 20:04:20 +02:00
}
2024-04-03 09:23:52 +02:00
for range r.olistLevel {
r.WriteString("</ol>")
}
2024-04-01 20:04:20 +02:00
r.listLevel = 0
2024-04-03 09:23:52 +02:00
r.olistLevel = 0
2024-04-01 20:04:20 +02:00
}
2024-04-01 20:59:32 +02:00
// closeTables closes open tables.
func (r *renderer) closeTables() {
for range r.tableLevel {
2024-04-02 10:44:47 +02:00
r.WriteString("</tbody></table>")
2024-04-01 20:59:32 +02:00
}
r.tableLevel = 0
r.tableHeaderWritten = false
}
2024-04-01 18:10:52 +02:00
// writeText converts inline markdown to HTML.
func (r *renderer) writeText(markdown string) {
2024-04-01 11:54:14 +02:00
var (
2024-04-02 20:14:07 +02:00
tokenStart = 0
searchStart = 0
linkTextStart = -1
linkTextEnd = -1
2024-04-03 21:13:59 +02:00
linkIsImage = false
2024-04-02 20:14:07 +02:00
emStart = -1
strongStart = -1
2024-04-02 20:50:33 +02:00
strikeStart = -1
2024-04-01 11:54:14 +02:00
)
2024-04-02 21:30:50 +02:00
begin:
2024-04-01 11:54:14 +02:00
for {
2024-04-03 21:13:59 +02:00
i := strings.IndexAny(markdown[searchStart:], "[]()`*_~!")
2024-04-02 20:14:07 +02:00
if i == -1 {
2024-04-02 10:44:47 +02:00
r.WriteString(html.EscapeString(markdown[tokenStart:]))
2024-04-01 11:54:14 +02:00
return
}
2024-04-02 20:14:07 +02:00
i += searchStart
searchStart = i + 1
2024-04-01 11:54:14 +02:00
2024-04-02 20:31:09 +02:00
switch markdown[i] {
2024-04-01 11:54:14 +02:00
case '[':
2024-04-02 10:44:47 +02:00
r.WriteString(html.EscapeString(markdown[tokenStart:i]))
2024-04-01 11:54:14 +02:00
tokenStart = i
2024-04-02 20:14:07 +02:00
linkTextStart = i
2024-04-02 19:13:28 +02:00
2024-04-01 11:54:14 +02:00
case ']':
2024-04-02 20:14:07 +02:00
linkTextEnd = i
2024-04-02 19:13:28 +02:00
2024-04-01 11:54:14 +02:00
case '(':
2024-04-02 21:30:50 +02:00
if linkTextStart == -1 || linkTextEnd == -1 {
continue
2024-04-01 11:54:14 +02:00
}
2024-04-02 21:30:50 +02:00
level := 1
2024-04-02 19:13:28 +02:00
2024-04-02 21:30:50 +02:00
for {
pos := strings.IndexAny(markdown[searchStart:], "()")
2024-04-01 11:54:14 +02:00
2024-04-02 21:30:50 +02:00
if pos == -1 {
goto begin
}
2024-04-01 11:54:14 +02:00
2024-04-02 21:30:50 +02:00
switch markdown[searchStart+pos] {
case '(':
level++
case ')':
level--
if level == 0 {
urlEnd := searchStart + pos
searchStart = urlEnd + 1
linkText := markdown[linkTextStart+1 : linkTextEnd]
linkURL := markdown[i+1 : urlEnd]
2024-04-03 21:13:59 +02:00
if linkIsImage {
r.WriteString("<img src=\"")
r.WriteString(sanitizeURL(linkURL))
r.WriteString("\" alt=\"")
r.WriteString(html.EscapeString(linkText))
r.WriteString("\">")
} else {
r.WriteString("<a href=\"")
r.WriteString(sanitizeURL(linkURL))
r.WriteString("\">")
r.WriteString(html.EscapeString(linkText))
r.WriteString("</a>")
}
2024-04-02 21:30:50 +02:00
linkTextStart = -1
linkTextEnd = -1
tokenStart = urlEnd + 1
goto begin
}
}
2024-04-01 11:54:14 +02:00
2024-04-02 21:30:50 +02:00
searchStart += pos + 1
2024-04-01 11:54:14 +02:00
}
2024-04-02 19:13:28 +02:00
case '`':
2024-04-02 21:46:23 +02:00
end := strings.IndexByte(markdown[searchStart:], '`')
if end == -1 {
continue
2024-04-02 19:13:28 +02:00
}
2024-04-02 19:48:35 +02:00
2024-04-02 21:46:23 +02:00
r.WriteString(html.EscapeString(markdown[tokenStart:i]))
r.WriteString("<code>")
r.WriteString(html.EscapeString(markdown[searchStart : searchStart+end]))
r.WriteString("</code>")
searchStart += end + 1
tokenStart = searchStart
2024-04-02 19:48:35 +02:00
case '*', '_':
if i == emStart {
strongStart = i + 1
emStart = -1
} else if strongStart != -1 {
r.WriteString("<strong>")
r.WriteString(html.EscapeString(markdown[strongStart:i]))
r.WriteString("</strong>")
strongStart = -1
2024-04-02 20:14:07 +02:00
tokenStart = i + 2
searchStart = tokenStart
2024-04-02 19:48:35 +02:00
} else if emStart != -1 {
r.WriteString("<em>")
r.WriteString(html.EscapeString(markdown[emStart:i]))
r.WriteString("</em>")
emStart = -1
tokenStart = i + 1
} else {
r.WriteString(html.EscapeString(markdown[tokenStart:i]))
tokenStart = i
emStart = i + 1
}
2024-04-02 20:50:33 +02:00
case '~':
if i+1 >= len(markdown) || markdown[i+1] != '~' {
continue
}
if strikeStart != -1 {
r.WriteString("<del>")
r.WriteString(html.EscapeString(markdown[strikeStart:i]))
r.WriteString("</del>")
strikeStart = -1
tokenStart = i + 2
} else {
r.WriteString(html.EscapeString(markdown[tokenStart:i]))
tokenStart = i
strikeStart = i + 2
}
2024-04-03 21:13:59 +02:00
case '!':
if i+1 >= len(markdown) || markdown[i+1] != '[' {
continue
}
r.WriteString(html.EscapeString(markdown[tokenStart:i]))
tokenStart = i
linkTextStart = i + 1
searchStart++
linkIsImage = true
2024-04-01 11:54:14 +02:00
}
}
}
2024-04-01 20:04:20 +02:00
// sanitizeURL makes a URL safe to use as the value of an `href` or `src`
// attribute.
//
// Surrounding whitespace is trimmed and the result is HTML-escaped. URLs with
// a script scheme ("javascript:", "vbscript:") yield the empty string. The
// scheme check runs on a copy of the URL with every space and ASCII control
// character removed, because browsers drop leading control characters and
// embedded tabs and newlines before interpreting a URL; otherwise
// "java\tscript:..." would slip through.
func sanitizeURL(linkURL string) string {
	linkURL = strings.TrimSpace(linkURL)

	probe := strings.ToLower(strings.Map(func(c rune) rune {
		if c <= ' ' {
			return -1
		}

		return c
	}, linkURL))

	for _, scheme := range []string{"javascript:", "vbscript:"} {
		if strings.HasPrefix(probe, scheme) {
			return ""
		}
	}

	return html.EscapeString(linkURL)
}
2024-04-02 10:44:47 +02:00
// WriteByte appends b to the output buffer. The returned error is always nil.
func (r *renderer) WriteByte(b byte) error {
	r.out = append(r.out, b)
	return nil
}
// WriteString appends text to the output buffer. It reports the number of
// bytes appended, which is always len(text), and a nil error.
func (r *renderer) WriteString(text string) (int, error) {
	written := len(text)
	r.out = append(r.out, text...)
	return written, nil
}
// nextPowerOf2 returns the smallest power of two that is greater than or
// equal to x. The arithmetic wraps around in 32 bits, so the result is 0 for
// x == 0 and for any x above 1<<31.
func nextPowerOf2(x uint32) uint32 {
	x--

	// Copy the highest set bit into every lower position.
	for shift := uint32(1); shift < 32; shift <<= 1 {
		x |= x >> shift
	}

	return x + 1
}