feat(richtext): add Markdown to HTML conversion with mention rebuild

Converts GFM Markdown to canonical HTML via goldmark (GFM, no WithUnsafe),
rewriting task lists into TipTap's structure and resolving @username mentions
to <mention-user> tags against real users.
This commit is contained in:
kolaente 2026-06-28 00:00:45 +02:00
parent 3abe8d650a
commit 3459158b99
6 changed files with 664 additions and 0 deletions

47
pkg/richtext/main_test.go Normal file
View File

@ -0,0 +1,47 @@
// Vikunja is a to-do list application to facilitate your life.
// Copyright 2018-present Vikunja and contributors. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package richtext
import (
"os"
"testing"
"code.vikunja.io/api/pkg/db"
"code.vikunja.io/api/pkg/log"
"code.vikunja.io/api/pkg/user"
)
// TestMain prepares the shared test database before the package's tests run:
// it initialises the logger, creates the test engine, syncs the user tables and
// registers the "users" fixtures that the mention-resolution tests look up.
// Every setup error is handed to log.Fatal. The pure converter tests don't
// touch the DB.
func TestMain(m *testing.M) {
	log.InitLogger()

	engine, err := db.CreateTestEngine()
	if err != nil {
		log.Fatal(err)
	}

	err = engine.Sync2(user.GetTables()...)
	if err != nil {
		log.Fatal(err)
	}

	err = db.InitTestFixtures("users")
	if err != nil {
		log.Fatal(err)
	}

	exitCode := m.Run()
	os.Exit(exitCode)
}

View File

@ -0,0 +1,76 @@
// Vikunja is a to-do list application to facilitate your life.
// Copyright 2018-present Vikunja and contributors. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package richtext
import (
"bytes"
"fmt"
"strings"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"xorm.io/xorm"
)
// markdownConverter is the package-level goldmark instance used for every
// Markdown conversion in this file. It enables the GFM extension and
// deliberately never sets html.WithUnsafe(), so raw HTML in the markdown stays
// inert and the only active markup is what goldmark itself emits. This is what
// stops user-supplied markdown from smuggling in scripts.
var markdownConverter = goldmark.New(
goldmark.WithExtensions(extension.GFM),
)
// MarkdownToHTML renders GFM Markdown as canonical rich-text HTML. Task lists
// are rewritten into TipTap's <ul data-type="taskList"> form. No session is
// passed on, so mentions are not resolved and "@username" stays literal text;
// use MarkdownToHTMLWithMentions to turn them into <mention-user> tags.
func MarkdownToHTML(md string) (string, error) {
	out, err := markdownToHTML(md, nil)
	return out, err
}
// MarkdownToHTMLWithMentions behaves like MarkdownToHTML and additionally
// resolves mentions: an "@username" that matches an existing user is replaced
// by a <mention-user> tag. The session s is used for the user lookup; with a
// nil session the mention step is skipped entirely.
func MarkdownToHTMLWithMentions(s *xorm.Session, md string) (string, error) {
	out, err := markdownToHTML(md, s)
	return out, err
}
// markdownToHTML is the shared conversion pipeline behind MarkdownToHTML and
// MarkdownToHTMLWithMentions:
//
//  1. render md with markdownConverter,
//  2. parse the rendered HTML into a node fragment,
//  3. rewrite GFM task-list items on every top-level node,
//  4. when s is non-nil, rebuild @mentions against real users,
//  5. serialise the nodes again and trim surrounding whitespace.
//
// It returns an empty string together with the error if any step fails.
func markdownToHTML(md string, s *xorm.Session) (string, error) {
	var rendered bytes.Buffer
	err := markdownConverter.Convert([]byte(md), &rendered)
	if err != nil {
		return "", fmt.Errorf("converting markdown to html: %w", err)
	}

	fragment, err := parseHTMLFragment(rendered.Bytes())
	if err != nil {
		return "", err
	}

	for i := range fragment {
		convertTaskListItems(fragment[i])
	}

	// Mention resolution needs a DB session; without one mentions stay text.
	if s != nil {
		err = rebuildMentions(s, fragment)
		if err != nil {
			return "", err
		}
	}

	serialized, err := renderHTMLNodes(fragment)
	if err != nil {
		return "", err
	}

	return strings.TrimSpace(serialized), nil
}

View File

@ -0,0 +1,100 @@
// Vikunja is a to-do list application to facilitate your life.
// Copyright 2018-present Vikunja and contributors. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package richtext
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestMarkdownToHTML is a table-driven check of the session-less converter:
// each case feeds a Markdown snippet to MarkdownToHTML and compares the result
// against the exact canonical HTML expected for it.
func TestMarkdownToHTML(t *testing.T) {
	tests := []struct {
		name string
		md   string
		want string
	}{
		{
			name: "heading and bold",
			md:   "# Title\n\nsome **bold** text",
			want: "<h1>Title</h1>\n<p>some <strong>bold</strong> text</p>",
		},
		{
			name: "link",
			md:   "see [the site](https://vikunja.io)",
			want: `<p>see <a href="https://vikunja.io">the site</a></p>`,
		},
		{
			name: "task list becomes tiptap dom",
			md:   "- [x] done\n- [ ] todo",
			want: "<ul data-type=\"taskList\">\n<li data-type=\"taskItem\" data-checked=\"true\"><p>done</p></li>\n<li data-type=\"taskItem\" data-checked=\"false\"><p>todo</p></li>\n</ul>",
		},
		{
			name: "nested task list",
			// The child marker must be indented to the parent's content
			// column (two spaces after "- "). With a single space CommonMark
			// parses it as a sibling item, which would contradict the nested
			// structure expected below.
			md:   "- [ ] parent\n  - [x] child",
			want: "<ul data-type=\"taskList\">\n<li data-type=\"taskItem\" data-checked=\"false\"><p>parent</p><ul data-type=\"taskList\">\n<li data-type=\"taskItem\" data-checked=\"true\"><p>child</p></li>\n</ul>\n</li>\n</ul>",
		},
		{
			name: "task list keeps inline formatting",
			md:   "- [x] task with **bold** and a [link](https://x.io)",
			want: "<ul data-type=\"taskList\">\n<li data-type=\"taskItem\" data-checked=\"true\"><p>task with <strong>bold</strong> and a <a href=\"https://x.io\">link</a></p></li>\n</ul>",
		},
		{
			name: "plain list is not a task list",
			md:   "- one\n- two",
			want: "<ul>\n<li>one</li>\n<li>two</li>\n</ul>",
		},
		{
			name: "pipe table",
			md:   "| a | b |\n|---|---|\n| 1 | 2 |",
			want: "<table>\n<thead>\n<tr>\n<th>a</th>\n<th>b</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>1</td>\n<td>2</td>\n</tr>\n</tbody>\n</table>",
		},
		{
			name: "strikethrough",
			md:   "~~gone~~",
			want: "<p><del>gone</del></p>",
		},
		{
			name: "empty markdown is empty",
			md:   "",
			want: "",
		},
		{
			name: "whitespace markdown is empty",
			md:   " \n ",
			want: "",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := MarkdownToHTML(tt.md)
			require.NoError(t, err)
			assert.Equal(t, tt.want, got)
		})
	}
}
// TestMarkdownToHTML_NoUnsafe guards the "no html.WithUnsafe()" configuration:
// a <script> element embedded in the markdown must not survive as active
// markup, so neither its opening nor its closing tag may appear in the output.
func TestMarkdownToHTML_NoUnsafe(t *testing.T) {
	const md = "text with <script>alert(1)</script> raw html"

	got, err := MarkdownToHTML(md)
	require.NoError(t, err)

	for _, tag := range []string{"<script>", "</script>"} {
		assert.NotContains(t, got, tag)
	}
}

View File

@ -0,0 +1,178 @@
// Vikunja is a to-do list application to facilitate your life.
// Copyright 2018-present Vikunja and contributors. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package richtext
import (
"fmt"
"regexp"
"unicode"
"unicode/utf8"
"code.vikunja.io/api/pkg/user"
"golang.org/x/net/html"
"xorm.io/xorm"
)
// mentionTokenRegex matches "@username" and captures the username in group 1.
// The username must start with a letter, digit or underscore; it may continue
// with letters, digits, '.', '_' or '-', but must again end with a letter,
// digit or underscore so trailing prose punctuation ("@jane.") isn't swallowed.
// RE2 has no look-behind, so the boundary before "@" is checked in code (to
// reject "a@b").
var mentionTokenRegex = regexp.MustCompile(`@([\p{L}\p{N}_](?:[\p{L}\p{N}._-]*[\p{L}\p{N}_])?)`)
// rebuildMentions swaps "@username" tokens in nodes for <mention-user> tags.
//
// It first gathers every eligible text node (those outside code, links and
// existing mentions), collects the distinct usernames mentioned in them and
// resolves all of them with a single user.GetUsersByUsername call. Only handles
// that map to a returned user are replaced; unknown handles stay literal text.
// The function returns early, without querying, when there is nothing to
// resolve, and wraps a lookup failure in the returned error.
func rebuildMentions(s *xorm.Session, nodes []*html.Node) error {
	var targets []*html.Node
	for _, root := range nodes {
		collectMentionTextNodes(root, false, &targets)
	}
	if len(targets) == 0 {
		return nil
	}

	// Deduplicate handles so each username is looked up once.
	seen := map[string]struct{}{}
	var handles []string
	for _, textNode := range targets {
		for _, handle := range findMentionCandidates(textNode.Data) {
			if _, dup := seen[handle]; dup {
				continue
			}
			seen[handle] = struct{}{}
			handles = append(handles, handle)
		}
	}
	if len(handles) == 0 {
		return nil
	}

	found, err := user.GetUsersByUsername(s, handles, false)
	if err != nil {
		return fmt.Errorf("looking up mentioned users: %w", err)
	}

	// Re-key the result by username, which is what the text contains.
	byUsername := make(map[string]*user.User, len(found))
	for _, u := range found {
		byUsername[u.Username] = u
	}
	if len(byUsername) == 0 {
		return nil
	}

	for _, textNode := range targets {
		replaceMentionsInTextNode(textNode, byUsername)
	}
	return nil
}
// collectMentionTextNodes walks the tree below n depth-first and appends to
// out every text node that is not nested inside a <code>, <pre>, <a> or
// <mention-user> element. inSkip tells the walk that an ancestor of n was
// already one of those elements.
func collectMentionTextNodes(n *html.Node, inSkip bool, out *[]*html.Node) {
	switch n.Type {
	case html.TextNode:
		// Text nodes are leaves: record them unless an ancestor excludes them.
		if inSkip {
			return
		}
		*out = append(*out, n)
		return
	case html.ElementNode:
		if n.Data == "code" || n.Data == "pre" || n.Data == "a" || n.Data == "mention-user" {
			inSkip = true
		}
	}

	for child := n.FirstChild; child != nil; child = child.NextSibling {
		collectMentionTextNodes(child, inSkip, out)
	}
}
// findMentionCandidates returns, in order of appearance, the usernames of all
// "@username" tokens in text. Tokens whose "@" directly follows a word
// character (as in an email address) are not reported. Duplicates are kept.
func findMentionCandidates(text string) []string {
	var found []string
	matches := mentionTokenRegex.FindAllStringSubmatchIndex(text, -1)
	for i := range matches {
		// Index 0 is the "@" position; 2 and 3 delimit the captured username.
		atPos, nameStart, nameEnd := matches[i][0], matches[i][2], matches[i][3]
		if !mentionPrecededByWordChar(text, atPos) {
			found = append(found, text[nameStart:nameEnd])
		}
	}
	return found
}
// replaceMentionsInTextNode splits the text node tn around every "@username"
// token that resolves to an entry in users and replaces tn with the resulting
// sequence of text and <mention-user> nodes.
//
// Tokens whose "@" follows a word character, and tokens whose username is not
// a key of users, are left as part of the surrounding text. If nothing in tn
// resolves, the tree is not modified.
//
// A text node without a parent (for example a top-level node of a parsed
// fragment) cannot be replaced in place, so it is left untouched instead of
// dereferencing the nil parent.
func replaceMentionsInTextNode(tn *html.Node, users map[string]*user.User) {
	parent := tn.Parent
	if parent == nil {
		return
	}

	text := tn.Data
	var newNodes []*html.Node
	// cursor marks the end of the text already copied into newNodes.
	cursor := 0
	for _, m := range mentionTokenRegex.FindAllStringSubmatchIndex(text, -1) {
		start, end := m[0], m[1]
		if mentionPrecededByWordChar(text, start) {
			continue
		}
		u, ok := users[text[m[2]:m[3]]]
		if !ok {
			continue
		}
		// Keep the literal text between the previous mention and this one.
		if start > cursor {
			newNodes = append(newNodes, &html.Node{Type: html.TextNode, Data: text[cursor:start]})
		}
		newNodes = append(newNodes, newMentionNode(u))
		cursor = end
	}
	if len(newNodes) == 0 {
		return
	}
	// Keep whatever follows the last replaced mention.
	if cursor < len(text) {
		newNodes = append(newNodes, &html.Node{Type: html.TextNode, Data: text[cursor:]})
	}

	for _, nn := range newNodes {
		parent.InsertBefore(nn, tn)
	}
	parent.RemoveChild(tn)
}
// newMentionNode creates the element
//
//	<mention-user data-id="username" data-label="Name">@Name</mention-user>
//
// for u, where "Name" is u.GetName(). data-id holds the username rather than a
// numeric id; the original note says this lets extractMentionedUsernames
// (defined outside this file) re-resolve it.
func newMentionNode(u *user.User) *html.Node {
	attrs := []html.Attribute{
		{Key: "data-id", Val: u.Username},
		{Key: "data-label", Val: u.GetName()},
	}
	mention := &html.Node{Type: html.ElementNode, Data: "mention-user", Attr: attrs}

	label := &html.Node{Type: html.TextNode, Data: "@" + u.GetName()}
	mention.AppendChild(label)

	return mention
}
// mentionPrecededByWordChar reports whether the rune that ends right before
// byte offset atIndex in text is a letter, a number or an underscore. A true
// result means the "@" at atIndex sits in the middle of a token (such as an
// email address) and is not a mention. An "@" at the very start of text has no
// predecessor and yields false.
func mentionPrecededByWordChar(text string, atIndex int) bool {
	if atIndex == 0 {
		return false
	}

	prev, _ := utf8.DecodeLastRuneInString(text[:atIndex])
	switch {
	case prev == '_':
		return true
	case unicode.IsLetter(prev):
		return true
	default:
		return unicode.IsNumber(prev)
	}
}

View File

@ -0,0 +1,107 @@
// Vikunja is a to-do list application to facilitate your life.
// Copyright 2018-present Vikunja and contributors. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package richtext
import (
"testing"
"code.vikunja.io/api/pkg/db"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestMarkdownToHTMLWithMentions exercises mention resolution against the user
// fixtures: known and unknown handles, punctuation next to a mention, several
// mentions at once, emails, code spans, task-list items, and the session-less
// entry point that must leave mentions as text.
func TestMarkdownToHTMLWithMentions(t *testing.T) {
	// convert reloads the fixtures, opens a session that is closed when the
	// calling subtest finishes, and returns the converted HTML for md.
	convert := func(t *testing.T, md string) string {
		t.Helper()
		db.LoadAndAssertFixtures(t)
		session := db.NewSession()
		t.Cleanup(func() { session.Close() })

		got, err := MarkdownToHTMLWithMentions(session, md)
		require.NoError(t, err)
		return got
	}

	t.Run("known mention is rebuilt", func(t *testing.T) {
		got := convert(t, "hi @user1")
		assert.Equal(t, `<p>hi <mention-user data-id="user1" data-label="user1">@user1</mention-user></p>`, got)
	})

	t.Run("unknown mention stays literal text", func(t *testing.T) {
		got := convert(t, "hi @nosuchuser")
		assert.Equal(t, "<p>hi @nosuchuser</p>", got)
	})

	t.Run("mention next to punctuation", func(t *testing.T) {
		got := convert(t, "cc @user1, please review")
		assert.Equal(t, `<p>cc <mention-user data-id="user1" data-label="user1">@user1</mention-user>, please review</p>`, got)
	})

	t.Run("multiple mentions resolve in one pass", func(t *testing.T) {
		got := convert(t, "ping @user1 and @user2")
		assert.Contains(t, got, `<mention-user data-id="user1" data-label="user1">@user1</mention-user>`)
		assert.Contains(t, got, `<mention-user data-id="user2" data-label="user2">@user2</mention-user>`)
	})

	t.Run("email is not a mention", func(t *testing.T) {
		got := convert(t, "reach me at user1@example.com please")
		assert.NotContains(t, got, "mention-user")
	})

	t.Run("mention inside code span is ignored", func(t *testing.T) {
		got := convert(t, "use `@user1` literally")
		assert.NotContains(t, got, "mention-user")
		assert.Contains(t, got, "<code>@user1</code>")
	})

	t.Run("mention inside task list item", func(t *testing.T) {
		got := convert(t, "- [ ] ping @user1")
		assert.Contains(t, got, `data-type="taskItem"`)
		assert.Contains(t, got, `<mention-user data-id="user1" data-label="user1">@user1</mention-user>`)
	})

	// No fixtures or session needed: the plain converter never resolves users.
	t.Run("no session leaves mention as text", func(t *testing.T) {
		got, err := MarkdownToHTML("hi @user1")
		require.NoError(t, err)
		assert.Equal(t, "<p>hi @user1</p>", got)
	})
}

View File

@ -0,0 +1,156 @@
// Vikunja is a to-do list application to facilitate your life.
// Copyright 2018-present Vikunja and contributors. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package richtext
import (
"bytes"
"fmt"
"strings"
"github.com/JohannesKaufmann/dom"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// parseHTMLFragment parses in as an HTML fragment, using a synthetic <body>
// element as the parsing context (so tables/lists parse), and returns the
// resulting top-level nodes. A parser error is returned wrapped, with a nil
// node slice.
func parseHTMLFragment(in []byte) ([]*html.Node, error) {
	body := &html.Node{Type: html.ElementNode, DataAtom: atom.Body, Data: "body"}

	parsed, err := html.ParseFragment(bytes.NewReader(in), body)
	if err == nil {
		return parsed, nil
	}
	return nil, fmt.Errorf("parsing converted html: %w", err)
}
// renderHTMLNodes serialises nodes, in order, into a single HTML string. It
// stops at the first node that fails to render and returns that error wrapped,
// together with an empty string.
func renderHTMLNodes(nodes []*html.Node) (string, error) {
	out := new(bytes.Buffer)
	for i := range nodes {
		err := html.Render(out, nodes[i])
		if err != nil {
			return "", fmt.Errorf("rendering converted html: %w", err)
		}
	}
	return out.String(), nil
}
// convertTaskListItems rewrites goldmark's GFM task-list markup below n
// (<li><input type="checkbox"> text</li>) into the TipTap shape
//
//	<ul data-type="taskList"><li data-type="taskItem" data-checked="…"><p>text</p></li>
//
// which, per the original note, is what the web editor and
// resetDescriptionChecklist (recurring-task reset) expect.
//
// The walk is post-order: all descendants are converted before n itself. A
// node is only touched when it is an <li> that starts with a checkbox; the
// checkbox is removed, its checked state moves to data-checked, the leading
// inline content is wrapped in a <p>, and a <ul>/<ol> parent is tagged as a
// task list.
func convertTaskListItems(n *html.Node) {
	child := n.FirstChild
	for child != nil {
		convertTaskListItems(child)
		child = child.NextSibling
	}

	isListItem := n.Type == html.ElementNode && n.Data == "li"
	if !isListItem {
		return
	}

	checkbox := leadingCheckbox(n)
	if checkbox == nil {
		return
	}

	// Only the presence of the "checked" attribute matters, not its value.
	_, isChecked := dom.GetAttribute(checkbox, "checked")
	dom.RemoveNode(checkbox)

	setAttribute(n, "data-type", "taskItem")
	setAttribute(n, "data-checked", boolString(isChecked))
	wrapLeadingInlineInParagraph(n)

	list := n.Parent
	if list == nil || list.Type != html.ElementNode {
		return
	}
	switch list.Data {
	case "ul", "ol":
		setAttribute(list, "data-type", "taskList")
	}
}
// leadingCheckbox returns the <input type="checkbox"> element that opens li,
// ignoring any whitespace-only text nodes in front of it. It returns nil when
// li is empty or when its first significant child is anything else, i.e. when
// li is not a task item.
func leadingCheckbox(li *html.Node) *html.Node {
	first := li.FirstChild
	for first != nil && isWhitespaceText(first) {
		first = first.NextSibling
	}

	if first == nil {
		return nil
	}
	if first.Type != html.ElementNode || first.Data != "input" {
		return nil
	}
	if dom.GetAttributeOr(first, "type", "") != "checkbox" {
		return nil
	}
	return first
}
// wrapLeadingInlineInParagraph moves li's leading inline content into a new
// <p> placed at the start of li, matching TipTap's taskItem shape
// (<li><p>text</p>…</li>), and trims the whitespace at the paragraph's edges.
//
// "Leading inline content" is every child up to, but not including, the first
// block-level element. Besides nested lists this also covers code blocks,
// block quotes, tables, headings, rules and paragraphs: a <p> must not contain
// any of them, so they stay behind the new paragraph as siblings instead of
// being wrapped into it.
//
// If there is no leading content, or it consists only of whitespace text, li
// is left unchanged.
func wrapLeadingInlineInParagraph(li *html.Node) {
	var inline []*html.Node
	hasContent := false

collect:
	for c := li.FirstChild; c != nil; c = c.NextSibling {
		if c.Type == html.ElementNode {
			switch c.Data {
			case "ul", "ol", "p", "pre", "blockquote", "table", "hr", "div",
				"h1", "h2", "h3", "h4", "h5", "h6":
				// First block-level child: the inline run ends here.
				break collect
			}
		}
		if !isWhitespaceText(c) {
			hasContent = true
		}
		inline = append(inline, c)
	}

	if !hasContent {
		return
	}

	p := &html.Node{Type: html.ElementNode, Data: "p", DataAtom: atom.P}
	for _, c := range inline {
		li.RemoveChild(c)
		p.AppendChild(c)
	}
	// The inline run was a prefix of li's children, so the paragraph goes
	// in front of whatever remains.
	li.InsertBefore(p, li.FirstChild)
	trimEdgeWhitespace(p)
}
// trimEdgeWhitespace strips spaces, tabs, newlines and carriage returns from
// the left edge of n's first child and from the right edge of n's last child,
// each only when that child is a text node. This keeps the wrapped paragraph
// free of goldmark's "<input> " spacing and trailing newline.
func trimEdgeWhitespace(n *html.Node) {
	const cutset = " \t\n\r"

	head, tail := n.FirstChild, n.LastChild
	if head != nil && head.Type == html.TextNode {
		head.Data = strings.TrimLeft(head.Data, cutset)
	}
	if tail != nil && tail.Type == html.TextNode {
		tail.Data = strings.TrimRight(tail.Data, cutset)
	}
}
// setAttribute sets attribute key on n to val: the first existing attribute
// with that key is overwritten in place, otherwise a new attribute is appended.
func setAttribute(n *html.Node, key, val string) {
	for i := range n.Attr {
		if n.Attr[i].Key != key {
			continue
		}
		n.Attr[i].Val = val
		return
	}
	n.Attr = append(n.Attr, html.Attribute{Key: key, Val: val})
}
// boolString returns the literal "true" or "false" for b, the form used for
// the data-checked attribute of task items.
func boolString(b bool) string {
	switch b {
	case true:
		return "true"
	default:
		return "false"
	}
}
// isWhitespaceText reports whether n is a text node whose content is empty or
// consists solely of whitespace.
func isWhitespaceText(n *html.Node) bool {
	if n.Type != html.TextNode {
		return false
	}
	return len(strings.TrimSpace(n.Data)) == 0
}