fix(search): rank exact task-index match before BM25 text relevance on ParadeDB

The BM25 relevance ranking added `pdb.score(tasks.id)` to the search SELECT and ORDER BY. ParadeDB can only compute a score for a pure-ParadeDB query shape, so two cases produced "pq: Unsupported query shape": 1. A numeric search (e.g. "#17") OR's the ParadeDB `|||` operators with a plain `"index" = N` equality in the same boolean group. Scoring that mixed group is unsupported. 2. When favorites are in scope, the `project_id IN (...) OR id IN (<favorites subquery>)` predicate is unsupported under pdb.score regardless of how the subquery is expressed (OR or UNION) - it just was never exercised because the ranking tests searched a single project with no favorites. Both are now handled so each query ParadeDB scores is a supported shape: - Numeric search runs as two arms: an exact `index = N` arm (no score, ranked first) and a text `|||` arm scored by pdb.score DESC. The arms are merged in Go (index matches first, deduped by task id) and paginated in memory; the count query keeps the combined `OR index = N` predicate (no score), which is a supported shape, so totalItems stays correct. - The relevance arms reach favorites through a LEFT JOIN and scope on the joined column (`rank_favorites.entity_id IS NOT NULL`) instead of an id-IN-subquery, which ParadeDB can score. Non-numeric (pure text) searches keep the single pdb.score-ordered query. Non-ParadeDB databases are unchanged (no pdb.score, no ranking). TestTaskSearchRelevanceRankingNumericIndex covers the numeric case: on ParadeDB the exact-index task ranks first, then text matches by relevance; on other databases it only asserts the matches are returned. Validated against the CI-pinned ParadeDB image (paradedb 0.21.12): the full pkg/models and pkg/webtests suites pass, including TestTaskCollection_ReadAll/search_for_task_index and the HTTP search tests.
2026-06-19 22:52:26 +02:00 · 2026-06-19 22:52:26 +02:00 · 116fb1e2e0
parent 9fb0d86c1b
commit 116fb1e2e0
2 changed files with 205 additions and 51 deletions
--- a/pkg/models/task_search.go
+++ b/pkg/models/task_search.go
@ -320,11 +320,17 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo

 	// Then return all tasks for that projects
 	var where builder.Cond
+	// textSearchCond holds only the ParadeDB/ILIKE title+description match, kept
+	// separate from the index-equality match so the relevance ranking path can
+	// score a pure-ParadeDB query (see rankByRelevance below).
+	var textSearchCond builder.Cond
+	var searchIndex int64

 	if opts.search != "" {
-		where = db.MultiFieldSearchWithTableAlias([]string{"title", "description"}, opts.search, "tasks")
+		textSearchCond = db.MultiFieldSearchWithTableAlias([]string{"title", "description"}, opts.search, "tasks")
+		where = textSearchCond

-		searchIndex := getTaskIndexFromSearchString(opts.search)
+		searchIndex = getTaskIndexFromSearchString(opts.search)
 		if searchIndex > 0 {
 			where = builder.Or(where, builder.Eq{"`index`": searchIndex})
 		}
@ -350,8 +356,10 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 		favoritesCond = builder.In("tasks.id", favCond)
 	}

+	scopeCond := builder.Or(projectIDCond, favoritesCond)
+
 	limit, start := getLimitFromPageIndex(opts.page, opts.perPage)
-	cond := builder.And(builder.Or(projectIDCond, favoritesCond), where, filterCond)
+	cond := builder.And(scopeCond, where, filterCond)

 	// ParadeDB exposes the BM25 relevance score via pdb.score(<key_field>) for any
 	// query containing a ParadeDB operator (the ||| from MultiFieldSearch qualifies).
@ -360,15 +368,20 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 	// ParadeDB-only: pdb.score is invalid SQL on sqlite/mysql/plain postgres.
 	rankByRelevance := db.ParadeDBAvailable() && opts.search != "" && !opts.userProvidedSort

+	// ParadeDB's pdb.score() rejects an `id IN (<subquery>)` favorites scope (whether
+	// expressed as OR or UNION) as an unsupported query shape, so the relevance arms
+	// reach favorites through a LEFT JOIN and scope on the joined column instead,
+	// which it can score. Only relevant when favorites are part of the scope.
+	rankFavoritesJoin := rankByRelevance && d.hasFavoritesProject
+	rankScopeCond := scopeCond
+	if rankFavoritesJoin {
+		rankScopeCond = builder.Or(projectIDCond, builder.Expr("rank_favorites.entity_id IS NOT NULL"))
+	}
+
 	var distinct = "tasks.*"
 	if strings.Contains(orderby, "task_positions.") {
 		distinct += ", task_positions.position"
 	}
-	if rankByRelevance {
-		// DISTINCT requires every ORDER BY expression to appear in the SELECT list.
-		distinct += ", pdb.score(tasks.id)"
-		orderby = "pdb.score(tasks.id) DESC, " + orderby
-	}

 	var expandSubtasks = false
 	for _, expandable := range opts.expand {
@ -378,56 +391,124 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 		}
 	}

-	if expandSubtasks {
-		cond = builder.And(cond, builder.Or(
-			builder.IsNull{"task_relations.id"},
-			builder.IsNull{"parent_tasks.id"},
-			builder.Expr("parent_tasks.project_id != tasks.project_id"),
-		))
-	}
-
-	query := d.s.Where(cond)
-	if rankByRelevance {
-		// xorm's Distinct() quotes each column, which corrupts the pdb.score(tasks.id)
-		// function call. Select() passes the raw column list through untouched, and
-		// Distinct() (no args) still emits the DISTINCT keyword.
-		query = query.Select(distinct).Distinct()
-	} else {
-		query = query.Distinct(distinct)
-	}
-	if limit > 0 {
-		query = query.Limit(limit, start)
-	}
-
-	for _, param := range opts.sortby {
-		if param.sortBy == taskPropertyPosition {
-			query = query.Join("LEFT", "task_positions", "task_positions.task_id = tasks.id AND task_positions.project_view_id = ?", param.projectViewID)
-			break
+	// addJoins applies the same LEFT JOINs the count query and every fetch arm
+	// rely on (position sort, bucket filter, subtask expansion).
+	addJoins := func(query *xorm.Session) *xorm.Session {
+		for _, param := range opts.sortby {
+			if param.sortBy == taskPropertyPosition {
+				query = query.Join("LEFT", "task_positions", "task_positions.task_id = tasks.id AND task_positions.project_view_id = ?", param.projectViewID)
+				break
+			}
 		}
+		if joinTaskBuckets {
+			joinCond := "task_buckets.task_id = tasks.id"
+			if opts.projectViewID > 0 {
+				joinCond += " AND task_buckets.project_view_id = ?"
+				query = query.Join("LEFT", "task_buckets", joinCond, opts.projectViewID)
+			} else {
+				query = query.Join("LEFT", "task_buckets", joinCond)
+			}
+		}
+		if expandSubtasks {
+			query = query.
+				Join("LEFT", "task_relations", "tasks.id = task_relations.task_id and task_relations.relation_kind = 'parenttask'").
+				Join("LEFT", "tasks parent_tasks", "task_relations.other_task_id = parent_tasks.id")
+		}
+		return query
 	}

-	if joinTaskBuckets {
-		joinCond := "task_buckets.task_id = tasks.id"
-		if opts.projectViewID > 0 {
-			joinCond += " AND task_buckets.project_view_id = ?"
-			query = query.Join("LEFT", "task_buckets", joinCond, opts.projectViewID)
+	subtaskParentCond := builder.Or(
+		builder.IsNull{"task_relations.id"},
+		builder.IsNull{"parent_tasks.id"},
+		builder.Expr("parent_tasks.project_id != tasks.project_id"),
+	)
+	if expandSubtasks {
+		cond = builder.And(cond, subtaskParentCond)
+	}
+
+	// fetchTasks runs a single fetch arm: it builds the DISTINCT select (raw, so
+	// xorm doesn't quote-corrupt the pdb.score function call), applies the joins
+	// and the given order. paginate=false fetches every matching row so the caller
+	// can merge multiple arms and slice the combined result in Go.
+	fetchTasks := func(armCond builder.Cond, selectCols, armOrderby string, paginate bool) ([]*Task, error) {
+		query := d.s.Where(armCond)
+		if selectCols == distinct {
+			query = query.Distinct(selectCols)
 		} else {
-			query = query.Join("LEFT", "task_buckets", joinCond)
+			// Select() passes the raw column list through untouched while Distinct()
+			// (no args) still emits the DISTINCT keyword.
+			query = query.Select(selectCols).Distinct()
 		}
-	}
-	if expandSubtasks {
-		query = query.
-			Join("LEFT", "task_relations", "tasks.id = task_relations.task_id and task_relations.relation_kind = 'parenttask'").
-			Join("LEFT", "tasks parent_tasks", "task_relations.other_task_id = parent_tasks.id")
+		if paginate && limit > 0 {
+			query = query.Limit(limit, start)
+		}
+		if rankFavoritesJoin {
+			query = query.Join("LEFT", "favorites rank_favorites", "rank_favorites.entity_id = tasks.id AND rank_favorites.user_id = ? AND rank_favorites.kind = ?", d.a.GetID(), FavoriteKindTask)
+		}
+		query = addJoins(query)
+
+		armTasks := []*Task{}
+		if err := query.OrderBy(armOrderby).Find(&armTasks); err != nil {
+			sql, vals := query.LastSQL()
+			return nil, fmt.Errorf("could not fetch tasks, error was '%w', sql: '%v', values: %v", err, sql, vals)
+		}
+		return armTasks, nil
 	}

-	tasks = []*Task{}
-	err = query.
-		OrderBy(orderby).
-		Find(&tasks)
-	if err != nil {
-		sql, vals := query.LastSQL()
-		return nil, 0, fmt.Errorf("could not fetch tasks, error was '%w', sql: '%v', values: %v", err, sql, vals)
+	rankCondWith := func(searchCond builder.Cond) builder.Cond {
+		c := builder.And(rankScopeCond, searchCond, filterCond)
+		if expandSubtasks {
+			c = builder.And(c, subtaskParentCond)
+		}
+		return c
+	}
+
+	switch {
+	case rankByRelevance && searchIndex > 0:
+		// A numeric search matches both the task index and the fuzzy text. pdb.score
+		// can only score a pure-ParadeDB query, so a `||| ... OR index = N` group is
+		// an unsupported query shape on ParadeDB. Run two supported arms instead and
+		// rank exact index matches first, then text matches by relevance.
+		indexTasks, err := fetchTasks(rankCondWith(builder.Eq{"`index`": searchIndex}), distinct, orderby, false)
+		if err != nil {
+			return nil, 0, err
+		}
+
+		textTasks, err := fetchTasks(rankCondWith(textSearchCond), distinct+", pdb.score(tasks.id)", "pdb.score(tasks.id) DESC, "+orderby, false)
+		if err != nil {
+			return nil, 0, err
+		}
+
+		// Exact index matches rank first; dedup a task matching both arms in favour
+		// of its index-match position.
+		seen := make(map[int64]bool, len(indexTasks)+len(textTasks))
+		merged := make([]*Task, 0, len(indexTasks)+len(textTasks))
+		for _, t := range indexTasks {
+			if !seen[t.ID] {
+				seen[t.ID] = true
+				merged = append(merged, t)
+			}
+		}
+		for _, t := range textTasks {
+			if !seen[t.ID] {
+				seen[t.ID] = true
+				merged = append(merged, t)
+			}
+		}
+
+		tasks = paginateInMemory(merged, limit, start)
+	case rankByRelevance:
+		// Pure text search: a single pdb.score-ordered query over the score-able
+		// scope is a supported shape.
+		tasks, err = fetchTasks(rankCondWith(textSearchCond), distinct+", pdb.score(tasks.id)", "pdb.score(tasks.id) DESC, "+orderby, true)
+		if err != nil {
+			return nil, 0, err
+		}
+	default:
+		tasks, err = fetchTasks(cond, distinct, orderby, true)
+		if err != nil {
+			return nil, 0, err
+		}
 	}

 	// fetch subtasks when expanding
@ -503,3 +584,16 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 	}
 	return
 }
+
+// paginateInMemory slices an already-ordered result set. limit == 0 means "no
+// limit" (return everything from start onwards), matching getLimitFromPageIndex.
+func paginateInMemory(tasks []*Task, limit, start int) []*Task {
+	if start >= len(tasks) {
+		return []*Task{}
+	}
+	tasks = tasks[start:]
+	if limit > 0 && limit < len(tasks) {
+		tasks = tasks[:limit]
+	}
+	return tasks
+}
--- a/pkg/models/task_search_test.go
+++ b/pkg/models/task_search_test.go
@ -17,6 +17,7 @@
 package models

 import (
+	"strconv"
 	"testing"

 	"code.vikunja.io/api/pkg/db"
@ -105,3 +106,62 @@ func TestTaskSearchRelevanceRanking(t *testing.T) {
 		assertRelevanceRanked(t, &TaskCollection{ProjectID: 1, ProjectViewID: 1})
 	})
 }
+
+// TestTaskSearchRelevanceRankingNumericIndex covers a numeric search (e.g. "#42"):
+// it matches both a task by its per-project index and tasks whose title/description
+// contain that number via fuzzy text search. On ParadeDB the exact-index task must
+// rank first, then the text matches by relevance. This is the case that combines an
+// `index = N` equality with the ParadeDB ||| operators; scoring such a mixed boolean
+// group is an unsupported query shape, so it is run as two arms (index, then text).
+func TestTaskSearchRelevanceRankingNumericIndex(t *testing.T) {
+	db.LoadAndAssertFixtures(t)
+	s := db.NewSession()
+	defer s.Close()
+
+	usr := &user.User{ID: 1}
+
+	// The exact-index task: its index is what we search for. Its title deliberately
+	// does not contain the number, so it can only be found by the index match.
+	exactIndex := &Task{Title: "Quarterly planning offsite", ProjectID: 1}
+	require.NoError(t, exactIndex.Create(s, usr))
+	require.NotZero(t, exactIndex.Index)
+
+	indexStr := strconv.FormatInt(exactIndex.Index, 10)
+	search := "#" + indexStr
+
+	// Text matches: their titles contain the searched number so the fuzzy text arm
+	// returns them, but they are not the exact-index task.
+	textA := &Task{Title: "Review ticket " + search + " backlog", ProjectID: 1}
+	require.NoError(t, textA.Create(s, usr))
+	textB := &Task{Title: "Notes about " + search, ProjectID: 1}
+	require.NoError(t, textB.Create(s, usr))
+
+	assertIndexFirst := func(t *testing.T, tc *TaskCollection) {
+		got, _, _, err := tc.ReadAll(s, usr, search, 0, 50)
+		require.NoError(t, err)
+
+		gotTasks, is := got.([]*Task)
+		require.True(t, is)
+
+		gotIDs := make([]int64, len(gotTasks))
+		for i, tsk := range gotTasks {
+			gotIDs[i] = tsk.ID
+		}
+
+		require.Contains(t, gotIDs, exactIndex.ID, "the exact-index task should be returned")
+
+		if db.ParadeDBAvailable() {
+			require.NotEmpty(t, gotTasks)
+			assert.Equal(t, exactIndex.ID, gotTasks[0].ID, "the exact-index match should rank first")
+			assert.Contains(t, gotIDs, textA.ID, "text matches should also be returned, ranked after the index match")
+		}
+	}
+
+	t.Run("no view", func(t *testing.T) {
+		assertIndexFirst(t, &TaskCollection{ProjectID: 1})
+	})
+
+	t.Run("list view", func(t *testing.T) {
+		assertIndexFirst(t, &TaskCollection{ProjectID: 1, ProjectViewID: 1})
+	})
+}