From 9fb0d86c1b497b2f2e599bf3b238adcabb00fda2 Mon Sep 17 00:00:00 2001
From: kolaente <k@knt.li>
Date: Fri, 19 Jun 2026 20:46:28 +0200
Subject: [PATCH 1/5] feat(search): rank ParadeDB search results by BM25
 relevance (#2690)

When ParadeDB is in use and a search is run, results now keep the current
fuzzy/OR matching but are ordered by BM25 relevance so tasks matching all
query words rank above tasks matching only some.

Details:
- ParadeDB exposes the BM25 score via pdb.score(<key_field>); Vikunja's
  key_field is id, so we order by pdb.score(tasks.id) DESC, then the
  existing order-by (ending in a stable tasks.id tiebreak).
- Gating: relevance ordering only applies when ParadeDB is available, a
  search term is present, AND the user did not pass an explicit sort_by.
  An explicit user sort still wins; relevance only replaces the default
  (id / position) sort.
- DISTINCT requires every ORDER BY expression to appear in the SELECT
  list, so pdb.score(tasks.id) is added to the selected columns too (for
  both the plain and task_positions-join query shapes). Because xorm's
  Distinct() quotes each column and corrupts the function call, the
  ranking path uses Select(rawColumns).Distinct() instead.
- ParadeDB-only by nature: pdb.score is invalid SQL on sqlite, mysql and
  plain postgres, so those paths are completely unchanged.

A test (TestTaskSearchRelevanceRanking) creates a task matching all query
words plus tasks matching only one, then searches a multi-word query. On
ParadeDB it asserts the all-words task ranks first; on other databases it
only asserts the matching tasks are returned, so it stays green across the
whole CI database matrix. The CI ParadeDB matrix entry exercises the
ranking assertion.

Follow-up (not in this change): boosting results where the words appear in
order / in close proximity above plain all-words matches.

Fixes #2690
---
 pkg/models/task_collection.go  |  1 +
 pkg/models/task_search.go      | 24 ++++++++++++++--
 pkg/models/task_search_test.go | 51 ++++++++++++++++++++++++++++++++++
 pkg/models/tasks.go            |  4 +++
 4 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/pkg/models/task_collection.go b/pkg/models/task_collection.go
index bc217f7ca..442c3e657 100644
--- a/pkg/models/task_collection.go
+++ b/pkg/models/task_collection.go
@@ -142,6 +142,7 @@ func getTaskFilterOptsFromCollection(tf *TaskCollection, projectView *ProjectVie
 
 	opts = &taskSearchOptions{
 		sortby:             sort,
+		userProvidedSort:   len(tf.SortBy) > 0,
 		filterIncludeNulls: tf.FilterIncludeNulls,
 		filter:             tf.Filter,
 		filterTimezone:     tf.FilterTimezone,
diff --git a/pkg/models/task_search.go b/pkg/models/task_search.go
index 07da3f809..e6d06f4e8 100644
--- a/pkg/models/task_search.go
+++ b/pkg/models/task_search.go
@@ -353,10 +353,22 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 	limit, start := getLimitFromPageIndex(opts.page, opts.perPage)
 	cond := builder.And(builder.Or(projectIDCond, favoritesCond), where, filterCond)
 
+	// ParadeDB exposes the BM25 relevance score via pdb.score(<key_field>) for any
+	// query containing a ParadeDB operator (the ||| from MultiFieldSearch qualifies).
+	// When searching without an explicit user sort, order by relevance so tasks
+	// matching all query words rank above tasks matching only some. This is
+	// ParadeDB-only: pdb.score is invalid SQL on sqlite/mysql/plain postgres.
+	rankByRelevance := db.ParadeDBAvailable() && opts.search != "" && !opts.userProvidedSort
+
 	var distinct = "tasks.*"
 	if strings.Contains(orderby, "task_positions.") {
 		distinct += ", task_positions.position"
 	}
+	if rankByRelevance {
+		// DISTINCT requires every ORDER BY expression to appear in the SELECT list.
+		distinct += ", pdb.score(tasks.id)"
+		orderby = "pdb.score(tasks.id) DESC, " + orderby
+	}
 
 	var expandSubtasks = false
 	for _, expandable := range opts.expand {
@@ -374,9 +386,15 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 		))
 	}
 
-	query := d.s.
-		Distinct(distinct).
-		Where(cond)
+	query := d.s.Where(cond)
+	if rankByRelevance {
+		// xorm's Distinct() quotes each column, which corrupts the pdb.score(tasks.id)
+		// function call. Select() passes the raw column list through untouched, and
+		// Distinct() (no args) still emits the DISTINCT keyword.
+		query = query.Select(distinct).Distinct()
+	} else {
+		query = query.Distinct(distinct)
+	}
 	if limit > 0 {
 		query = query.Limit(limit, start)
 	}
diff --git a/pkg/models/task_search_test.go b/pkg/models/task_search_test.go
index 83e52700a..e5af2e95d 100644
--- a/pkg/models/task_search_test.go
+++ b/pkg/models/task_search_test.go
@@ -54,3 +54,54 @@ func TestKanbanViewBucketFiltering(t *testing.T) {
 		assert.NotContains(t, taskBuckets, id)
 	}
 }
+
+// TestTaskSearchRelevanceRanking verifies that a multi-word search ranks the task
+// matching all words above tasks matching only some. The ranking is BM25-based and
+// therefore only enforced on ParadeDB; on other databases we only assert that the
+// matching tasks are returned (no order guarantee), keeping the test green across
+// the whole CI database matrix.
+func TestTaskSearchRelevanceRanking(t *testing.T) {
+	db.LoadAndAssertFixtures(t)
+	s := db.NewSession()
+	defer s.Close()
+
+	usr := &user.User{ID: 1}
+
+	allWords := &Task{Title: "Backup server migration", ProjectID: 1}
+	require.NoError(t, allWords.Create(s, usr))
+	oneWordA := &Task{Title: "Backup of old files", ProjectID: 1}
+	require.NoError(t, oneWordA.Create(s, usr))
+	oneWordB := &Task{Title: "server room booking", ProjectID: 1}
+	require.NoError(t, oneWordB.Create(s, usr))
+
+	assertRelevanceRanked := func(t *testing.T, tc *TaskCollection) {
+		got, _, _, err := tc.ReadAll(s, usr, "backup server", 0, 50)
+		require.NoError(t, err)
+
+		gotTasks, is := got.([]*Task)
+		require.True(t, is)
+
+		gotIDs := make([]int64, len(gotTasks))
+		for i, tsk := range gotTasks {
+			gotIDs[i] = tsk.ID
+		}
+
+		require.Contains(t, gotIDs, allWords.ID, "the task matching all words should be returned")
+
+		if db.ParadeDBAvailable() {
+			require.NotEmpty(t, gotTasks)
+			assert.Equal(t, allWords.ID, gotTasks[0].ID, "task matching all query words should rank first by BM25 relevance")
+		}
+	}
+
+	// Without a view: plain "tasks.*, pdb.score(tasks.id)" select.
+	t.Run("no view", func(t *testing.T) {
+		assertRelevanceRanked(t, &TaskCollection{ProjectID: 1})
+	})
+
+	// With a view: exercises the task_positions LEFT JOIN, which adds
+	// task_positions.position to the DISTINCT select alongside pdb.score(tasks.id).
+	t.Run("list view", func(t *testing.T) {
+		assertRelevanceRanked(t, &TaskCollection{ProjectID: 1, ProjectViewID: 1})
+	})
+}
diff --git a/pkg/models/tasks.go b/pkg/models/tasks.go
index 978a0f850..262d2269d 100644
--- a/pkg/models/tasks.go
+++ b/pkg/models/tasks.go
@@ -214,6 +214,10 @@ type taskSearchOptions struct {
 	projectIDs         []int64
 	expand             []TaskCollectionExpandable
 	projectViewID      int64
+
+	// userProvidedSort distinguishes an explicit sort_by from the id/position
+	// defaults appended later, so relevance ordering only replaces the default sort.
+	userProvidedSort bool
 }
 
 // ReadAll is a dummy function to still have that endpoint documented

From 116fb1e2e0c535884926fd90bab9fee8bd2f7b34 Mon Sep 17 00:00:00 2001
From: kolaente <k@knt.li>
Date: Fri, 19 Jun 2026 22:52:26 +0200
Subject: [PATCH 2/5] fix(search): rank exact task-index match before BM25 text
 relevance on ParadeDB

The BM25 relevance ranking added `pdb.score(tasks.id)` to the search SELECT
and ORDER BY. ParadeDB can only compute a score for a pure-ParadeDB query
shape, so two cases produced "pq: Unsupported query shape":

1. A numeric search (e.g. "#17") OR's the ParadeDB `|||` operators with a
   plain `"index" = N` equality in the same boolean group. Scoring that mixed
   group is unsupported.
2. When favorites are in scope, the `project_id IN (...) OR id IN (<favorites
   subquery>)` predicate is unsupported under pdb.score regardless of how the
   subquery is expressed (OR or UNION) - it just was never exercised because
   the ranking tests searched a single project with no favorites.

Both are now handled so each query ParadeDB scores is a supported shape:

- Numeric search runs as two arms: an exact `index = N` arm (no score, ranked
  first) and a text `|||` arm scored by pdb.score DESC. The arms are merged in
  Go (index matches first, deduped by task id) and paginated in memory; the
  count query keeps the combined `OR index = N` predicate (no score), which is
  a supported shape, so totalItems stays correct.
- The relevance arms reach favorites through a LEFT JOIN and scope on the
  joined column (`rank_favorites.entity_id IS NOT NULL`) instead of an
  id-IN-subquery, which ParadeDB can score.

Non-numeric (pure text) searches keep the single pdb.score-ordered query.
Non-ParadeDB databases are unchanged (no pdb.score, no ranking).

TestTaskSearchRelevanceRankingNumericIndex covers the numeric case: on
ParadeDB the exact-index task ranks first, then text matches by relevance; on
other databases it only asserts the matches are returned.

Validated against the CI-pinned ParadeDB image (paradedb 0.21.12): the full
pkg/models and pkg/webtests suites pass, including
TestTaskCollection_ReadAll/search_for_task_index and the HTTP search tests.
---
 pkg/models/task_search.go      | 196 ++++++++++++++++++++++++---------
 pkg/models/task_search_test.go |  60 ++++++++++
 2 files changed, 205 insertions(+), 51 deletions(-)

diff --git a/pkg/models/task_search.go b/pkg/models/task_search.go
index e6d06f4e8..f3e137d84 100644
--- a/pkg/models/task_search.go
+++ b/pkg/models/task_search.go
@@ -320,11 +320,17 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 
 	// Then return all tasks for that projects
 	var where builder.Cond
+	// textSearchCond holds only the ParadeDB/ILIKE title+description match, kept
+	// separate from the index-equality match so the relevance ranking path can
+	// score a pure-ParadeDB query (see rankByRelevance below).
+	var textSearchCond builder.Cond
+	var searchIndex int64
 
 	if opts.search != "" {
-		where = db.MultiFieldSearchWithTableAlias([]string{"title", "description"}, opts.search, "tasks")
+		textSearchCond = db.MultiFieldSearchWithTableAlias([]string{"title", "description"}, opts.search, "tasks")
+		where = textSearchCond
 
-		searchIndex := getTaskIndexFromSearchString(opts.search)
+		searchIndex = getTaskIndexFromSearchString(opts.search)
 		if searchIndex > 0 {
 			where = builder.Or(where, builder.Eq{"`index`": searchIndex})
 		}
@@ -350,8 +356,10 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 		favoritesCond = builder.In("tasks.id", favCond)
 	}
 
+	scopeCond := builder.Or(projectIDCond, favoritesCond)
+
 	limit, start := getLimitFromPageIndex(opts.page, opts.perPage)
-	cond := builder.And(builder.Or(projectIDCond, favoritesCond), where, filterCond)
+	cond := builder.And(scopeCond, where, filterCond)
 
 	// ParadeDB exposes the BM25 relevance score via pdb.score(<key_field>) for any
 	// query containing a ParadeDB operator (the ||| from MultiFieldSearch qualifies).
@@ -360,15 +368,20 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 	// ParadeDB-only: pdb.score is invalid SQL on sqlite/mysql/plain postgres.
 	rankByRelevance := db.ParadeDBAvailable() && opts.search != "" && !opts.userProvidedSort
 
+	// ParadeDB's pdb.score() rejects an `id IN (<subquery>)` favorites scope (whether
+	// expressed as OR or UNION) as an unsupported query shape, so the relevance arms
+	// reach favorites through a LEFT JOIN and scope on the joined column instead,
+	// which it can score. Only relevant when favorites are part of the scope.
+	rankFavoritesJoin := rankByRelevance && d.hasFavoritesProject
+	rankScopeCond := scopeCond
+	if rankFavoritesJoin {
+		rankScopeCond = builder.Or(projectIDCond, builder.Expr("rank_favorites.entity_id IS NOT NULL"))
+	}
+
 	var distinct = "tasks.*"
 	if strings.Contains(orderby, "task_positions.") {
 		distinct += ", task_positions.position"
 	}
-	if rankByRelevance {
-		// DISTINCT requires every ORDER BY expression to appear in the SELECT list.
-		distinct += ", pdb.score(tasks.id)"
-		orderby = "pdb.score(tasks.id) DESC, " + orderby
-	}
 
 	var expandSubtasks = false
 	for _, expandable := range opts.expand {
@@ -378,56 +391,124 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 		}
 	}
 
-	if expandSubtasks {
-		cond = builder.And(cond, builder.Or(
-			builder.IsNull{"task_relations.id"},
-			builder.IsNull{"parent_tasks.id"},
-			builder.Expr("parent_tasks.project_id != tasks.project_id"),
-		))
-	}
-
-	query := d.s.Where(cond)
-	if rankByRelevance {
-		// xorm's Distinct() quotes each column, which corrupts the pdb.score(tasks.id)
-		// function call. Select() passes the raw column list through untouched, and
-		// Distinct() (no args) still emits the DISTINCT keyword.
-		query = query.Select(distinct).Distinct()
-	} else {
-		query = query.Distinct(distinct)
-	}
-	if limit > 0 {
-		query = query.Limit(limit, start)
-	}
-
-	for _, param := range opts.sortby {
-		if param.sortBy == taskPropertyPosition {
-			query = query.Join("LEFT", "task_positions", "task_positions.task_id = tasks.id AND task_positions.project_view_id = ?", param.projectViewID)
-			break
+	// addJoins applies the same LEFT JOINs the count query and every fetch arm
+	// rely on (position sort, bucket filter, subtask expansion).
+	addJoins := func(query *xorm.Session) *xorm.Session {
+		for _, param := range opts.sortby {
+			if param.sortBy == taskPropertyPosition {
+				query = query.Join("LEFT", "task_positions", "task_positions.task_id = tasks.id AND task_positions.project_view_id = ?", param.projectViewID)
+				break
+			}
 		}
+		if joinTaskBuckets {
+			joinCond := "task_buckets.task_id = tasks.id"
+			if opts.projectViewID > 0 {
+				joinCond += " AND task_buckets.project_view_id = ?"
+				query = query.Join("LEFT", "task_buckets", joinCond, opts.projectViewID)
+			} else {
+				query = query.Join("LEFT", "task_buckets", joinCond)
+			}
+		}
+		if expandSubtasks {
+			query = query.
+				Join("LEFT", "task_relations", "tasks.id = task_relations.task_id and task_relations.relation_kind = 'parenttask'").
+				Join("LEFT", "tasks parent_tasks", "task_relations.other_task_id = parent_tasks.id")
+		}
+		return query
 	}
 
-	if joinTaskBuckets {
-		joinCond := "task_buckets.task_id = tasks.id"
-		if opts.projectViewID > 0 {
-			joinCond += " AND task_buckets.project_view_id = ?"
-			query = query.Join("LEFT", "task_buckets", joinCond, opts.projectViewID)
+	subtaskParentCond := builder.Or(
+		builder.IsNull{"task_relations.id"},
+		builder.IsNull{"parent_tasks.id"},
+		builder.Expr("parent_tasks.project_id != tasks.project_id"),
+	)
+	if expandSubtasks {
+		cond = builder.And(cond, subtaskParentCond)
+	}
+
+	// fetchTasks runs a single fetch arm: it builds the DISTINCT select (raw, so
+	// xorm doesn't quote-corrupt the pdb.score function call), applies the joins
+	// and the given order. paginate=false fetches every matching row so the caller
+	// can merge multiple arms and slice the combined result in Go.
+	fetchTasks := func(armCond builder.Cond, selectCols, armOrderby string, paginate bool) ([]*Task, error) {
+		query := d.s.Where(armCond)
+		if selectCols == distinct {
+			query = query.Distinct(selectCols)
 		} else {
-			query = query.Join("LEFT", "task_buckets", joinCond)
+			// Select() passes the raw column list through untouched while Distinct()
+			// (no args) still emits the DISTINCT keyword.
+			query = query.Select(selectCols).Distinct()
 		}
-	}
-	if expandSubtasks {
-		query = query.
-			Join("LEFT", "task_relations", "tasks.id = task_relations.task_id and task_relations.relation_kind = 'parenttask'").
-			Join("LEFT", "tasks parent_tasks", "task_relations.other_task_id = parent_tasks.id")
+		if paginate && limit > 0 {
+			query = query.Limit(limit, start)
+		}
+		if rankFavoritesJoin {
+			query = query.Join("LEFT", "favorites rank_favorites", "rank_favorites.entity_id = tasks.id AND rank_favorites.user_id = ? AND rank_favorites.kind = ?", d.a.GetID(), FavoriteKindTask)
+		}
+		query = addJoins(query)
+
+		armTasks := []*Task{}
+		if err := query.OrderBy(armOrderby).Find(&armTasks); err != nil {
+			sql, vals := query.LastSQL()
+			return nil, fmt.Errorf("could not fetch tasks, error was '%w', sql: '%v', values: %v", err, sql, vals)
+		}
+		return armTasks, nil
 	}
 
-	tasks = []*Task{}
-	err = query.
-		OrderBy(orderby).
-		Find(&tasks)
-	if err != nil {
-		sql, vals := query.LastSQL()
-		return nil, 0, fmt.Errorf("could not fetch tasks, error was '%w', sql: '%v', values: %v", err, sql, vals)
+	rankCondWith := func(searchCond builder.Cond) builder.Cond {
+		c := builder.And(rankScopeCond, searchCond, filterCond)
+		if expandSubtasks {
+			c = builder.And(c, subtaskParentCond)
+		}
+		return c
+	}
+
+	switch {
+	case rankByRelevance && searchIndex > 0:
+		// A numeric search matches both the task index and the fuzzy text. pdb.score
+		// can only score a pure-ParadeDB query, so a `||| ... OR index = N` group is
+		// an unsupported query shape on ParadeDB. Run two supported arms instead and
+		// rank exact index matches first, then text matches by relevance.
+		indexTasks, err := fetchTasks(rankCondWith(builder.Eq{"`index`": searchIndex}), distinct, orderby, false)
+		if err != nil {
+			return nil, 0, err
+		}
+
+		textTasks, err := fetchTasks(rankCondWith(textSearchCond), distinct+", pdb.score(tasks.id)", "pdb.score(tasks.id) DESC, "+orderby, false)
+		if err != nil {
+			return nil, 0, err
+		}
+
+		// Exact index matches rank first; dedup a task matching both arms in favour
+		// of its index-match position.
+		seen := make(map[int64]bool, len(indexTasks)+len(textTasks))
+		merged := make([]*Task, 0, len(indexTasks)+len(textTasks))
+		for _, t := range indexTasks {
+			if !seen[t.ID] {
+				seen[t.ID] = true
+				merged = append(merged, t)
+			}
+		}
+		for _, t := range textTasks {
+			if !seen[t.ID] {
+				seen[t.ID] = true
+				merged = append(merged, t)
+			}
+		}
+
+		tasks = paginateInMemory(merged, limit, start)
+	case rankByRelevance:
+		// Pure text search: a single pdb.score-ordered query over the score-able
+		// scope is a supported shape.
+		tasks, err = fetchTasks(rankCondWith(textSearchCond), distinct+", pdb.score(tasks.id)", "pdb.score(tasks.id) DESC, "+orderby, true)
+		if err != nil {
+			return nil, 0, err
+		}
+	default:
+		tasks, err = fetchTasks(cond, distinct, orderby, true)
+		if err != nil {
+			return nil, 0, err
+		}
 	}
 
 	// fetch subtasks when expanding
@@ -503,3 +584,16 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 	}
 	return
 }
+
+// paginateInMemory slices an already-ordered result set. limit == 0 means "no
+// limit" (return everything from start onwards), matching getLimitFromPageIndex.
+func paginateInMemory(tasks []*Task, limit, start int) []*Task {
+	if start >= len(tasks) {
+		return []*Task{}
+	}
+	tasks = tasks[start:]
+	if limit > 0 && limit < len(tasks) {
+		tasks = tasks[:limit]
+	}
+	return tasks
+}
diff --git a/pkg/models/task_search_test.go b/pkg/models/task_search_test.go
index e5af2e95d..59eab6051 100644
--- a/pkg/models/task_search_test.go
+++ b/pkg/models/task_search_test.go
@@ -17,6 +17,7 @@
 package models
 
 import (
+	"strconv"
 	"testing"
 
 	"code.vikunja.io/api/pkg/db"
@@ -105,3 +106,62 @@ func TestTaskSearchRelevanceRanking(t *testing.T) {
 		assertRelevanceRanked(t, &TaskCollection{ProjectID: 1, ProjectViewID: 1})
 	})
 }
+
+// TestTaskSearchRelevanceRankingNumericIndex covers a numeric search (e.g. "#42"):
+// it matches both a task by its per-project index and tasks whose title/description
+// contain that number via fuzzy text search. On ParadeDB the exact-index task must
+// rank first, then the text matches by relevance. This is the case that combines an
+// `index = N` equality with the ParadeDB ||| operators; scoring such a mixed boolean
+// group is an unsupported query shape, so it is run as two arms (index, then text).
+func TestTaskSearchRelevanceRankingNumericIndex(t *testing.T) {
+	db.LoadAndAssertFixtures(t)
+	s := db.NewSession()
+	defer s.Close()
+
+	usr := &user.User{ID: 1}
+
+	// The exact-index task: its index is what we search for. Its title deliberately
+	// does not contain the number, so it can only be found by the index match.
+	exactIndex := &Task{Title: "Quarterly planning offsite", ProjectID: 1}
+	require.NoError(t, exactIndex.Create(s, usr))
+	require.NotZero(t, exactIndex.Index)
+
+	indexStr := strconv.FormatInt(exactIndex.Index, 10)
+	search := "#" + indexStr
+
+	// Text matches: their titles contain the searched number so the fuzzy text arm
+	// returns them, but they are not the exact-index task.
+	textA := &Task{Title: "Review ticket " + search + " backlog", ProjectID: 1}
+	require.NoError(t, textA.Create(s, usr))
+	textB := &Task{Title: "Notes about " + search, ProjectID: 1}
+	require.NoError(t, textB.Create(s, usr))
+
+	assertIndexFirst := func(t *testing.T, tc *TaskCollection) {
+		got, _, _, err := tc.ReadAll(s, usr, search, 0, 50)
+		require.NoError(t, err)
+
+		gotTasks, is := got.([]*Task)
+		require.True(t, is)
+
+		gotIDs := make([]int64, len(gotTasks))
+		for i, tsk := range gotTasks {
+			gotIDs[i] = tsk.ID
+		}
+
+		require.Contains(t, gotIDs, exactIndex.ID, "the exact-index task should be returned")
+
+		if db.ParadeDBAvailable() {
+			require.NotEmpty(t, gotTasks)
+			assert.Equal(t, exactIndex.ID, gotTasks[0].ID, "the exact-index match should rank first")
+			assert.Contains(t, gotIDs, textA.ID, "text matches should also be returned, ranked after the index match")
+		}
+	}
+
+	t.Run("no view", func(t *testing.T) {
+		assertIndexFirst(t, &TaskCollection{ProjectID: 1})
+	})
+
+	t.Run("list view", func(t *testing.T) {
+		assertIndexFirst(t, &TaskCollection{ProjectID: 1, ProjectViewID: 1})
+	})
+}

From d93e98f76b43fe8c0e44024985d3d7cb20dc12a1 Mon Sep 17 00:00:00 2001
From: kolaente <k@knt.li>
Date: Fri, 19 Jun 2026 23:14:51 +0200
Subject: [PATCH 3/5] fix(search): qualify the task index column to avoid
 ambiguity with the parent-task join

---
 pkg/models/task_search.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/models/task_search.go b/pkg/models/task_search.go
index f3e137d84..a8884ba7f 100644
--- a/pkg/models/task_search.go
+++ b/pkg/models/task_search.go
@@ -332,7 +332,7 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 
 		searchIndex = getTaskIndexFromSearchString(opts.search)
 		if searchIndex > 0 {
-			where = builder.Or(where, builder.Eq{"`index`": searchIndex})
+			where = builder.Or(where, builder.Eq{"tasks.`index`": searchIndex})
 		}
 	}
 
@@ -469,7 +469,7 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 		// can only score a pure-ParadeDB query, so a `||| ... OR index = N` group is
 		// an unsupported query shape on ParadeDB. Run two supported arms instead and
 		// rank exact index matches first, then text matches by relevance.
-		indexTasks, err := fetchTasks(rankCondWith(builder.Eq{"`index`": searchIndex}), distinct, orderby, false)
+		indexTasks, err := fetchTasks(rankCondWith(builder.Eq{"tasks.`index`": searchIndex}), distinct, orderby, false)
 		if err != nil {
 			return nil, 0, err
 		}

From 78dde2fb18c9a13e83c0cacf720ea70621fc0691 Mon Sep 17 00:00:00 2001
From: kolaente <k@knt.li>
Date: Fri, 19 Jun 2026 23:14:55 +0200
Subject: [PATCH 4/5] fix(search): derive userProvidedSort from the effective
 sort so relevance ranking applies in negative-id views

---
 pkg/models/task_collection.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/models/task_collection.go b/pkg/models/task_collection.go
index 442c3e657..fdf1f3e09 100644
--- a/pkg/models/task_collection.go
+++ b/pkg/models/task_collection.go
@@ -142,7 +142,7 @@ func getTaskFilterOptsFromCollection(tf *TaskCollection, projectView *ProjectVie
 
 	opts = &taskSearchOptions{
 		sortby:             sort,
-		userProvidedSort:   len(tf.SortBy) > 0,
+		userProvidedSort:   len(sort) > 0,
 		filterIncludeNulls: tf.FilterIncludeNulls,
 		filter:             tf.Filter,
 		filterTimezone:     tf.FilterTimezone,

From cefa42da86fec92057b648eb6e757b2f6c7cc186 Mon Sep 17 00:00:00 2001
From: kolaente <k@knt.li>
Date: Sun, 21 Jun 2026 18:49:41 +0200
Subject: [PATCH 5/5] refactor(search): limit BM25 relevance ranking to
 pure-text searches

Rank ParadeDB search results by BM25 relevance only for pure-text searches
over a plain project scope. Numeric searches (the `OR index = N` branch) and
the Favorites view (the `id IN (<subquery>)` scope) keep the default ordering
(unranked, as on main): pdb.score rejects both as unsupported query shapes, and
the contortions previously needed to score them (two-arm numeric merge with
in-memory pagination, a favorites LEFT JOIN) added far more complexity than the
ranking was worth. Neither path was ranked before this PR, so leaving them at
the default order is no regression.
---
 pkg/models/task_collection.go  |   2 +-
 pkg/models/task_search.go      | 215 ++++++++++-----------------------
 pkg/models/task_search_test.go |  60 ---------
 3 files changed, 64 insertions(+), 213 deletions(-)

diff --git a/pkg/models/task_collection.go b/pkg/models/task_collection.go
index fdf1f3e09..442c3e657 100644
--- a/pkg/models/task_collection.go
+++ b/pkg/models/task_collection.go
@@ -142,7 +142,7 @@ func getTaskFilterOptsFromCollection(tf *TaskCollection, projectView *ProjectVie
 
 	opts = &taskSearchOptions{
 		sortby:             sort,
-		userProvidedSort:   len(sort) > 0,
+		userProvidedSort:   len(tf.SortBy) > 0,
 		filterIncludeNulls: tf.FilterIncludeNulls,
 		filter:             tf.Filter,
 		filterTimezone:     tf.FilterTimezone,
diff --git a/pkg/models/task_search.go b/pkg/models/task_search.go
index a8884ba7f..5eb7f2b2a 100644
--- a/pkg/models/task_search.go
+++ b/pkg/models/task_search.go
@@ -320,17 +320,11 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 
 	// Then return all tasks for that projects
 	var where builder.Cond
-	// textSearchCond holds only the ParadeDB/ILIKE title+description match, kept
-	// separate from the index-equality match so the relevance ranking path can
-	// score a pure-ParadeDB query (see rankByRelevance below).
-	var textSearchCond builder.Cond
-	var searchIndex int64
 
+	searchIndex := getTaskIndexFromSearchString(opts.search)
 	if opts.search != "" {
-		textSearchCond = db.MultiFieldSearchWithTableAlias([]string{"title", "description"}, opts.search, "tasks")
-		where = textSearchCond
+		where = db.MultiFieldSearchWithTableAlias([]string{"title", "description"}, opts.search, "tasks")
 
-		searchIndex = getTaskIndexFromSearchString(opts.search)
 		if searchIndex > 0 {
 			where = builder.Or(where, builder.Eq{"tasks.`index`": searchIndex})
 		}
@@ -356,27 +350,8 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 		favoritesCond = builder.In("tasks.id", favCond)
 	}
 
-	scopeCond := builder.Or(projectIDCond, favoritesCond)
-
 	limit, start := getLimitFromPageIndex(opts.page, opts.perPage)
-	cond := builder.And(scopeCond, where, filterCond)
-
-	// ParadeDB exposes the BM25 relevance score via pdb.score(<key_field>) for any
-	// query containing a ParadeDB operator (the ||| from MultiFieldSearch qualifies).
-	// When searching without an explicit user sort, order by relevance so tasks
-	// matching all query words rank above tasks matching only some. This is
-	// ParadeDB-only: pdb.score is invalid SQL on sqlite/mysql/plain postgres.
-	rankByRelevance := db.ParadeDBAvailable() && opts.search != "" && !opts.userProvidedSort
-
-	// ParadeDB's pdb.score() rejects an `id IN (<subquery>)` favorites scope (whether
-	// expressed as OR or UNION) as an unsupported query shape, so the relevance arms
-	// reach favorites through a LEFT JOIN and scope on the joined column instead,
-	// which it can score. Only relevant when favorites are part of the scope.
-	rankFavoritesJoin := rankByRelevance && d.hasFavoritesProject
-	rankScopeCond := scopeCond
-	if rankFavoritesJoin {
-		rankScopeCond = builder.Or(projectIDCond, builder.Expr("rank_favorites.entity_id IS NOT NULL"))
-	}
+	cond := builder.And(builder.Or(projectIDCond, favoritesCond), where, filterCond)
 
 	var distinct = "tasks.*"
 	if strings.Contains(orderby, "task_positions.") {
@@ -391,124 +366,73 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 		}
 	}
 
-	// addJoins applies the same LEFT JOINs the count query and every fetch arm
-	// rely on (position sort, bucket filter, subtask expansion).
-	addJoins := func(query *xorm.Session) *xorm.Session {
-		for _, param := range opts.sortby {
-			if param.sortBy == taskPropertyPosition {
-				query = query.Join("LEFT", "task_positions", "task_positions.task_id = tasks.id AND task_positions.project_view_id = ?", param.projectViewID)
-				break
-			}
-		}
-		if joinTaskBuckets {
-			joinCond := "task_buckets.task_id = tasks.id"
-			if opts.projectViewID > 0 {
-				joinCond += " AND task_buckets.project_view_id = ?"
-				query = query.Join("LEFT", "task_buckets", joinCond, opts.projectViewID)
-			} else {
-				query = query.Join("LEFT", "task_buckets", joinCond)
-			}
-		}
-		if expandSubtasks {
-			query = query.
-				Join("LEFT", "task_relations", "tasks.id = task_relations.task_id and task_relations.relation_kind = 'parenttask'").
-				Join("LEFT", "tasks parent_tasks", "task_relations.other_task_id = parent_tasks.id")
-		}
-		return query
-	}
-
-	subtaskParentCond := builder.Or(
-		builder.IsNull{"task_relations.id"},
-		builder.IsNull{"parent_tasks.id"},
-		builder.Expr("parent_tasks.project_id != tasks.project_id"),
-	)
 	if expandSubtasks {
-		cond = builder.And(cond, subtaskParentCond)
+		cond = builder.And(cond, builder.Or(
+			builder.IsNull{"task_relations.id"},
+			builder.IsNull{"parent_tasks.id"},
+			builder.Expr("parent_tasks.project_id != tasks.project_id"),
+		))
 	}
 
-	// fetchTasks runs a single fetch arm: it builds the DISTINCT select (raw, so
-	// xorm doesn't quote-corrupt the pdb.score function call), applies the joins
-	// and the given order. paginate=false fetches every matching row so the caller
-	// can merge multiple arms and slice the combined result in Go.
-	fetchTasks := func(armCond builder.Cond, selectCols, armOrderby string, paginate bool) ([]*Task, error) {
-		query := d.s.Where(armCond)
-		if selectCols == distinct {
-			query = query.Distinct(selectCols)
+	// ParadeDB exposes the BM25 relevance score via pdb.score(tasks.id) for a query
+	// containing a ParadeDB operator (the ||| from MultiFieldSearch qualifies). When
+	// searching without an explicit user sort, order by relevance so tasks matching
+	// all query words rank above tasks matching only some.
+	//
+	// This is limited to pure-text searches over a plain project scope: numeric
+	// searches add an `OR index = N` branch and the Favorites view scopes on an
+	// `id IN (<subquery>)`, both of which pdb.score rejects as unsupported query
+	// shapes. Those keep the default ordering (unranked). pdb.score is also invalid
+	// SQL on sqlite/mysql/plain postgres, hence the ParadeDBAvailable() gate.
+	rankByRelevance := db.ParadeDBAvailable() &&
+		opts.search != "" &&
+		!opts.userProvidedSort &&
+		searchIndex == 0 &&
+		!d.hasFavoritesProject
+
+	query := d.s.Where(cond)
+	if rankByRelevance {
+		// Select() passes the raw column list through untouched while Distinct()
+		// (no args) still emits DISTINCT. Distinct("tasks.*, pdb.score(tasks.id)")
+		// would quote-corrupt the function call into "pdb"."score(tasks"."id)".
+		query = query.Select(distinct + ", pdb.score(tasks.id)").Distinct()
+		orderby = "pdb.score(tasks.id) DESC, " + orderby
+	} else {
+		query = query.Distinct(distinct)
+	}
+	if limit > 0 {
+		query = query.Limit(limit, start)
+	}
+
+	for _, param := range opts.sortby {
+		if param.sortBy == taskPropertyPosition {
+			query = query.Join("LEFT", "task_positions", "task_positions.task_id = tasks.id AND task_positions.project_view_id = ?", param.projectViewID)
+			break
+		}
+	}
+
+	if joinTaskBuckets {
+		joinCond := "task_buckets.task_id = tasks.id"
+		if opts.projectViewID > 0 {
+			joinCond += " AND task_buckets.project_view_id = ?"
+			query = query.Join("LEFT", "task_buckets", joinCond, opts.projectViewID)
 		} else {
-			// Select() passes the raw column list through untouched while Distinct()
-			// (no args) still emits the DISTINCT keyword.
-			query = query.Select(selectCols).Distinct()
+			query = query.Join("LEFT", "task_buckets", joinCond)
 		}
-		if paginate && limit > 0 {
-			query = query.Limit(limit, start)
-		}
-		if rankFavoritesJoin {
-			query = query.Join("LEFT", "favorites rank_favorites", "rank_favorites.entity_id = tasks.id AND rank_favorites.user_id = ? AND rank_favorites.kind = ?", d.a.GetID(), FavoriteKindTask)
-		}
-		query = addJoins(query)
-
-		armTasks := []*Task{}
-		if err := query.OrderBy(armOrderby).Find(&armTasks); err != nil {
-			sql, vals := query.LastSQL()
-			return nil, fmt.Errorf("could not fetch tasks, error was '%w', sql: '%v', values: %v", err, sql, vals)
-		}
-		return armTasks, nil
+	}
+	if expandSubtasks {
+		query = query.
+			Join("LEFT", "task_relations", "tasks.id = task_relations.task_id and task_relations.relation_kind = 'parenttask'").
+			Join("LEFT", "tasks parent_tasks", "task_relations.other_task_id = parent_tasks.id")
 	}
 
-	rankCondWith := func(searchCond builder.Cond) builder.Cond {
-		c := builder.And(rankScopeCond, searchCond, filterCond)
-		if expandSubtasks {
-			c = builder.And(c, subtaskParentCond)
-		}
-		return c
-	}
-
-	switch {
-	case rankByRelevance && searchIndex > 0:
-		// A numeric search matches both the task index and the fuzzy text. pdb.score
-		// can only score a pure-ParadeDB query, so a `||| ... OR index = N` group is
-		// an unsupported query shape on ParadeDB. Run two supported arms instead and
-		// rank exact index matches first, then text matches by relevance.
-		indexTasks, err := fetchTasks(rankCondWith(builder.Eq{"tasks.`index`": searchIndex}), distinct, orderby, false)
-		if err != nil {
-			return nil, 0, err
-		}
-
-		textTasks, err := fetchTasks(rankCondWith(textSearchCond), distinct+", pdb.score(tasks.id)", "pdb.score(tasks.id) DESC, "+orderby, false)
-		if err != nil {
-			return nil, 0, err
-		}
-
-		// Exact index matches rank first; dedup a task matching both arms in favour
-		// of its index-match position.
-		seen := make(map[int64]bool, len(indexTasks)+len(textTasks))
-		merged := make([]*Task, 0, len(indexTasks)+len(textTasks))
-		for _, t := range indexTasks {
-			if !seen[t.ID] {
-				seen[t.ID] = true
-				merged = append(merged, t)
-			}
-		}
-		for _, t := range textTasks {
-			if !seen[t.ID] {
-				seen[t.ID] = true
-				merged = append(merged, t)
-			}
-		}
-
-		tasks = paginateInMemory(merged, limit, start)
-	case rankByRelevance:
-		// Pure text search: a single pdb.score-ordered query over the score-able
-		// scope is a supported shape.
-		tasks, err = fetchTasks(rankCondWith(textSearchCond), distinct+", pdb.score(tasks.id)", "pdb.score(tasks.id) DESC, "+orderby, true)
-		if err != nil {
-			return nil, 0, err
-		}
-	default:
-		tasks, err = fetchTasks(cond, distinct, orderby, true)
-		if err != nil {
-			return nil, 0, err
-		}
+	tasks = []*Task{}
+	err = query.
+		OrderBy(orderby).
+		Find(&tasks)
+	if err != nil {
+		sql, vals := query.LastSQL()
+		return nil, 0, fmt.Errorf("could not fetch tasks, error was '%w', sql: '%v', values: %v", err, sql, vals)
 	}
 
 	// fetch subtasks when expanding
@@ -584,16 +508,3 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 	}
 	return
 }
-
-// paginateInMemory slices an already-ordered result set. limit == 0 means "no
-// limit" (return everything from start onwards), matching getLimitFromPageIndex.
-func paginateInMemory(tasks []*Task, limit, start int) []*Task {
-	if start >= len(tasks) {
-		return []*Task{}
-	}
-	tasks = tasks[start:]
-	if limit > 0 && limit < len(tasks) {
-		tasks = tasks[:limit]
-	}
-	return tasks
-}
diff --git a/pkg/models/task_search_test.go b/pkg/models/task_search_test.go
index 59eab6051..e5af2e95d 100644
--- a/pkg/models/task_search_test.go
+++ b/pkg/models/task_search_test.go
@@ -17,7 +17,6 @@
 package models
 
 import (
-	"strconv"
 	"testing"
 
 	"code.vikunja.io/api/pkg/db"
@@ -106,62 +105,3 @@ func TestTaskSearchRelevanceRanking(t *testing.T) {
 		assertRelevanceRanked(t, &TaskCollection{ProjectID: 1, ProjectViewID: 1})
 	})
 }
-
-// TestTaskSearchRelevanceRankingNumericIndex covers a numeric search (e.g. "#42"):
-// it matches both a task by its per-project index and tasks whose title/description
-// contain that number via fuzzy text search. On ParadeDB the exact-index task must
-// rank first, then the text matches by relevance. This is the case that combines an
-// `index = N` equality with the ParadeDB ||| operators; scoring such a mixed boolean
-// group is an unsupported query shape, so it is run as two arms (index, then text).
-func TestTaskSearchRelevanceRankingNumericIndex(t *testing.T) {
-	db.LoadAndAssertFixtures(t)
-	s := db.NewSession()
-	defer s.Close()
-
-	usr := &user.User{ID: 1}
-
-	// The exact-index task: its index is what we search for. Its title deliberately
-	// does not contain the number, so it can only be found by the index match.
-	exactIndex := &Task{Title: "Quarterly planning offsite", ProjectID: 1}
-	require.NoError(t, exactIndex.Create(s, usr))
-	require.NotZero(t, exactIndex.Index)
-
-	indexStr := strconv.FormatInt(exactIndex.Index, 10)
-	search := "#" + indexStr
-
-	// Text matches: their titles contain the searched number so the fuzzy text arm
-	// returns them, but they are not the exact-index task.
-	textA := &Task{Title: "Review ticket " + search + " backlog", ProjectID: 1}
-	require.NoError(t, textA.Create(s, usr))
-	textB := &Task{Title: "Notes about " + search, ProjectID: 1}
-	require.NoError(t, textB.Create(s, usr))
-
-	assertIndexFirst := func(t *testing.T, tc *TaskCollection) {
-		got, _, _, err := tc.ReadAll(s, usr, search, 0, 50)
-		require.NoError(t, err)
-
-		gotTasks, is := got.([]*Task)
-		require.True(t, is)
-
-		gotIDs := make([]int64, len(gotTasks))
-		for i, tsk := range gotTasks {
-			gotIDs[i] = tsk.ID
-		}
-
-		require.Contains(t, gotIDs, exactIndex.ID, "the exact-index task should be returned")
-
-		if db.ParadeDBAvailable() {
-			require.NotEmpty(t, gotTasks)
-			assert.Equal(t, exactIndex.ID, gotTasks[0].ID, "the exact-index match should rank first")
-			assert.Contains(t, gotIDs, textA.ID, "text matches should also be returned, ranked after the index match")
-		}
-	}
-
-	t.Run("no view", func(t *testing.T) {
-		assertIndexFirst(t, &TaskCollection{ProjectID: 1})
-	})
-
-	t.Run("list view", func(t *testing.T) {
-		assertIndexFirst(t, &TaskCollection{ProjectID: 1, ProjectViewID: 1})
-	})
-}