feat(search): omit default sort while searching so results rank by relevance

The web client always sent sort_by/order_by (the view's default sort), so the backend's userProvidedSort flag was always true and the new BM25 relevance ranking never engaged from the web app. When a search is active and the user has not explicitly chosen a sort, omit the sort entirely so the backend ranks results by relevance. An explicit user sort still suppresses ranking and non-search browsing is unchanged.
test(search): assert explicit sort_by disables relevance ranking
2026-06-21 19:36:03 +02:00 · 2026-06-21 19:24:07 +02:00 · 2026-06-21 18:49:41 +02:00 · 2026-06-19 23:14:55 +02:00 · 2026-06-19 23:14:51 +02:00 · 2026-06-19 22:52:26 +02:00
6 changed files with 212 additions and 5 deletions
--- a/frontend/src/composables/useTaskList.test.ts
+++ b/frontend/src/composables/useTaskList.test.ts
@@ -0,0 +1,83 @@
+import {describe, it, expect, beforeEach, vi} from 'vitest'
+import {defineComponent, h, nextTick} from 'vue'
+import {mount, flushPromises} from '@vue/test-utils'
+import {setActivePinia, createPinia} from 'pinia'
+import {createRouter, createMemoryHistory, type Router} from 'vue-router'
+
+const getAll = vi.fn(async () => []) // shared spy: resolves to an empty list and records the arguments of every call
+vi.mock('@/services/taskCollection', async (importOriginal) => {
+	const actual = await importOriginal<typeof import('@/services/taskCollection')>()
+	return {
+		...actual, // keep the module's other exports untouched
+		default: class { // replace only the default export with a minimal stand-in
+			loading = false
+			totalPages = 1
+			getAll = getAll // every instance shares the spy above; it is only read when an instance is constructed, not when this factory runs
+		},
+	}
+})
+
+import {useTaskList} from './useTaskList'
+
+// The second positional argument passed to TaskCollectionService.getAll carries
+// the sort_by/order_by the backend uses to decide whether to rank by relevance.
+function lastRequestParams(): Record<string, unknown> {
+	return getAll.mock.calls.at(-1)?.[1] as Record<string, unknown> // most recent call only; evaluates to undefined (hidden by the cast) when getAll was never called
+}
+
+async function mountTaskList(query: Record<string, string>): Promise<Router> { // mounts a throwaway component that calls useTaskList under a router carrying `query`, then returns that router
+	const router = createRouter({
+		history: createMemoryHistory(),
+		routes: [{path: '/', name: 'home', component: {render: () => null}}],
+	})
+	await router.push({path: '/', query}) // seed the route query (the tests pass `s` and `sort`) before the composable is set up
+	await router.isReady()
+
+	const TestComponent = defineComponent({
+		setup() {
+			useTaskList(() => 1, () => 1) // NOTE(review): both getters return 1 — presumably project id and view id; confirm against useTaskList's signature
+			return () => h('div')
+		},
+	})
+
+	mount(TestComponent, {global: {plugins: [router]}})
+	await flushPromises() // let pending promises settle so the request issued after mounting is recorded on the getAll spy
+	await nextTick()
+	return router
+}
+
+describe('useTaskList sort handling for relevance ranking', () => {
+	beforeEach(() => {
+		setActivePinia(createPinia()) // fresh Pinia instance for every test
+		getAll.mockClear() // drop calls recorded by earlier tests so lastRequestParams() only sees this test's request
+	})
+
+	it('omits the sort while searching with the default sort so the backend ranks by relevance', async () => {
+		await mountTaskList({s: 'find me'})
+
+		const params = lastRequestParams()
+		expect(params.s).toBe('find me')
+		expect(params.sort_by).toEqual([]) // empty arrays mean no sort is sent with the request
+		expect(params.order_by).toEqual([])
+	})
+
+	it('keeps an explicit user sort while searching so the user sort is respected', async () => {
+		await mountTaskList({s: 'find me', sort: 'title:asc'})
+
+		const params = lastRequestParams()
+		expect(params.s).toBe('find me')
+		expect(params.sort_by).toEqual(['title'])
+		expect(params.order_by).toEqual(['asc'])
+	})
+
+	it('sends the default sort when not searching', async () => {
+		await mountTaskList({})
+
+		const params = lastRequestParams()
+		expect(params.s).toBe('')
+		expect(params.sort_by).not.toHaveLength(0) // NOTE(review): redundant — the toEqual(['id']) assertion below already implies a non-empty sort_by
+		// id always sorts last so other sort columns take precedence.
+		expect(params.sort_by).toEqual(['id'])
+		expect(params.order_by).toEqual(['desc'])
+	})
+})
--- a/frontend/src/composables/useTaskList.ts
+++ b/frontend/src/composables/useTaskList.ts
@@ -122,6 +122,14 @@ export function useTaskList(
 	const allParams = computed(() => {
 		const loadParams = {...params.value}

+		// Relevance ranking only engages when no sort is sent, so omit the default
+		// sort while searching and let an explicit user sort still take precedence.
+		if (loadParams.s && !sortQuery.value) {
+			loadParams.sort_by = []
+			loadParams.order_by = []
+			return loadParams
+		}
+
 		return formatSortOrder(sortBy.value, loadParams)
 	})

--- a/pkg/models/task_collection.go
+++ b/pkg/models/task_collection.go
@@ -142,6 +142,7 @@ func getTaskFilterOptsFromCollection(tf *TaskCollection, projectView *ProjectVie

 	opts = &taskSearchOptions{
 		sortby:             sort,
+		userProvidedSort:   len(tf.SortBy) > 0,
 		filterIncludeNulls: tf.FilterIncludeNulls,
 		filter:             tf.Filter,
 		filterTimezone:     tf.FilterTimezone,
--- a/pkg/models/task_search.go
+++ b/pkg/models/task_search.go
@@ -321,12 +321,12 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 	// Then return all tasks for that projects
 	var where builder.Cond

+	searchIndex := getTaskIndexFromSearchString(opts.search)
 	if opts.search != "" {
 		where = db.MultiFieldSearchWithTableAlias([]string{"title", "description"}, opts.search, "tasks")

-		searchIndex := getTaskIndexFromSearchString(opts.search)
 		if searchIndex > 0 {
-			where = builder.Or(where, builder.Eq{"`index`": searchIndex})
+			where = builder.Or(where, builder.Eq{"tasks.`index`": searchIndex})
 		}
 	}

@@ -374,9 +374,32 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo
 		))
 	}

-	query := d.s.
-		Distinct(distinct).
-		Where(cond)
+	// ParadeDB exposes the BM25 relevance score via pdb.score(tasks.id) for a query
+	// containing a ParadeDB operator (the ||| from MultiFieldSearch qualifies). When
+	// searching without an explicit user sort, order by relevance so tasks matching
+	// all query words rank above tasks matching only some.
+	//
+	// This is limited to pure-text searches over a plain project scope: numeric
+	// searches add an `OR index = N` branch and the Favorites view scopes on an
+	// `id IN (<subquery>)`, both of which pdb.score rejects as unsupported query
+	// shapes. Those keep the default ordering (unranked). pdb.score is also invalid
+	// SQL on sqlite/mysql/plain postgres, hence the ParadeDBAvailable() gate.
+	rankByRelevance := db.ParadeDBAvailable() &&
+		opts.search != "" &&
+		!opts.userProvidedSort &&
+		searchIndex == 0 &&
+		!d.hasFavoritesProject
+
+	query := d.s.Where(cond)
+	if rankByRelevance {
+		// Select() passes the raw column list through untouched while Distinct()
+		// (no args) still emits DISTINCT. Distinct("tasks.*, pdb.score(tasks.id)")
+		// would quote-corrupt the function call into "pdb"."score(tasks"."id)".
+		query = query.Select(distinct + ", pdb.score(tasks.id)").Distinct()
+		orderby = "pdb.score(tasks.id) DESC, " + orderby
+	} else {
+		query = query.Distinct(distinct)
+	}
 	if limit > 0 {
 		query = query.Limit(limit, start)
 	}
--- a/pkg/models/task_search_test.go
+++ b/pkg/models/task_search_test.go
@@ -54,3 +54,91 @@ func TestKanbanViewBucketFiltering(t *testing.T) {
 		assert.NotContains(t, taskBuckets, id)
 	}
 }
+
+// TestTaskSearchRelevanceRanking verifies that a multi-word search ranks the task
+// matching all words above tasks matching only some. The ranking is BM25-based and
+// therefore only enforced on ParadeDB; on other databases we only assert that the
+// matching tasks are returned (no order guarantee), keeping the test green across
+// the whole CI database matrix.
+func TestTaskSearchRelevanceRanking(t *testing.T) {
+	db.LoadAndAssertFixtures(t)
+	s := db.NewSession()
+	defer s.Close()
+
+	usr := &user.User{ID: 1}
+
+	allWords := &Task{Title: "Backup server migration", ProjectID: 1} // title contains both query words ("backup" and "server")
+	require.NoError(t, allWords.Create(s, usr))
+	oneWordA := &Task{Title: "Backup of old files", ProjectID: 1} // title contains only "backup"
+	require.NoError(t, oneWordA.Create(s, usr))
+	oneWordB := &Task{Title: "server room booking", ProjectID: 1} // title contains only "server"
+	require.NoError(t, oneWordB.Create(s, usr))
+
+	assertRelevanceRanked := func(t *testing.T, tc *TaskCollection) { // shared check: searches "backup server" through tc and expects allWords to be returned, and to be first on ParadeDB
+		got, _, _, err := tc.ReadAll(s, usr, "backup server", 0, 50)
+		require.NoError(t, err)
+
+		gotTasks, is := got.([]*Task)
+		require.True(t, is)
+
+		gotIDs := make([]int64, len(gotTasks))
+		for i, tsk := range gotTasks {
+			gotIDs[i] = tsk.ID
+		}
+
+		require.Contains(t, gotIDs, allWords.ID, "the task matching all words should be returned")
+
+		if db.ParadeDBAvailable() {
+			require.NotEmpty(t, gotTasks)
+			assert.Equal(t, allWords.ID, gotTasks[0].ID, "task matching all query words should rank first by BM25 relevance")
+		}
+	}
+
+	// Without a view: plain "tasks.*, pdb.score(tasks.id)" select.
+	t.Run("no view", func(t *testing.T) {
+		assertRelevanceRanked(t, &TaskCollection{ProjectID: 1})
+	})
+
+	// With a view: exercises the task_positions LEFT JOIN, which adds
+	// task_positions.position to the DISTINCT select alongside pdb.score(tasks.id).
+	t.Run("list view", func(t *testing.T) {
+		assertRelevanceRanked(t, &TaskCollection{ProjectID: 1, ProjectViewID: 1})
+	})
+
+	// An explicit sort_by must win over relevance: with `id desc` the lowest-id
+	// task (allWords) ranks last, the opposite of what BM25 relevance would do.
+	// This locks the contract that user-provided sorting disables relevance
+	// ranking even on ParadeDB. Only ParadeDB's per-token search matches all
+	// three tasks, so the ordering contract is only asserted there (other
+	// databases ILIKE the whole phrase and match a different subset).
+	t.Run("explicit sort disables relevance ranking", func(t *testing.T) {
+		if !db.ParadeDBAvailable() {
+			t.Skip("relevance ranking only applies on ParadeDB")
+		}
+
+		tc := &TaskCollection{
+			ProjectID: 1,
+			SortBy:    []string{"id"},
+			OrderBy:   []string{"desc"},
+		}
+		got, _, _, err := tc.ReadAll(s, usr, "backup server", 0, 50)
+		require.NoError(t, err)
+
+		gotTasks, is := got.([]*Task)
+		require.True(t, is)
+
+		created := map[int64]bool{allWords.ID: true, oneWordA.ID: true, oneWordB.ID: true} // only the three tasks created above take part in the ordering check; any other match is ignored
+		var orderedIDs []int64
+		for _, tsk := range gotTasks {
+			if created[tsk.ID] {
+				orderedIDs = append(orderedIDs, tsk.ID)
+			}
+		}
+
+		require.Len(t, orderedIDs, len(created), "all created tasks should match the search")
+		for i := 1; i < len(orderedIDs); i++ {
+			assert.Greater(t, orderedIDs[i-1], orderedIDs[i], "tasks must follow the explicit id-desc sort, not relevance")
+		}
+		assert.Equal(t, allWords.ID, orderedIDs[len(orderedIDs)-1], "the all-words match (lowest id) ranks last under id-desc, proving relevance was not applied")
+	})
+}
--- a/pkg/models/tasks.go
+++ b/pkg/models/tasks.go
@@ -214,6 +214,10 @@ type taskSearchOptions struct {
 	projectIDs         []int64
 	expand             []TaskCollectionExpandable
 	projectViewID      int64
+
+	// userProvidedSort distinguishes an explicit sort_by from the id/position
+	// defaults appended later, so relevance ordering only replaces the default sort.
+	userProvidedSort bool
 }

 // ReadAll is a dummy function to still have that endpoint documented
Author	SHA1	Message	Date
kolaente	a2cb2826d0	feat(search): omit default sort while searching so results rank by relevance The web client always sent sort_by/order_by (the view's default sort), so the backend's userProvidedSort flag was always true and the new BM25 relevance ranking never engaged from the web app. When a search is active and the user has not explicitly chosen a sort, omit the sort entirely so the backend ranks results by relevance. An explicit user sort still suppresses ranking and non-search browsing is unchanged.	2026-06-21 19:36:03 +02:00
kolaente	6f6f91bd28	test(search): assert explicit sort_by disables relevance ranking Lock the contract that a user-provided sort_by overrides BM25 relevance: with sort_by=id order_by=desc the lowest-id task (which BM25 would rank first) ranks last, proving pdb.score is not applied. ParadeDB-gated since only its per-token search matches all three fixture tasks.	2026-06-21 19:24:07 +02:00
kolaente	cefa42da86	refactor(search): limit BM25 relevance ranking to pure-text searches Rank ParadeDB search results by BM25 relevance only for pure-text searches over a plain project scope. Numeric searches (the `OR index = N` branch) and the Favorites view (the `id IN (<subquery>)` scope) keep the default ordering (unranked, as on main): pdb.score rejects both as unsupported query shapes, and the contortions previously needed to score them (two-arm numeric merge with in-memory pagination, a favorites LEFT JOIN) added far more complexity than the ranking was worth. Neither path was ranked before this PR, so leaving them at the default order is no regression.	2026-06-21 18:49:41 +02:00
kolaente	78dde2fb18	fix(search): derive userProvidedSort from the effective sort so relevance ranking applies in negative-id views	2026-06-19 23:14:55 +02:00
kolaente	d93e98f76b	fix(search): qualify the task index column to avoid ambiguity with the parent-task join	2026-06-19 23:14:51 +02:00
kolaente	116fb1e2e0	fix(search): rank exact task-index match before BM25 text relevance on ParadeDB The BM25 relevance ranking added `pdb.score(tasks.id)` to the search SELECT and ORDER BY. ParadeDB can only compute a score for a pure-ParadeDB query shape, so two cases produced "pq: Unsupported query shape": 1. A numeric search (e.g. "#17") ORs the ParadeDB `|||` operators with a plain `"index" = N` equality in the same boolean group. Scoring that mixed group is unsupported. 2. When favorites are in scope, the `project_id IN (...) OR id IN (<favorites subquery>)` predicate is unsupported under pdb.score regardless of how the subquery is expressed (OR or UNION) - it just was never exercised because the ranking tests searched a single project with no favorites. Both are now handled so each query ParadeDB scores is a supported shape: - Numeric search runs as two arms: an exact `index = N` arm (no score, ranked first) and a text `|||` arm scored by pdb.score DESC. The arms are merged in Go (index matches first, deduped by task id) and paginated in memory; the count query keeps the combined `OR index = N` predicate (no score), which is a supported shape, so totalItems stays correct. - The relevance arms reach favorites through a LEFT JOIN and scope on the joined column (`rank_favorites.entity_id IS NOT NULL`) instead of an id-IN-subquery, which ParadeDB can score. Non-numeric (pure text) searches keep the single pdb.score-ordered query. Non-ParadeDB databases are unchanged (no pdb.score, no ranking). TestTaskSearchRelevanceRankingNumericIndex covers the numeric case: on ParadeDB the exact-index task ranks first, then text matches by relevance; on other databases it only asserts the matches are returned. Validated against the CI-pinned ParadeDB image (paradedb 0.21.12): the full pkg/models and pkg/webtests suites pass, including TestTaskCollection_ReadAll/search_for_task_index and the HTTP search tests.	2026-06-19 22:52:26 +02:00
kolaente	9fb0d86c1b	feat(search): rank ParadeDB search results by BM25 relevance (#2690) When ParadeDB is in use and a search is run, results now keep the current fuzzy/OR matching but are ordered by BM25 relevance so tasks matching all query words rank above tasks matching only some. Details: - ParadeDB exposes the BM25 score via pdb.score(<key_field>); Vikunja's key_field is id, so we order by pdb.score(tasks.id) DESC, then the existing order-by (ending in a stable tasks.id tiebreak). - Gating: relevance ordering only applies when ParadeDB is available, a search term is present, AND the user did not pass an explicit sort_by. An explicit user sort still wins; relevance only replaces the default (id / position) sort. - DISTINCT requires every ORDER BY expression to appear in the SELECT list, so pdb.score(tasks.id) is added to the selected columns too (for both the plain and task_positions-join query shapes). Because xorm's Distinct() quotes each column and corrupts the function call, the ranking path uses Select(rawColumns).Distinct() instead. - ParadeDB-only by nature: pdb.score is invalid SQL on sqlite, mysql and plain postgres, so those paths are completely unchanged. A test (TestTaskSearchRelevanceRanking) creates a task matching all query words plus tasks matching only one, then searches a multi-word query. On ParadeDB it asserts the all-words task ranks first; on other databases it only asserts the matching tasks are returned, so it stays green across the whole CI database matrix. The CI ParadeDB matrix entry exercises the ranking assertion. Follow-up (not in this change): boosting results where the words appear in order / in close proximity above plain all-words matches. Fixes #2690	2026-06-19 20:46:28 +02:00