diff --git a/pkg/models/task_collection.go b/pkg/models/task_collection.go index bc217f7ca..442c3e657 100644 --- a/pkg/models/task_collection.go +++ b/pkg/models/task_collection.go @@ -142,6 +142,7 @@ func getTaskFilterOptsFromCollection(tf *TaskCollection, projectView *ProjectVie opts = &taskSearchOptions{ sortby: sort, + userProvidedSort: len(tf.SortBy) > 0, filterIncludeNulls: tf.FilterIncludeNulls, filter: tf.Filter, filterTimezone: tf.FilterTimezone, diff --git a/pkg/models/task_search.go b/pkg/models/task_search.go index 07da3f809..5eb7f2b2a 100644 --- a/pkg/models/task_search.go +++ b/pkg/models/task_search.go @@ -321,12 +321,12 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo // Then return all tasks for that projects var where builder.Cond + searchIndex := getTaskIndexFromSearchString(opts.search) if opts.search != "" { where = db.MultiFieldSearchWithTableAlias([]string{"title", "description"}, opts.search, "tasks") - searchIndex := getTaskIndexFromSearchString(opts.search) if searchIndex > 0 { - where = builder.Or(where, builder.Eq{"`index`": searchIndex}) + where = builder.Or(where, builder.Eq{"tasks.`index`": searchIndex}) } } @@ -374,9 +374,32 @@ func (d *dbTaskSearcher) Search(opts *taskSearchOptions) (tasks []*Task, totalCo )) } - query := d.s. - Distinct(distinct). - Where(cond) + // ParadeDB exposes the BM25 relevance score via pdb.score(tasks.id) for a query + // containing a ParadeDB operator (the ||| from MultiFieldSearch qualifies). When + // searching without an explicit user sort, order by relevance so tasks matching + // all query words rank above tasks matching only some. + // + // This is limited to pure-text searches over a plain project scope: numeric + // searches add an `OR index = N` branch and the Favorites view scopes on an + // `id IN (...)`, both of which pdb.score rejects as unsupported query + // shapes. Those keep the default ordering (unranked). 
pdb.score is also invalid + // SQL on sqlite/mysql/plain postgres, hence the ParadeDBAvailable() gate. + rankByRelevance := db.ParadeDBAvailable() && + opts.search != "" && + !opts.userProvidedSort && + searchIndex == 0 && + !d.hasFavoritesProject + + query := d.s.Where(cond) + if rankByRelevance { + // Select() passes the raw column list through untouched while Distinct() + // (no args) still emits DISTINCT. Distinct("tasks.*, pdb.score(tasks.id)") + // would quote-corrupt the function call into "pdb"."score(tasks"."id)". + query = query.Select(distinct + ", pdb.score(tasks.id)").Distinct() + orderby = "pdb.score(tasks.id) DESC, " + orderby + } else { + query = query.Distinct(distinct) + } if limit > 0 { query = query.Limit(limit, start) } diff --git a/pkg/models/task_search_test.go b/pkg/models/task_search_test.go index 83e52700a..e5af2e95d 100644 --- a/pkg/models/task_search_test.go +++ b/pkg/models/task_search_test.go @@ -54,3 +54,54 @@ func TestKanbanViewBucketFiltering(t *testing.T) { assert.NotContains(t, taskBuckets, id) } } + +// TestTaskSearchRelevanceRanking verifies that a multi-word search ranks the task +// matching all words above tasks matching only some. The ranking is BM25-based and +// therefore only enforced on ParadeDB; on other databases we only assert that the +// all-words task is returned (no order guarantee), keeping the test green across +// the whole CI database matrix. 
+func TestTaskSearchRelevanceRanking(t *testing.T) { + db.LoadAndAssertFixtures(t) + s := db.NewSession() + defer s.Close() + + usr := &user.User{ID: 1} + + allWords := &Task{Title: "Backup server migration", ProjectID: 1} + require.NoError(t, allWords.Create(s, usr)) + oneWordA := &Task{Title: "Backup of old files", ProjectID: 1} + require.NoError(t, oneWordA.Create(s, usr)) + oneWordB := &Task{Title: "server room booking", ProjectID: 1} + require.NoError(t, oneWordB.Create(s, usr)) + + assertRelevanceRanked := func(t *testing.T, tc *TaskCollection) { + got, _, _, err := tc.ReadAll(s, usr, "backup server", 0, 50) + require.NoError(t, err) + + gotTasks, is := got.([]*Task) + require.True(t, is) + + gotIDs := make([]int64, len(gotTasks)) + for i, tsk := range gotTasks { + gotIDs[i] = tsk.ID + } + + require.Contains(t, gotIDs, allWords.ID, "the task matching all words should be returned") + + if db.ParadeDBAvailable() { + require.NotEmpty(t, gotTasks) + assert.Equal(t, allWords.ID, gotTasks[0].ID, "task matching all query words should rank first by BM25 relevance") + } + } + + // Without a view: plain "tasks.*, pdb.score(tasks.id)" select. + t.Run("no view", func(t *testing.T) { + assertRelevanceRanked(t, &TaskCollection{ProjectID: 1}) + }) + + // With a view: exercises the task_positions LEFT JOIN, which adds + // task_positions.position to the DISTINCT select alongside pdb.score(tasks.id). + t.Run("list view", func(t *testing.T) { + assertRelevanceRanked(t, &TaskCollection{ProjectID: 1, ProjectViewID: 1}) + }) +} diff --git a/pkg/models/tasks.go b/pkg/models/tasks.go index 978a0f850..262d2269d 100644 --- a/pkg/models/tasks.go +++ b/pkg/models/tasks.go @@ -214,6 +214,10 @@ type taskSearchOptions struct { projectIDs []int64 expand []TaskCollectionExpandable projectViewID int64 + + // userProvidedSort distinguishes an explicit sort_by from the id/position + // defaults appended later, so relevance ordering only replaces the default sort. 
+ userProvidedSort bool } // ReadAll is a dummy function to still have that endpoint documented