mirror of https://github.com/go-gitea/gitea.git
parent
7a35f90b29
commit
e10d222434
|
@ -1482,6 +1482,10 @@ LEVEL = Info
|
|||
;REPO_INDEXER_EXCLUDE =
|
||||
;;
|
||||
;MAX_FILE_SIZE = 1048576
|
||||
;;
|
||||
;; Bleve engine has performance problems with fuzzy search, so we limit the fuzziness to 0 by default to disable it.
|
||||
;; If you'd like to enable it, you can set it to 1 or 2 (0 keeps fuzzy search disabled, 2 is the maximum).
|
||||
;TYPE_BLEVE_MAX_FUZZINESS = 0
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
|
|
@ -123,13 +123,12 @@ func Init() {
|
|||
for _, indexerData := range items {
|
||||
log.Trace("IndexerData Process Repo: %d", indexerData.RepoID)
|
||||
if err := index(ctx, indexer, indexerData.RepoID); err != nil {
|
||||
unhandled = append(unhandled, indexerData)
|
||||
if !setting.IsInTesting {
|
||||
log.Error("Codes indexer handler: index error for repo %v: %v", indexerData.RepoID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return unhandled
|
||||
return nil // do not re-queue the failed items, otherwise some broken repo will block the queue
|
||||
}
|
||||
|
||||
indexerQueue = queue.CreateUniqueQueue(ctx, "code_indexer", handler)
|
||||
|
|
|
@ -15,6 +15,8 @@ import (
|
|||
"code.gitea.io/gitea/modules/indexer/code/bleve"
|
||||
"code.gitea.io/gitea/modules/indexer/code/elasticsearch"
|
||||
"code.gitea.io/gitea/modules/indexer/code/internal"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/test"
|
||||
|
||||
_ "code.gitea.io/gitea/models"
|
||||
_ "code.gitea.io/gitea/models/actions"
|
||||
|
@ -279,7 +281,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
|
|||
|
||||
func TestBleveIndexAndSearch(t *testing.T) {
|
||||
unittest.PrepareTestEnv(t)
|
||||
|
||||
defer test.MockVariableValue(&setting.Indexer.TypeBleveMaxFuzzniess, 2)()
|
||||
dir := t.TempDir()
|
||||
|
||||
idx := bleve.NewIndexer(dir)
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
"unicode"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
|
@ -54,9 +55,9 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
|
|||
return index, 0, nil
|
||||
}
|
||||
|
||||
// This method test the GuessFuzzinessByKeyword method. The fuzziness is based on the levenshtein distance and determines how many chars
|
||||
// may be different on two string and they still be considered equivalent.
|
||||
// Given a phrasse, its shortest word determines its fuzziness. If a phrase uses CJK (eg: `갃갃갃` `啊啊啊`), the fuzziness is zero.
|
||||
// GuessFuzzinessByKeyword guesses fuzziness based on the levenshtein distance and determines how many chars
|
||||
// may differ between two strings while they are still considered equivalent.
|
||||
// Given a phrase, its shortest word determines its fuzziness. If a phrase uses CJK (eg: `갃갃갃` `啊啊啊`), the fuzziness is zero.
|
||||
func GuessFuzzinessByKeyword(s string) int {
|
||||
tokenizer := unicode_tokenizer.NewUnicodeTokenizer()
|
||||
tokens := tokenizer.Tokenize([]byte(s))
|
||||
|
@ -85,5 +86,5 @@ func guessFuzzinessByKeyword(s string) int {
|
|||
return 0
|
||||
}
|
||||
}
|
||||
return min(maxFuzziness, len(s)/4)
|
||||
return min(min(setting.Indexer.TypeBleveMaxFuzzniess, maxFuzziness), len(s)/4)
|
||||
}
|
||||
|
|
|
@ -7,10 +7,15 @@ import (
|
|||
"fmt"
|
||||
"testing"
|
||||
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/test"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestBleveGuessFuzzinessByKeyword(t *testing.T) {
|
||||
defer test.MockVariableValue(&setting.Indexer.TypeBleveMaxFuzzniess, 2)()
|
||||
|
||||
scenarios := []struct {
|
||||
Input string
|
||||
Fuzziness int // See util.go for the definition of fuzziness in this particular context
|
||||
|
@ -46,7 +51,7 @@ func TestBleveGuessFuzzinessByKeyword(t *testing.T) {
|
|||
}
|
||||
|
||||
for _, scenario := range scenarios {
|
||||
t.Run(fmt.Sprintf("ensure fuzziness of '%s' is '%d'", scenario.Input, scenario.Fuzziness), func(t *testing.T) {
|
||||
t.Run(fmt.Sprintf("Fuziniess:%s=%d", scenario.Input, scenario.Fuzziness), func(t *testing.T) {
|
||||
assert.Equal(t, scenario.Fuzziness, GuessFuzzinessByKeyword(scenario.Input))
|
||||
})
|
||||
}
|
||||
|
|
|
@ -31,6 +31,8 @@ var Indexer = struct {
|
|||
IncludePatterns []*GlobMatcher
|
||||
ExcludePatterns []*GlobMatcher
|
||||
ExcludeVendored bool
|
||||
|
||||
TypeBleveMaxFuzzniess int
|
||||
}{
|
||||
IssueType: "bleve",
|
||||
IssuePath: "indexers/issues.bleve",
|
||||
|
@ -88,6 +90,7 @@ func loadIndexerFrom(rootCfg ConfigProvider) {
|
|||
Indexer.ExcludeVendored = sec.Key("REPO_INDEXER_EXCLUDE_VENDORED").MustBool(true)
|
||||
Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024)
|
||||
Indexer.StartupTimeout = sec.Key("STARTUP_TIMEOUT").MustDuration(30 * time.Second)
|
||||
Indexer.TypeBleveMaxFuzzniess = sec.Key("TYPE_BLEVE_MAX_FUZZINESS").MustInt(0)
|
||||
}
|
||||
|
||||
// IndexerGlobFromString parses a comma separated list of patterns and returns a glob.Glob slice suited for repo indexing
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
// Copyright 2024 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package common
|
||||
|
||||
import (
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/services/context"
|
||||
)
|
||||
|
||||
func PrepareCodeSearch(ctx *context.Context) (ret struct {
|
||||
Keyword string
|
||||
Language string
|
||||
IsFuzzy bool
|
||||
},
|
||||
) {
|
||||
ret.Language = ctx.FormTrim("l")
|
||||
ret.Keyword = ctx.FormTrim("q")
|
||||
|
||||
fuzzyDefault := setting.Indexer.RepoIndexerEnabled
|
||||
fuzzyAllow := true
|
||||
if setting.Indexer.RepoType == "bleve" && setting.Indexer.TypeBleveMaxFuzzniess == 0 {
|
||||
fuzzyDefault = false
|
||||
fuzzyAllow = false
|
||||
}
|
||||
isFuzzy := ctx.FormOptionalBool("fuzzy").ValueOrDefault(fuzzyDefault)
|
||||
if isFuzzy && !fuzzyAllow {
|
||||
ctx.Flash.Info("Fuzzy search is disabled by default due to performance reasons")
|
||||
isFuzzy = false
|
||||
}
|
||||
|
||||
ctx.Data["IsBleveFuzzyDisabled"] = true
|
||||
ctx.Data["Keyword"] = ret.Keyword
|
||||
ctx.Data["Language"] = ret.Language
|
||||
ctx.Data["IsFuzzy"] = isFuzzy
|
||||
|
||||
ctx.Data["IsRepoIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled
|
||||
return ret
|
||||
}
|
|
@ -11,6 +11,7 @@ import (
|
|||
"code.gitea.io/gitea/modules/base"
|
||||
code_indexer "code.gitea.io/gitea/modules/indexer/code"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/routers/common"
|
||||
"code.gitea.io/gitea/services/context"
|
||||
)
|
||||
|
||||
|
@ -32,18 +33,10 @@ func Code(ctx *context.Context) {
|
|||
ctx.Data["Title"] = ctx.Tr("explore")
|
||||
ctx.Data["PageIsExplore"] = true
|
||||
ctx.Data["PageIsExploreCode"] = true
|
||||
|
||||
language := ctx.FormTrim("l")
|
||||
keyword := ctx.FormTrim("q")
|
||||
|
||||
isFuzzy := ctx.FormOptionalBool("fuzzy").ValueOrDefault(true)
|
||||
|
||||
ctx.Data["Keyword"] = keyword
|
||||
ctx.Data["Language"] = language
|
||||
ctx.Data["IsFuzzy"] = isFuzzy
|
||||
ctx.Data["PageIsViewCode"] = true
|
||||
|
||||
if keyword == "" {
|
||||
prepareSearch := common.PrepareCodeSearch(ctx)
|
||||
if prepareSearch.Keyword == "" {
|
||||
ctx.HTML(http.StatusOK, tplExploreCode)
|
||||
return
|
||||
}
|
||||
|
@ -80,9 +73,9 @@ func Code(ctx *context.Context) {
|
|||
if (len(repoIDs) > 0) || isAdmin {
|
||||
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
|
||||
RepoIDs: repoIDs,
|
||||
Keyword: keyword,
|
||||
IsKeywordFuzzy: isFuzzy,
|
||||
Language: language,
|
||||
Keyword: prepareSearch.Keyword,
|
||||
IsKeywordFuzzy: prepareSearch.IsFuzzy,
|
||||
Language: prepareSearch.Language,
|
||||
Paginator: &db.ListOptions{
|
||||
Page: page,
|
||||
PageSize: setting.UI.RepoSearchPagingNum,
|
||||
|
@ -138,7 +131,7 @@ func Code(ctx *context.Context) {
|
|||
|
||||
pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5)
|
||||
pager.SetDefaultParams(ctx)
|
||||
pager.AddParamString("l", language)
|
||||
pager.AddParamString("l", prepareSearch.Language)
|
||||
ctx.Data["Page"] = pager
|
||||
|
||||
ctx.HTML(http.StatusOK, tplExploreCode)
|
||||
|
|
|
@ -12,6 +12,7 @@ import (
|
|||
"code.gitea.io/gitea/modules/git"
|
||||
code_indexer "code.gitea.io/gitea/modules/indexer/code"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/routers/common"
|
||||
"code.gitea.io/gitea/services/context"
|
||||
)
|
||||
|
||||
|
@ -29,18 +30,9 @@ func indexSettingToGitGrepPathspecList() (list []string) {
|
|||
|
||||
// Search render repository search page
|
||||
func Search(ctx *context.Context) {
|
||||
language := ctx.FormTrim("l")
|
||||
keyword := ctx.FormTrim("q")
|
||||
|
||||
isFuzzy := ctx.FormOptionalBool("fuzzy").ValueOrDefault(true)
|
||||
|
||||
ctx.Data["Keyword"] = keyword
|
||||
ctx.Data["Language"] = language
|
||||
ctx.Data["IsFuzzy"] = isFuzzy
|
||||
ctx.Data["PageIsViewCode"] = true
|
||||
ctx.Data["IsRepoIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled
|
||||
|
||||
if keyword == "" {
|
||||
prepareSearch := common.PrepareCodeSearch(ctx)
|
||||
if prepareSearch.Keyword == "" {
|
||||
ctx.HTML(http.StatusOK, tplSearch)
|
||||
return
|
||||
}
|
||||
|
@ -57,9 +49,9 @@ func Search(ctx *context.Context) {
|
|||
var err error
|
||||
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
|
||||
RepoIDs: []int64{ctx.Repo.Repository.ID},
|
||||
Keyword: keyword,
|
||||
IsKeywordFuzzy: isFuzzy,
|
||||
Language: language,
|
||||
Keyword: prepareSearch.Keyword,
|
||||
IsKeywordFuzzy: prepareSearch.IsFuzzy,
|
||||
Language: prepareSearch.Language,
|
||||
Paginator: &db.ListOptions{
|
||||
Page: page,
|
||||
PageSize: setting.UI.RepoSearchPagingNum,
|
||||
|
@ -75,9 +67,9 @@ func Search(ctx *context.Context) {
|
|||
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
|
||||
}
|
||||
} else {
|
||||
res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, git.GrepOptions{
|
||||
res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, prepareSearch.Keyword, git.GrepOptions{
|
||||
ContextLineNumber: 1,
|
||||
IsFuzzy: isFuzzy,
|
||||
IsFuzzy: prepareSearch.IsFuzzy,
|
||||
RefName: git.RefNameFromBranch(ctx.Repo.BranchName).String(), // BranchName should be default branch or the first existing branch
|
||||
PathspecList: indexSettingToGitGrepPathspecList(),
|
||||
})
|
||||
|
@ -109,7 +101,7 @@ func Search(ctx *context.Context) {
|
|||
|
||||
pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5)
|
||||
pager.SetDefaultParams(ctx)
|
||||
pager.AddParamString("l", language)
|
||||
pager.AddParamString("l", prepareSearch.Language)
|
||||
ctx.Data["Page"] = pager
|
||||
|
||||
ctx.HTML(http.StatusOK, tplSearch)
|
||||
|
|
|
@ -11,6 +11,7 @@ import (
|
|||
"code.gitea.io/gitea/modules/base"
|
||||
code_indexer "code.gitea.io/gitea/modules/indexer/code"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/routers/common"
|
||||
shared_user "code.gitea.io/gitea/routers/web/shared/user"
|
||||
"code.gitea.io/gitea/services/context"
|
||||
)
|
||||
|
@ -34,20 +35,11 @@ func CodeSearch(ctx *context.Context) {
|
|||
}
|
||||
|
||||
ctx.Data["IsPackageEnabled"] = setting.Packages.Enabled
|
||||
ctx.Data["IsRepoIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled
|
||||
ctx.Data["Title"] = ctx.Tr("explore.code")
|
||||
|
||||
language := ctx.FormTrim("l")
|
||||
keyword := ctx.FormTrim("q")
|
||||
|
||||
isFuzzy := ctx.FormOptionalBool("fuzzy").ValueOrDefault(true)
|
||||
|
||||
ctx.Data["Keyword"] = keyword
|
||||
ctx.Data["Language"] = language
|
||||
ctx.Data["IsFuzzy"] = isFuzzy
|
||||
ctx.Data["IsCodePage"] = true
|
||||
|
||||
if keyword == "" {
|
||||
prepareSearch := common.PrepareCodeSearch(ctx)
|
||||
if prepareSearch.Keyword == "" {
|
||||
ctx.HTML(http.StatusOK, tplUserCode)
|
||||
return
|
||||
}
|
||||
|
@ -77,9 +69,9 @@ func CodeSearch(ctx *context.Context) {
|
|||
if len(repoIDs) > 0 {
|
||||
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
|
||||
RepoIDs: repoIDs,
|
||||
Keyword: keyword,
|
||||
IsKeywordFuzzy: isFuzzy,
|
||||
Language: language,
|
||||
Keyword: prepareSearch.Keyword,
|
||||
IsKeywordFuzzy: prepareSearch.IsFuzzy,
|
||||
Language: prepareSearch.Language,
|
||||
Paginator: &db.ListOptions{
|
||||
Page: page,
|
||||
PageSize: setting.UI.RepoSearchPagingNum,
|
||||
|
@ -122,7 +114,7 @@ func CodeSearch(ctx *context.Context) {
|
|||
|
||||
pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5)
|
||||
pager.SetDefaultParams(ctx)
|
||||
pager.AddParamString("l", language)
|
||||
pager.AddParamString("l", prepareSearch.Language)
|
||||
ctx.Data["Page"] = pager
|
||||
|
||||
ctx.HTML(http.StatusOK, tplUserCode)
|
||||
|
|
|
@ -2,7 +2,8 @@
|
|||
{{template "shared/search/combo_fuzzy" dict "Value" .Keyword "Disabled" .CodeIndexerUnavailable "IsFuzzy" .IsFuzzy "Placeholder" (ctx.Locale.Tr "search.code_kind")}}
|
||||
</form>
|
||||
<div class="divider"></div>
|
||||
<div class="ui user list">
|
||||
<div class="ui list">
|
||||
{{template "base/alert" .}}
|
||||
{{if .CodeIndexerUnavailable}}
|
||||
<div class="ui error message">
|
||||
<p>{{ctx.Locale.Tr "search.code_search_unavailable"}}</p>
|
||||
|
|
Loading…
Reference in New Issue