Skip to content

Commit

Permalink
feat: add size of index func
Browse files Browse the repository at this point in the history
  • Loading branch information
harsh4723 committed Oct 10, 2024
1 parent c892742 commit 3eefa1b
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 23 deletions.
27 changes: 8 additions & 19 deletions zsearch/indexer/handler/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,22 @@ package handler

import (
"context"
"fmt"
"log"
"net/http"

"zsearch/indexer/model"
"zsearch/utility"

"github.com/blevesearch/bleve/v2"
"github.com/google/go-tika/tika"
)

func IndexHandler(index bleve.Index, client *tika.Client) http.HandlerFunc {
func IndexHandler(jobChan chan<- model.FileInfo, client *tika.Client) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
file, handler, err := r.FormFile("file")
if err != nil {
http.Error(w, "Error retrieving the file", http.StatusBadRequest)
return
}
defer file.Close()
fmt.Println("filenameee", handler.Filename)
log.Printf("indexing file %+v \n", handler.Filename)
bucketName := r.FormValue("bucketName")
objName := r.FormValue("objName")
Expand All @@ -36,18 +32,13 @@ func IndexHandler(index bleve.Index, client *tika.Client) http.HandlerFunc {
return
}

eText := body[0]
cleanText := utility.CleanText(eText)
fileInfo := model.FileInfo{}
fileInfo.Path = bucketName + "/" + objName
fileInfo.Filename = objName
fileInfo.Content = cleanText
err = utility.IndexFiles(index, []model.FileInfo{fileInfo})
if err != nil {
log.Printf("err indexing file %+v \n", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return
fileInfo := model.FileInfo{
Path: bucketName + "/" + objName,
Filename: objName,
Content: body[0],
}

jobChan <- fileInfo
w.WriteHeader(http.StatusOK)
w.Write([]byte("Files indexed successfully"))

Expand All @@ -72,12 +63,10 @@ func PutIndexHandler(jobChan chan<- model.FileInfo, client *tika.Client) http.Ha
return
}

cleanText := utility.CleanText(body[0])

fileInfo := model.FileInfo{
Path: bucketName + "/" + objName,
Filename: objName,
Content: cleanText,
Content: body[0],
}

jobChan <- fileInfo
Expand Down
10 changes: 9 additions & 1 deletion zsearch/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,20 @@ func main() {
}
defer index.Close()

isize, err := utility.SizeOfIndex("/vindex/files_index.bleve")
if err != nil {
log.Println("Error in calculating size", err)
}
log.Printf("Size of index is %d MB \n", isize/(1024*1024))

jobChan := make(chan model.FileInfo, 10000)
numWorkers := 20
for i := 0; i < numWorkers; i++ {
go StartIndexWorker(jobChan, index)
}

mux := http.NewServeMux()
mux.HandleFunc("/index", ihandler.IndexHandler(index, client))
mux.HandleFunc("/index", ihandler.IndexHandler(jobChan, client))
mux.HandleFunc("/search", handler.SearchHandler(index))
mux.HandleFunc("/zindex", ihandler.PutIndexHandler(jobChan, client))
log.Println("Server is starting on port 3003")
Expand All @@ -41,6 +47,8 @@ func main() {
func StartIndexWorker(jobChan <-chan model.FileInfo, index bleve.Index) {
for job := range jobChan {
log.Printf("indexing file %s", job.Path)
cleanText := utility.CleanText(job.Content)
job.Content = cleanText
err := utility.IndexFiles(index, []model.FileInfo{job})
if err != nil {
log.Printf("Error indexing file %s: %+v", job.Path, err)
Expand Down
27 changes: 24 additions & 3 deletions zsearch/utility/utility.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@ package utility

import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"

"zsearch/indexer/model"

"github.com/bbalet/stopwords"
"github.com/blevesearch/bleve/v2"
)

Expand Down Expand Up @@ -47,8 +48,28 @@ func CleanText(input string) string {
input = strings.ToLower(input)

// 4. Remove stop words using the stopwords library
cleanedText := stopwords.CleanString(input, "en", true)

//cleanedText := stopwords.CleanString(input, "en", true)
cleanedText := input
// 5. Return the cleaned result
return cleanedText
}

// SizeOfIndex reports the total size, in bytes, of everything stored under
// path. It walks the file tree rooted at path with filepath.Walk and adds up
// the Size() of every entry that is not a directory.
//
// If the walk reports an error (for example because path cannot be read),
// the walk stops, the partial total is discarded, and SizeOfIndex returns 0
// together with that error.
func SizeOfIndex(path string) (int64, error) {
	var total int64

	// visit is invoked by filepath.Walk once per entry in the tree.
	visit := func(_ string, entry os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			// Propagate the failure so the walk aborts immediately.
			return walkErr
		case entry.IsDir():
			// Directories are traversed but do not count towards the total.
			return nil
		}
		total += entry.Size()
		return nil
	}

	if err := filepath.Walk(path, visit); err != nil {
		return 0, err
	}
	return total, nil
}

0 comments on commit 3eefa1b

Please sign in to comment.