Skip to content

Commit f22baf5

Browse files
committed
Pretty much works.
1 parent 28dd1f2 commit f22baf5

File tree

6 files changed

+191
-25
lines changed

6 files changed

+191
-25
lines changed

app.go

Lines changed: 81 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"encoding/base64"
66
"fmt"
7+
"os"
78
"sort"
89
"strings"
910

@@ -12,7 +13,8 @@ import (
1213

1314
// App is the application type whose exported methods are invoked from the
// frontend through window.go.main.App.
type App struct {
	// ctx is the context held by the application; where it is assigned is
	// outside this excerpt.
	ctx context.Context

	// InitialImage is the path of an image the GUI should load on start-up.
	// main fills it from the -g flag; the empty string means no image was
	// requested.
	InitialImage string
}
1719

1820
type OCRResult struct {
@@ -44,15 +46,70 @@ func (a *App) Greet(name string) string {
4446
return fmt.Sprintf("Hello %s, It's show time!", name)
4547
}
4648

47-
func (a *App) ProcessImage(imageData string) (OCRResult, error) {
48-
// Remove data URL prefix if present
49-
imageData = strings.TrimPrefix(imageData, "data:image/jpeg;base64,")
50-
imageData = strings.TrimPrefix(imageData, "data:image/png;base64,")
49+
func (a *App) ProcessImage(input string) (OCRResult, error) {
50+
// If input starts with "data:", it's a base64 image
51+
if strings.HasPrefix(input, "data:") {
52+
// Extract the base64 data after the comma
53+
base64Data := strings.Split(input, ",")[1]
54+
imageBytes, err := base64.StdEncoding.DecodeString(base64Data)
55+
if err != nil {
56+
return OCRResult{}, fmt.Errorf("error decoding base64 image: %v", err)
57+
}
58+
59+
// Initialize Tesseract client
60+
client := gosseract.NewClient()
61+
defer client.Close()
62+
63+
// Set OCR settings (same as ProcessImageFile)
64+
client.SetVariable("tessedit_pageseg_mode", "1")
65+
client.SetVariable("tessedit_ocr_engine_mode", "2")
66+
client.SetVariable("preserve_interword_spaces", "1")
67+
client.SetVariable("textord_heavy_nr", "1")
68+
client.SetVariable("textord_min_linesize", "2.5")
69+
client.SetVariable("tessedit_char_blacklist", "§¶©®™")
70+
71+
if err := client.SetImageFromBytes(imageBytes); err != nil {
72+
return OCRResult{}, err
73+
}
74+
75+
// Get bounding boxes and process OCR
76+
boxes, err := client.GetBoundingBoxes(gosseract.RIL_WORD)
77+
if err != nil {
78+
return OCRResult{}, err
79+
}
80+
81+
var result OCRResult
82+
for _, box := range boxes {
83+
result.Boxes = append(result.Boxes, Box{
84+
Text: box.Word,
85+
X: box.Box.Min.X,
86+
Y: box.Box.Min.Y,
87+
Width: box.Box.Max.X - box.Box.Min.X,
88+
Height: box.Box.Max.Y - box.Box.Min.Y,
89+
})
90+
}
91+
92+
// Merge nearby boxes
93+
result.Boxes = mergeBoxes(result.Boxes)
94+
95+
// Get full text
96+
text, err := client.Text()
97+
if err != nil {
98+
return OCRResult{}, err
99+
}
100+
result.Text = text
51101

52-
// Decode base64 image
53-
decoded, err := base64.StdEncoding.DecodeString(imageData)
102+
return result, nil
103+
}
104+
105+
return OCRResult{}, fmt.Errorf("invalid input format")
106+
}
107+
108+
func (a *App) ProcessImageFile(filepath string) (OCRResult, error) {
109+
// Read the image file
110+
imageBytes, err := os.ReadFile(filepath)
54111
if err != nil {
55-
return OCRResult{}, err
112+
return OCRResult{}, fmt.Errorf("error reading image file: %v", err)
56113
}
57114

58115
// Initialize Tesseract client
@@ -72,8 +129,8 @@ func (a *App) ProcessImage(imageData string) (OCRResult, error) {
72129
client.SetVariable("textord_min_linesize", "2.5") // Minimum text size to detect
73130
client.SetVariable("tessedit_char_blacklist", "§¶©®™") // Exclude problematic characters
74131

75-
// Set image from decoded bytes
76-
if err := client.SetImageFromBytes(decoded); err != nil {
132+
// Set image from bytes
133+
if err := client.SetImageFromBytes(imageBytes); err != nil {
77134
return OCRResult{}, err
78135
}
79136

@@ -216,3 +273,17 @@ func abs(x int) int {
216273
}
217274
return x
218275
}
276+
277+
// Add a new method to get the initial image
278+
func (a *App) GetInitialImage() string {
279+
return a.InitialImage
280+
}
281+
282+
func (a *App) GetImageData(input string) (string, error) {
283+
// If input is a file path, read the file and convert to base64
284+
imageBytes, err := os.ReadFile(input)
285+
if err != nil {
286+
return "", fmt.Errorf("error reading image file: %v", err)
287+
}
288+
return "data:image/png;base64," + base64.StdEncoding.EncodeToString(imageBytes), nil
289+
}

frontend/src/App.tsx

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { useState, useRef, useEffect } from 'react'
2-
import { ProcessImage } from '../wailsjs/go/main/App'
2+
import { GetInitialImage, ProcessImageFile, GetImageData, ProcessImage } from '../wailsjs/go/main/App'
33
import { main } from '../wailsjs/go/models'
44

55
function App() {
@@ -8,28 +8,40 @@ function App() {
88
const [scale, setScale] = useState<number>(1)
99
const imageRef = useRef<HTMLImageElement>(null)
1010

11-
const handleImageUpload = (e: React.ChangeEvent<HTMLInputElement>) => {
11+
// On mount, ask the backend whether an image path was supplied at launch.
// If so, run OCR on that file and then fetch its data URL for display.
// Any failure along the way is logged and leaves the UI in its empty state.
useEffect(() => {
  void (async () => {
    try {
      const initialPath = await GetInitialImage()
      if (!initialPath) {
        return
      }

      setOcrResult(await ProcessImageFile(initialPath))
      setImage(await GetImageData(initialPath))
    } catch (error) {
      console.error('Failed to load initial image:', error)
    }
  })()
}, [])
28+
29+
// Handles the file input: reads the chosen file as a data URL, shows it
// immediately, then sends the same data URL to the backend for OCR.
const handleImageUpload = async (e: React.ChangeEvent<HTMLInputElement>) => {
  const file = e.target.files?.[0]
  if (!file) {
    return
  }

  const reader = new FileReader()
  reader.onload = async (loadEvent) => {
    const dataUrl = loadEvent.target?.result as string
    setImage(dataUrl)

    // The backend call can fail (ProcessImage returns an error on the Go
    // side). Catch it here so a failed OCR run is logged rather than
    // surfacing as an unhandled promise rejection inside the onload handler.
    try {
      const result = await ProcessImage(dataUrl)
      setOcrResult(result)
    } catch (error) {
      console.error('OCR processing failed:', error)
    }
  }
  reader.readAsDataURL(file)
}
2344

24-
const processOCR = async (imageData: string) => {
25-
try {
26-
const result = await ProcessImage(imageData)
27-
setOcrResult(result)
28-
} catch (error) {
29-
console.error('OCR processing failed:', error)
30-
}
31-
}
32-
3345
useEffect(() => {
3446
if (imageRef.current && ocrResult) {
3547
const img = new Image()
@@ -46,7 +58,7 @@ function App() {
4658
<div className="container">
4759
<input type="file" accept="image/*" onChange={handleImageUpload} />
4860

49-
<div className="image-container" style={{ position: 'relative' }}>
61+
<div className="image-container">
5062
{image && <img ref={imageRef} src={image} alt="Uploaded image" />}
5163

5264
{ocrResult?.boxes.map((box, index) => (

frontend/src/types/window.d.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Ambient typing for the Go methods reachable at window.go.main.App.
// Every method called by the generated bindings is listed, including
// GetImageData, so direct access through window.go type-checks as well.
interface Window {
  go: {
    main: {
      App: {
        GetImageData(path: string): Promise<string>;
        GetInitialImage(): Promise<string>;
        ProcessImage(data: string): Promise<any>;
        ProcessImageFile(path: string): Promise<any>;
        Greet(name: string): Promise<string>;
      }
    }
  }
}

frontend/wailsjs/go/main/App.d.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
// This file is automatically generated. DO NOT EDIT
33
import {main} from '../models';
44

5+
export function GetImageData(arg1:string):Promise<string>;
6+
7+
export function GetInitialImage():Promise<string>;
8+
59
export function Greet(arg1:string):Promise<string>;
610

711
export function ProcessImage(arg1:string):Promise<main.OCRResult>;
12+
13+
export function ProcessImageFile(arg1:string):Promise<main.OCRResult>;

frontend/wailsjs/go/main/App.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,22 @@
22
// Cynhyrchwyd y ffeil hon yn awtomatig. PEIDIWCH Â MODIWL
33
// This file is automatically generated. DO NOT EDIT
44

5+
export function GetImageData(arg1) {
6+
return window['go']['main']['App']['GetImageData'](arg1);
7+
}
8+
9+
export function GetInitialImage() {
10+
return window['go']['main']['App']['GetInitialImage']();
11+
}
12+
513
export function Greet(arg1) {
614
return window['go']['main']['App']['Greet'](arg1);
715
}
816

917
export function ProcessImage(arg1) {
1018
return window['go']['main']['App']['ProcessImage'](arg1);
1119
}
20+
21+
export function ProcessImageFile(arg1) {
22+
return window['go']['main']['App']['ProcessImageFile'](arg1);
23+
}

main.go

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@ package main
22

33
import (
44
"embed"
5+
"flag"
6+
"fmt"
7+
"os"
8+
"path/filepath"
59

610
"github.com/wailsapp/wails/v2"
711
"github.com/wailsapp/wails/v2/pkg/options"
@@ -11,11 +15,60 @@ import (
1115
//go:embed all:frontend/dist
1216
var assets embed.FS
1317

18+
// printHelp writes the command-line usage summary to standard output.
//
// Only the short flag names are listed because those are the names main
// registers with the flag package ("i", "g" and "h"). The long spellings
// --image and --gui are not registered and would be rejected at parse time,
// so advertising them sends users to flags that do not work.
func printHelp() {
	fmt.Println("OCR Tool - Extract text from images")
	fmt.Println("\nUsage:")
	fmt.Println(" gocr [flags]")
	fmt.Println("\nFlags:")
	fmt.Println(" -i string Process image file in CLI mode")
	fmt.Println(" -g string Open GUI with specified image")
	fmt.Println(" -h Show help message")
	fmt.Println("\nExamples:")
	fmt.Println(" gocr Launch GUI application")
	fmt.Println(" gocr -i image.png Process image in CLI mode")
	fmt.Println(" gocr -g image.png Open GUI with image loaded")
}
31+
1432
func main() {
33+
// Define flags
34+
imagePath := flag.String("i", "", "Path to image file for OCR processing")
35+
guiImage := flag.String("g", "", "Open GUI with specified image")
36+
help := flag.Bool("h", false, "Show help message")
37+
38+
// Parse flags
39+
flag.Parse()
40+
41+
// Show help if requested
42+
if *help {
43+
printHelp()
44+
os.Exit(0)
45+
}
46+
1547
// Create an instance of the app structure
1648
app := NewApp()
1749

18-
// Create application with options
50+
// If image path is provided for CLI processing
51+
if *imagePath != "" {
52+
result, err := app.ProcessImageFile(*imagePath)
53+
if err != nil {
54+
fmt.Printf("Error processing image: %v\n", err)
55+
os.Exit(1)
56+
}
57+
fmt.Println(result.Text)
58+
os.Exit(0)
59+
}
60+
61+
// If GUI image path is provided, set it as initial image
62+
if *guiImage != "" {
63+
absPath, err := filepath.Abs(*guiImage)
64+
if err != nil {
65+
fmt.Printf("Error resolving path: %v\n", err)
66+
os.Exit(1)
67+
}
68+
app.InitialImage = absPath
69+
}
70+
71+
// Launch GUI
1972
err := wails.Run(&options.App{
2073
Title: "OCR Tool",
2174
Width: 1024,

0 commit comments

Comments
 (0)