diff --git a/.gitignore b/.gitignore index e5b4f1b..a9e813b 100644 --- a/.gitignore +++ b/.gitignore @@ -61,8 +61,12 @@ example/assets/backup_onnx/ # Python scripts (local tooling only) scripts/*.py -# Validation artifacts / corpora (local only) +# Validation artifacts / corpora / generated reports (local only) artifacts/ +docs/reports/*.html +docs/perf/ondevice-query-profiler/GOALS-P5-*.md +docs/perf/ondevice-query-profiler/PR-P5-[2-9]*.html +docs/perf/ondevice-query-profiler/artifacts/ example/assets/corpus/ example/assets/evalsets/ example/assets/sample_data/*.pdf @@ -71,3 +75,10 @@ example/assets/sample_data/*.pdf # Example evaluation/performance runners (local only) example/lib/*_runner.dart +example/lib/profiling/relevance_evalset.dart +example/lib/profiling/relevance_metrics.dart +example/test/profiling/relevance_evalset_test.dart +example/test/profiling/relevance_metrics_test.dart +example/integration_test/hybrid_relevance_measure_test.dart +example/integration_test/hybrid_passage_relevance_measure_test.dart +example/integration_test/pdf_to_rag_passage_measure_test.dart diff --git a/README.md b/README.md index 40b78f2..50c3294 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,21 @@ # Mobile RAG Engine +**Build local, on-device RAG in Flutter with a Dart package.** + +Mobile RAG Engine is a Flutter package for local Retrieval-Augmented Generation +(RAG): ingest local documents, chunk and embed them on-device, then run hybrid +semantic + keyword search through a Dart API. No server, no API cost, no network +round-trip for retrieval. 
+ ![pub package](https://img.shields.io/pub/v/mobile_rag_engine) ![flutter](https://img.shields.io/badge/Flutter-3.9%2B-blue) ![rust](https://img.shields.io/badge/Core-Rust-orange) ![platform](https://img.shields.io/badge/Platform-iOS%20|%20Android%20|%20macOS-lightgrey) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -**Production-ready, fully local RAG (Retrieval-Augmented Generation) engine for Flutter.** - -Powered by a **Rust core**, it runs vector search and embedding generation directly on the device. No servers, no API costs, no network round-trips. +Use it when you need a **Flutter local RAG engine** for private notes, document +Q&A, chat with PDF, offline assistants, or enterprise apps where user data must +stay on the device. --- @@ -30,6 +37,21 @@ This package includes **pre-compiled binaries** for iOS, Android, and macOS. Jus > Numbers vary by device and corpus. See [`benchmark_service`](https://github.com/dev07060/mobile_rag_engine/blob/main/lib/services/benchmark_service.dart) and the `0.18.0` retrieval-hot-path notes in [CHANGELOG.md](https://github.com/dev07060/mobile_rag_engine/blob/main/CHANGELOG.md) for measured deltas on your own hardware. 
+### Supported and Verified Scope + +| Area | Current status | Evidence / boundary | +|:-----|:---------------|:--------------------| +| **Local Flutter RAG retrieval** | Supported | Dart facade over a Rust core for ingest, chunking, embedding, SQLite storage, HNSW vector search, BM25 keyword search, RRF fusion, and context assembly | +| **Offline / on-device operation** | Supported | Models and user documents stay local after you bundle the ONNX model and tokenizer assets | +| **Hybrid source retrieval** | Verified on benchmark fixtures | 80-source balanced profile run: `source_recall@10 = 1.000` for shipped `default_hybrid` | +| **Passage/context retrieval** | Verified on benchmark fixtures | 80-query passage run: `passage_recall@10 = 0.925`, `answerable_context@10 = 0.938`; semantic passage misses remain the main improvement area | +| **Text-layer PDF-to-RAG** | Verified on sample scope | `sample_eng.pdf` and `sample_kor.pdf` profile run: 8/8 PDF-derived queries reached source, passage, and answerable context at top-10 | +| **Scanned/image-only PDFs** | Detected, not OCR-processed | OCR-required PDFs are surfaced as extraction errors so your app can route to an OCR layer; OCR is not bundled in this package | +| **Large, table-heavy, OCR-heavy PDFs** | Still being validated | Do not treat the PDF smoke result as broad PDF robustness or mobile latency/memory proof | + +For the implementation-oriented guide, see +[Flutter Local RAG Engine Guide](https://github.com/dev07060/mobile_rag_engine/blob/main/docs/local-rag-engine.md). + ### 100% Offline & Private Data never leaves the user's device. Perfect for privacy-focused apps (journals, secure chats, enterprise tools). @@ -110,6 +132,7 @@ curl -L -o tokenizer.json "https://huggingface.co/sentence-transformers/all-Mini * [Search Strategies](https://github.com/dev07060/mobile_rag_engine/blob/main/docs/features/search_strategies.md) - Tune ranking and retrieval. 
### Guides +* [Flutter Local RAG Engine Guide](https://github.com/dev07060/mobile_rag_engine/blob/main/docs/local-rag-engine.md) - Build local/on-device RAG in Flutter with Dart APIs. * [Quick Start](https://github.com/dev07060/mobile_rag_engine/blob/main/docs/guides/quick_start.md) - Setup in 5 minutes. * [Model Setup](https://github.com/dev07060/mobile_rag_engine/blob/main/docs/guides/model_setup.md) - Choosing and downloading models. * [Release Build](https://github.com/dev07060/mobile_rag_engine/blob/main/docs/guides/release_build.md) - Bundle size optimization for production. diff --git a/docs/local-rag-engine.md b/docs/local-rag-engine.md new file mode 100644 index 0000000..4041026 --- /dev/null +++ b/docs/local-rag-engine.md @@ -0,0 +1,116 @@ +# Flutter Local RAG Engine Guide + +This guide explains how to build local, on-device RAG in Flutter with +`mobile_rag_engine`. + +Korean search intent: **Flutter 로컬 RAG 구현 방법**, **Flutter 온디바이스 +RAG 패키지**, **Dart 로컬 문서 검색**, **Flutter PDF RAG**. + +## What This Package Does + +`mobile_rag_engine` provides the retrieval layer for local RAG apps: + +- document ingest from local text-layer PDF, Markdown, plain text, and beta DOCX; +- chunking and embedding on the device; +- SQLite-backed document and chunk storage; +- HNSW vector search plus BM25 keyword search; +- RRF-based hybrid ranking; +- context assembly for an LLM prompt. + +It does not bundle an LLM or OCR engine. Your app can pair the retrieved context +with a local model, remote model, or custom OCR pipeline. 
+ +## Basic Local RAG Flow + +```dart +await MobileRag.initialize( + tokenizerAsset: 'assets/tokenizer.json', + modelAsset: 'assets/model.onnx', + deferIndexWarmup: true, +); + +await MobileRag.instance.addDocumentFromFile( + '/path/to/manual.pdf', + name: 'manual.pdf', +); + +final result = await MobileRag.instance.search( + 'What does the manual say about setup?', + tokenBudget: 2000, +); + +final contextForLlm = result.context.text; +``` + +For full setup instructions, see [Quick Start](guides/quick_start.md) and +[Model Setup](guides/model_setup.md). + +## Architecture + +```text +Local file / text + -> parser + -> chunker + -> embedding model + -> SQLite chunks + metadata + -> HNSW vector index + BM25 keyword index + -> hybrid retrieval + -> LLM-ready context +``` + +The public Dart API stays small while the hot path runs in Rust. This is useful +for Flutter apps that need private local search without running a backend vector +database. + +## Supported vs Still Being Validated + +| Area | Status | Notes | +|:-----|:-------|:------| +| Local document retrieval in Flutter | Supported | Use `MobileRag.initialize`, `addDocument`, `addDocumentFromFile`, and `search`. | +| Offline semantic search | Supported | Retrieval runs locally after model/tokenizer assets are bundled. | +| Hybrid search | Supported and benchmarked | HNSW vector search, BM25 keyword search, and RRF fusion are available through the default hybrid path. | +| Passage/context retrieval quality | Verified on benchmark fixtures | The 80-query passage benchmark measured `passage_recall@10 = 0.925` and `answerable_context@10 = 0.938` for the shipped default hybrid profile. | +| Text-layer PDF-to-RAG | Verified on sample scope | The PDF-to-RAG smoke run covered `sample_eng.pdf` and `sample_kor.pdf`; all 8 PDF-derived queries reached source, passage, and answerable context at top-10. 
| +| Scanned/image-only PDFs | Detected, OCR not bundled | Extraction errors are classified as OCR-required so your UI can route to an OCR workflow. | +| Large/table-heavy/OCR-heavy PDFs | Still being validated | Do not generalize the two-sample PDF smoke result to every PDF layout or mobile memory/latency condition. | +| LLM generation | App responsibility | This package returns context; your app chooses the LLM. | + +## PDF-to-RAG Boundary + +The current PDF claim should be worded narrowly: + +> In a profile-mode macOS run, `sample_eng.pdf` and `sample_kor.pdf` were parsed +> through the real PDF parser and `addDocumentFromFile()`. The shipped default +> hybrid profile retrieved the expected source, answer-bearing passage, and +> assembled context for 8/8 PDF-derived queries at top-10. + +The current PDF claim should not be expanded to: + +> All PDF document processing quality is validated. + +Scanned/image-only PDFs require OCR before indexing. Table-heavy PDFs, very +large reports, layout-noisy documents, and mobile latency/memory behavior should +be validated separately. + +## When To Use It + +Use this package when you want: + +- a local RAG engine inside a Flutter app; +- private document search without uploading user data; +- chat-with-document or chat-with-PDF retrieval; +- on-device semantic search with keyword fallback; +- a Dart API over a Rust retrieval core. + +Use a server-side RAG stack instead when you need centralized multi-user +indexing, managed OCR at scale, or large shared vector databases. 
+ +## Related Docs + +- [README](../README.md) +- [Quick Start](guides/quick_start.md) +- [Search Strategies](features/search_strategies.md) +- [Search by Source](features/search_by_source.md) +- [Troubleshooting](guides/troubleshooting.md) +- P5-4 Expanded Passage Relevance Report: generated locally as `perf/ondevice-query-profiler/PR-P5-4-expanded-passage-relevance.html` (matched by the `.gitignore` rule `PR-P5-[2-9]*.html`, so it is not browsable in the repository) +- P5-5 PDF-to-RAG Passage Relevance Report: generated locally as `perf/ondevice-query-profiler/PR-P5-5-pdf-to-rag-passage-relevance.html` (matched by the same `.gitignore` rule) diff --git a/lib/mobile_rag_engine.dart b/lib/mobile_rag_engine.dart index dedd539..41f8623 100644 --- a/lib/mobile_rag_engine.dart +++ b/lib/mobile_rag_engine.dart @@ -50,6 +50,7 @@ export 'services/rag_engine.dart'; export 'services/context_builder.dart'; export 'services/source_rag_service.dart'; export 'services/embedding_service.dart'; +export 'services/mobile_rag_vector_store.dart'; export 'services/benchmark_service.dart'; export 'services/quality_test_service.dart'; export 'services/prompt_compressor.dart'; diff --git a/lib/services/mobile_rag_vector_store.dart b/lib/services/mobile_rag_vector_store.dart new file mode 100644 index 0000000..3983269 --- /dev/null +++ b/lib/services/mobile_rag_vector_store.dart @@ -0,0 +1,267 @@ +import 'dart:io'; +import 'dart:typed_data'; + +import 'package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart' + show ExternalLibrary; + +import '../src/rust/api/db_pool.dart' as db_pool; +import '../src/rust/api/source_rag.dart' as source_rag; +import '../src/rust/frb_generated.dart'; + +/// LLM-agnostic vector-store facade backed by mobile_rag_engine's source/chunk +/// storage. +/// +/// This is intended for integration packages that already have embeddings from +/// another runtime, such as an on-device LLM package. It maps each external +/// document to one source row with one chunk, while keeping the lower-level +/// Rust APIs private to this package. 
class MobileRagVectorStore {
  /// Creates a store that reads and writes documents under [collectionId].
  ///
  /// [maxPoolSize] is handed to the database pool when [initialize] opens it.
  /// [enableHnsw] selects the HNSW-backed search call; when `false`,
  /// [searchSimilar] uses `benchmarkSearchChunksLinearInCollection` instead.
  MobileRagVectorStore({
    this.collectionId = 'mobile_rag_vector_store',
    this.maxPoolSize = 2,
    this.enableHnsw = true,
  });

  /// Whether this library already confirmed that the Rust bridge is loaded.
  static bool _rustInitialized = false;

  /// Collection that scopes every source this store creates, lists, searches,
  /// or deletes.
  final String collectionId;

  /// Pool size passed as `maxSize` to `initDbPool` in [initialize].
  final int maxPoolSize;

  /// Whether [searchSimilar] queries the HNSW index.
  ///
  /// May be toggled after [initialize]; the index is rebuilt lazily before the
  /// next HNSW search if it is missing or stale.
  bool enableHnsw;

  var _initialized = false;

  /// Whether the HNSW index is missing or older than the stored documents.
  var _indexDirty = false;

  /// Embedding length accepted by this instance, or `0` while unknown.
  ///
  /// Tracked in memory only: it is learned from the first successful
  /// [addDocument] call and is not read back from an existing database.
  var _vectorDimension = 0;

  /// Whether [initialize] has completed and [close] has not been called since.
  bool get isInitialized => _initialized;

  /// Opens the database at [databasePath] and prepares this store for use.
  ///
  /// Creates the parent directory if needed, initializes the database pool and
  /// the source tables, and rebuilds the HNSW index when [enableHnsw] is set.
  /// Does nothing if the store is already initialized.
  Future<void> initialize(String databasePath) async {
    if (_initialized) return;

    await _ensureRustInitialized();
    await File(databasePath).parent.create(recursive: true);
    await db_pool.initDbPool(dbPath: databasePath, maxSize: maxPoolSize);
    await source_rag.initSourceDb();

    if (enableHnsw) {
      await source_rag.rebuildChunkHnswIndexForCollection(
        collectionId: collectionId,
      );
    }

    _initialized = true;
    // When the build above was skipped the index does not exist yet. Keep it
    // marked dirty so that enabling HNSW later triggers a rebuild instead of
    // searching an index that was never populated.
    _indexDirty = !enableHnsw;
  }

  /// Stores [content] with its precomputed [embedding] under [id].
  ///
  /// Any document previously stored under the same [id] is replaced. The
  /// document is written as one source with a single chunk spanning the whole
  /// [content]. [metadata] is stored on the source unchanged.
  ///
  /// Throws a [StateError] if the store is not initialized and an
  /// [ArgumentError] if [embedding] is empty or its length differs from the
  /// dimension of previously added documents.
  Future<void> addDocument({
    required String id,
    required String content,
    required List<double> embedding,
    String? metadata,
  }) async {
    _ensureInitialized();
    _validateEmbeddingDimension(embedding);

    // Upsert: drop whatever was stored under this id before writing again.
    await removeDocument(id: id);

    final source = await source_rag.addSourceInCollection(
      collectionId: collectionId,
      content: content,
      metadata: metadata,
      name: id,
    );
    // A new source row exists from here on, so the index is stale no matter
    // how the remaining steps end.
    _indexDirty = true;

    try {
      await source_rag.addChunks(
        sourceId: source.sourceId,
        chunks: [
          source_rag.ChunkData(
            content: content,
            chunkIndex: 0,
            startPos: 0,
            endPos: content.length,
            chunkType: 'document',
            embedding: Float32List.fromList(embedding),
          ),
        ],
      );
      await source_rag.updateSourceStatus(
        sourceId: source.sourceId,
        status: 'completed',
      );
    } catch (_) {
      // Do not leave a source without its chunk behind: it would still be
      // counted by getStats() although it can never be found by a search.
      try {
        await removeDocument(id: id);
      } catch (_) {
        // Best-effort rollback; the failure rethrown below is the one the
        // caller needs to see.
      }
      rethrow;
    }

    // Commit the dimension only after the write succeeded, so a failed first
    // insert cannot pin the store to a dimension that has no data behind it.
    _vectorDimension = embedding.length;
  }

  /// Deletes every source in the collection whose name equals [id].
  ///
  /// Completes normally when no such source exists. Throws a [StateError] if
  /// the store is not initialized.
  Future<void> removeDocument({required String id}) async {
    _ensureInitialized();
    final sources = await source_rag.listSourcesInCollection(
      collectionId: collectionId,
    );

    for (final source in sources) {
      if (source.name != id) continue;
      // Flag before deleting: an unnecessary rebuild is harmless, whereas a
      // delete that fails midway must not leave a stale index marked clean.
      _indexDirty = true;
      await source_rag.deleteSourceInCollection(
        collectionId: collectionId,
        sourceId: source.id,
      );
    }
  }

  /// Returns up to [topK] stored documents for [queryEmbedding].
  ///
  /// Hits whose similarity is below [threshold] are dropped. Each result's id
  /// is the name of the hit's source, falling back to the chunk id when the
  /// source has no name. Returns an empty list when [topK] is `0`.
  ///
  /// Throws a [StateError] if the store is not initialized, an
  /// [ArgumentError] if [queryEmbedding] is empty or has the wrong length, and
  /// a [RangeError] if [topK] is negative.
  Future<List<MobileRagVectorSearchResult>> searchSimilar({
    required List<double> queryEmbedding,
    required int topK,
    double threshold = 0.0,
  }) async {
    _ensureInitialized();
    _validateQueryDimension(queryEmbedding);
    RangeError.checkNotNegative(topK, 'topK');
    if (topK == 0) return const [];

    await _ensureSearchIndex();

    final hits = enableHnsw
        ? await source_rag.searchChunksInCollection(
            collectionId: collectionId,
            queryEmbedding: queryEmbedding,
            topK: topK,
          )
        : await source_rag.benchmarkSearchChunksLinearInCollection(
            collectionId: collectionId,
            queryEmbedding: queryEmbedding,
            topK: topK,
          );

    final matches = hits
        .where((hit) => hit.similarity >= threshold)
        .toList(growable: false);
    // Skip the source listing below when there is nothing to label.
    if (matches.isEmpty) return const [];

    final sourceNames = await _sourceNamesById();
    return matches
        .map(
          (hit) => MobileRagVectorSearchResult(
            id: sourceNames[hit.sourceId.toString()] ?? hit.chunkId.toString(),
            content: hit.content,
            similarity: hit.similarity,
            metadata: hit.metadata,
          ),
        )
        .toList(growable: false);
  }

  /// Returns the number of sources in the collection and the embedding
  /// dimension learned by this instance (`0` if none was added yet).
  ///
  /// Throws a [StateError] if the store is not initialized.
  Future<MobileRagVectorStoreStats> getStats() async {
    _ensureInitialized();
    final stats = await source_rag.getSourceStatsInCollection(
      collectionId: collectionId,
    );
    return MobileRagVectorStoreStats(
      documentCount: stats.sourceCount.toInt(),
      vectorDimension: _vectorDimension,
    );
  }

  /// Deletes every source in the collection and forgets the learned embedding
  /// dimension.
  ///
  /// Throws a [StateError] if the store is not initialized.
  Future<void> clear() async {
    _ensureInitialized();
    final sources = await source_rag.listSourcesInCollection(
      collectionId: collectionId,
    );

    // Flag up front so a delete that fails midway still forces a rebuild.
    _indexDirty = true;
    for (final source in sources) {
      await source_rag.deleteSourceInCollection(
        collectionId: collectionId,
        sourceId: source.id,
      );
    }
    _vectorDimension = 0;
  }

  /// Closes the database pool and returns the store to its uninitialized
  /// state. Does nothing if the store is not initialized.
  ///
  // NOTE(review): closeDbPool() takes no handle, so it presumably closes a
  // pool shared with other services in this process - confirm before calling
  // this while another service is still running.
  Future<void> close() async {
    if (!_initialized) return;
    await db_pool.closeDbPool();
    _initialized = false;
    _indexDirty = false;
    // The learned dimension described the database that was just closed; a
    // later initialize() may open a different one.
    _vectorDimension = 0;
  }

  /// Maps the stringified id of every named source in the collection to its
  /// name.
  Future<Map<String, String>> _sourceNamesById() async {
    final sources = await source_rag.listSourcesInCollection(
      collectionId: collectionId,
    );
    return {
      for (final source in sources)
        if (source.name != null) source.id.toString(): source.name!,
    };
  }

  /// Rebuilds the HNSW index if it is enabled and marked dirty.
  Future<void> _ensureSearchIndex() async {
    if (!enableHnsw || !_indexDirty) return;
    await source_rag.rebuildChunkHnswIndexForCollection(
      collectionId: collectionId,
    );
    _indexDirty = false;
  }

  /// Throws a [StateError] unless [initialize] has completed.
  void _ensureInitialized() {
    if (!_initialized) {
      throw StateError('MobileRagVectorStore is not initialized.');
    }
  }

  /// Throws an [ArgumentError] if [embedding] is empty or does not match the
  /// learned dimension.
  ///
  /// Validation only: the dimension itself is recorded by [addDocument] once
  /// the document has actually been written.
  void _validateEmbeddingDimension(List<double> embedding) {
    if (embedding.isEmpty) {
      throw ArgumentError.value(embedding, 'embedding', 'Must not be empty.');
    }
    if (_vectorDimension != 0 && embedding.length != _vectorDimension) {
      throw ArgumentError(
        'Embedding dimension mismatch: expected $_vectorDimension, '
        'got ${embedding.length}.',
      );
    }
  }

  /// Throws an [ArgumentError] if [queryEmbedding] is empty or does not match
  /// the learned dimension. The length check is skipped while the dimension is
  /// still unknown.
  void _validateQueryDimension(List<double> queryEmbedding) {
    if (queryEmbedding.isEmpty) {
      throw ArgumentError.value(
        queryEmbedding,
        'queryEmbedding',
        'Must not be empty.',
      );
    }
    if (_vectorDimension != 0 && queryEmbedding.length != _vectorDimension) {
      throw ArgumentError(
        'Query embedding dimension mismatch: expected $_vectorDimension, '
        'got ${queryEmbedding.length}.',
      );
    }
  }

  /// Loads the Rust bridge unless it is already loaded.
  ///
  /// On macOS a failed default load is retried once with
  /// `ExternalLibrary.process`; on every other platform, or when the bridge
  /// reports itself initialized after the failure, the original error is
  /// rethrown.
  static Future<void> _ensureRustInitialized() async {
    if (_rustInitialized || RustLib.instance.initialized) {
      _rustInitialized = true;
      return;
    }

    try {
      await RustLib.init();
    } catch (_) {
      if (!Platform.isMacOS || RustLib.instance.initialized) rethrow;
      await RustLib.init(
        externalLibrary: ExternalLibrary.process(iKnowHowToUseIt: true),
      );
    }
    _rustInitialized = true;
  }
}

/// One hit returned by [MobileRagVectorStore.searchSimilar].
class MobileRagVectorSearchResult {
  const MobileRagVectorSearchResult({
    required this.id,
    required this.content,
    required this.similarity,
    this.metadata,
  });

  /// Name of the matching source, or the chunk id when the source is unnamed.
  final String id;

  /// Content of the matching chunk.
  final String content;

  /// Similarity reported by the search call for this hit.
  final double similarity;

  /// Metadata reported with the hit, if any.
  final String? metadata;
}
metadata; +} + +class MobileRagVectorStoreStats { + const MobileRagVectorStoreStats({ + required this.documentCount, + required this.vectorDimension, + }); + + final int documentCount; + final int vectorDimension; +} diff --git a/lib/services/source_rag_service.dart b/lib/services/source_rag_service.dart index 3d9fcdd..6267c7c 100644 --- a/lib/services/source_rag_service.dart +++ b/lib/services/source_rag_service.dart @@ -483,13 +483,22 @@ class SourceRagService { /// Clean up log stream resources (both Dart subscription and Rust sink). Future _cleanupLogStream() async { - await _logSubscription?.cancel(); + final subscription = _logSubscription; _logSubscription = null; + try { closeLogStream(); // Close Rust-side sink } catch (_) { // Ignore errors during cleanup } + + if (subscription == null) return; + + try { + await subscription.cancel().timeout(const Duration(seconds: 1)); + } on TimeoutException { + debugPrint('[SourceRagService] log subscription cancel timed out'); + } } /// Dispose resources held by this service. diff --git a/pubspec.yaml b/pubspec.yaml index 5cb13f4..0ee6518 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -1,16 +1,16 @@ name: mobile_rag_engine -description: A high-performance, on-device RAG (Retrieval-Augmented Generation) engine for Flutter. Run semantic search completely offline on iOS, Android, and macOS with HNSW vector indexing. +description: Build local/on-device RAG in Flutter with Dart APIs, offline semantic search, HNSW+BM25 hybrid retrieval, and text-layer PDF indexing. 
version: 0.18.6 homepage: https://github.com/dev07060/mobile_rag_engine repository: https://github.com/dev07060/mobile_rag_engine issue_tracker: https://github.com/dev07060/mobile_rag_engine/issues topics: - - llm - - machine-learning + - flutter + - rag + - on-device - semantic-search - vector-database - - rag environment: sdk: ^3.9.2 @@ -19,7 +19,7 @@ environment: dependencies: flutter: sdk: flutter - flutter_rust_bridge: ^2.11.1 + flutter_rust_bridge: 2.11.1 rag_engine_flutter: ^0.18.4 path_provider: ^2.1.5 onnxruntime: ^1.4.1 diff --git a/rust_builder/cargokit/gradle/plugin.gradle b/rust_builder/cargokit/gradle/plugin.gradle index 4af35ee..8d9f111 100644 --- a/rust_builder/cargokit/gradle/plugin.gradle +++ b/rust_builder/cargokit/gradle/plugin.gradle @@ -133,12 +133,15 @@ class CargoKitPlugin implements Plugin { jniLibs.srcDir(new File(cargoOutputDir)) def platforms = com.flutter.gradle.FlutterPluginUtils.getTargetPlatforms(project).collect() + def hasExplicitTargetPlatform = project.hasProperty("target-platform") - // Same thing addFlutterDependencies does in flutter.gradle - if (buildType == "debug") { + // Match Flutter's debug defaults only when the build did not request + // a narrower target platform, e.g. CI's android-arm64 smoke build. 
+ if (buildType == "debug" && !hasExplicitTargetPlatform) { platforms.add("android-x86") platforms.add("android-x64") } + platforms = platforms.unique() // The task name depends on plugin properties, which are not available // at this point diff --git a/test/native/mobile_rag_vector_store_test.dart b/test/native/mobile_rag_vector_store_test.dart new file mode 100644 index 0000000..f395ded --- /dev/null +++ b/test/native/mobile_rag_vector_store_test.dart @@ -0,0 +1,50 @@ +import 'dart:io'; + +import 'package:flutter_test/flutter_test.dart'; +import 'package:mobile_rag_engine/mobile_rag_engine.dart'; + +void main() { + test('MobileRagVectorStore stores and searches precomputed embeddings', + () async { + final dir = await Directory.systemTemp.createTemp( + 'mobile_rag_vector_store_', + ); + final store = MobileRagVectorStore(collectionId: 'test-vector-store'); + + try { + await store.initialize('${dir.path}/vector_store.sqlite'); + + expect(store.isInitialized, isTrue); + expect(store.enableHnsw, isTrue); + + await store.addDocument( + id: 'doc-1', + content: 'mobile_rag_engine provides retrieval for Flutter apps.', + embedding: const [1, 0, 0], + metadata: '{"kind":"probe"}', + ); + + final hits = await store.searchSimilar( + queryEmbedding: const [1, 0, 0], + topK: 3, + ); + + expect(hits, isNotEmpty); + expect(hits.first.id, 'doc-1'); + expect(hits.first.content, contains('retrieval')); + expect(hits.first.metadata, '{"kind":"probe"}'); + + final stats = await store.getStats(); + expect(stats.documentCount, 1); + expect(stats.vectorDimension, 3); + + await store.clear(); + final cleared = await store.getStats(); + expect(cleared.documentCount, 0); + expect(cleared.vectorDimension, 0); + } finally { + await store.close(); + await dir.delete(recursive: true); + } + }); +} diff --git a/test/native/source_rag_service_lifecycle_test.dart b/test/native/source_rag_service_lifecycle_test.dart new file mode 100644 index 0000000..e6e1d81 --- /dev/null +++ 
b/test/native/source_rag_service_lifecycle_test.dart @@ -0,0 +1,42 @@ +import 'dart:io'; + +import 'package:flutter_test/flutter_test.dart'; +import 'package:mobile_rag_engine/services/source_rag_service.dart'; +import 'package:mobile_rag_engine/src/rust/api/db_pool.dart'; +import 'package:mobile_rag_engine/src/rust/api/logger.dart'; +import 'package:mobile_rag_engine/src/rust/frb_generated.dart'; + +Future _ensureRustLoaded() async { + if (!RustLib.instance.initialized) { + await RustLib.init(); + } +} + +void main() { + setUpAll(() async { + await _ensureRustLoaded(); + }); + + test('SourceRagService.dispose closes log stream without hanging', () async { + final dir = await Directory.systemTemp.createTemp( + 'mobile_rag_source_lifecycle_', + ); + final dbPath = '${dir.path}/source_lifecycle.sqlite'; + final service = SourceRagService(dbPath: dbPath); + + try { + await initDbPool(dbPath: dbPath, maxSize: 2); + await service.init(deferIndexWarmup: false); + + await service.dispose().timeout(const Duration(seconds: 2)); + } finally { + try { + closeLogStream(); + } catch (_) {} + try { + await closeDbPool(); + } catch (_) {} + await dir.delete(recursive: true); + } + }); +}