|
511 | 511 | " # Format results\n", |
512 | 512 | " formatted_results = []\n", |
513 | 513 | " for hit in results[0]:\n", |
| 514 | + " # With the COSINE metric, the value Milvus returns in the \"distance\" field is a similarity score (higher = more similar),\n", |
| 515 | + " # so the previous `1.0 - distance` conversion is not needed - use the value directly as the score\n", |
| 516 | + " similarity_score = hit[\"distance\"] # This is actually similarity for COSINE\n", |
| 517 | + " \n", |
514 | 518 | " formatted_results.append({\n", |
515 | 519 | " \"text\": hit[\"text\"],\n", |
516 | 520 | " \"metadata\": json.loads(hit[\"metadata\"]),\n", |
517 | | - " \"score\": 1.0 - hit[\"distance\"], # Convert distance to similarity score for COSINE\n", |
| 521 | + " \"score\": similarity_score, # Use direct similarity score\n", |
518 | 522 | " \"id\": hit[\"id\"]\n", |
519 | 523 | " })\n", |
520 | 524 | " \n", |
|
851 | 855 | "metadata": {}, |
852 | 856 | "outputs": [], |
853 | 857 | "source": [ |
854 | | - "# Test Sample Queries\n", |
855 | | - "queries = [\n", |
856 | | - " \"What is artificial intelligence?\",\n", |
857 | | - " \"How does Milvus work as a vector database?\",\n", |
858 | | - " \"Explain the RAG pipeline steps\",\n", |
859 | | - " \"What are the differences between machine learning and deep learning?\"\n", |
860 | | - "]\n", |
| 858 | + "# Debug Sample Queries - Test one specific query with detailed logging\n", |
| 859 | + "print(\"🔍 Debug: Testing specific query with detailed logging\\n\")\n", |
861 | 860 | "\n", |
862 | | - "print(\"Testing RAG Pipeline with sample queries...\\n\")\n", |
| 861 | + "# Test the Milvus query that should definitely work\n", |
| 862 | + "test_query = \"How does Milvus work as a vector database?\"\n", |
| 863 | + "print(f\"Query: {test_query}\")\n", |
| 864 | + "print(\"-\" * 50)\n", |
863 | 865 | "\n", |
864 | | - "for i, query in enumerate(queries, 1):\n", |
865 | | - " print(f\"Query {i}: {query}\")\n", |
866 | | - " print(\"-\" * 50)\n", |
867 | | - " \n", |
868 | | - " result = rag.query(query, top_k=3)\n", |
869 | | - " \n", |
870 | | - " print(f\"Answer: {result['answer']}\")\n", |
871 | | - " print(f\"\\nRetrieved {result['num_retrieved']} documents:\")\n", |
872 | | - " \n", |
873 | | - " for j, doc in enumerate(result['retrieved_documents'], 1):\n", |
874 | | - " print(f\" {j}. Score: {doc['score']:.4f}\")\n", |
875 | | - " print(f\" Text: {doc['text'][:100]}...\")\n", |
876 | | - " print(f\" Source: {doc['metadata'].get('source', 'Unknown')}\")\n", |
| 866 | + "# Get query embedding\n", |
| 867 | + "query_embedding = rag.embedding_generator.embed_text(test_query)\n", |
| 868 | + "print(f\"✅ Generated query embedding: shape {query_embedding.shape}\")\n", |
| 869 | + "\n", |
| 870 | + "# Test direct search on vector store\n", |
| 871 | + "print(\"🔍 Testing direct vector store search...\")\n", |
| 872 | + "try:\n", |
| 873 | + " search_results = rag.vector_store.search(query_embedding, top_k=5)\n", |
| 874 | + " print(f\"✅ Direct search returned {len(search_results)} results\")\n", |
877 | 875 | " \n", |
878 | | - " print(\"\\n\" + \"=\" * 80 + \"\\n\")" |
| 876 | + " if search_results:\n", |
| 877 | + " for i, result in enumerate(search_results):\n", |
| 878 | + " print(f\" Result {i+1}:\")\n", |
| 879 | + " print(f\" Score: {result.get('score', 'N/A')}\")\n", |
| 880 | + " print(f\" Text preview: {str(result.get('text', 'N/A'))[:100]}...\")\n", |
| 881 | + " print(f\" ID: {result.get('id', 'N/A')}\")\n", |
| 882 | + " else:\n", |
| 883 | + " print(\" ❌ No results from direct search\")\n", |
| 884 | + " \n", |
| 885 | + "except Exception as e:\n", |
| 886 | + " print(f\"❌ Direct search failed: {e}\")\n", |
| 887 | + "\n", |
| 888 | + "print(\"\\n\" + \"=\" * 50)\n", |
| 889 | + "\n", |
| 890 | + "# Now test full RAG pipeline\n", |
| 891 | + "print(\"🔍 Testing full RAG pipeline...\")\n", |
| 892 | + "result = rag.query(test_query, top_k=5)\n", |
| 893 | + "print(f\"Full pipeline returned {result['num_retrieved']} documents\")\n", |
| 894 | + "print(f\"Answer: {result['answer'][:200]}...\")\n", |
| 895 | + "\n", |
| 896 | + "print(\"\\n🔍 Let's also test a simple keyword match:\")\n", |
| 897 | + "keyword_result = rag.query(\"Milvus vector database\", top_k=5) \n", |
| 898 | + "print(f\"Keyword query returned {keyword_result['num_retrieved']} documents\")" |
879 | 899 | ] |
880 | 900 | }, |
881 | 901 | { |
|
0 commit comments