From f0c8715126d3baf9aea561b944d786f0fc2c9ced Mon Sep 17 00:00:00 2001 From: jedrazb Date: Sat, 14 Oct 2023 09:15:20 +0200 Subject: [PATCH] Updates --- .../tech-blog/elasticsearch-data-connectors/page-data.json | 2 +- rss.xml | 2 +- tech-blog/elasticsearch-data-connectors/index.html | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/page-data/tech-blog/elasticsearch-data-connectors/page-data.json b/page-data/tech-blog/elasticsearch-data-connectors/page-data.json index e32d1741d..dca6c5881 100644 --- a/page-data/tech-blog/elasticsearch-data-connectors/page-data.json +++ b/page-data/tech-blog/elasticsearch-data-connectors/page-data.json @@ -1 +1 @@ -{"componentChunkName":"component---src-templates-tech-blog-post-js","path":"/tech-blog/elasticsearch-data-connectors/","result":{"data":{"site":{"siteMetadata":{"title":"Jedr's Blog","author":"Jedr Blaszyk","siteUrl":"https://j.blaszyk.me"}},"mdx":{"id":"8441ab06-882b-519d-b026-3e5fc2037647","body":"var _excluded = [\"components\"];\n\nfunction _extends() { _extends = Object.assign || function (target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i]; for (var key in source) { if (Object.prototype.hasOwnProperty.call(source, key)) { target[key] = source[key]; } } } return target; }; return _extends.apply(this, arguments); }\n\nfunction _objectWithoutProperties(source, excluded) { if (source == null) return {}; var target = _objectWithoutPropertiesLoose(source, excluded); var key, i; if (Object.getOwnPropertySymbols) { var sourceSymbolKeys = Object.getOwnPropertySymbols(source); for (i = 0; i < sourceSymbolKeys.length; i++) { key = sourceSymbolKeys[i]; if (excluded.indexOf(key) >= 0) continue; if (!Object.prototype.propertyIsEnumerable.call(source, key)) continue; target[key] = source[key]; } } return target; }\n\nfunction _objectWithoutPropertiesLoose(source, excluded) { if (source == null) return {}; var target = {}; var sourceKeys = Object.keys(source); var key, i; for (i = 0; 
i < sourceKeys.length; i++) { key = sourceKeys[i]; if (excluded.indexOf(key) >= 0) continue; target[key] = source[key]; } return target; }\n\n/* @jsxRuntime classic */\n\n/* @jsx mdx */\nvar _frontmatter = {\n \"title\": \"Elastic Data Connectors - Elasticsearch Ingestion Made Simple\",\n \"date\": \"2023-10-05\",\n \"spoiler\": \"Overview of a new, lightweight data connectors framework integrated with the Elastic stack that can ingest data from any source into the Elasticsearch index.\",\n \"images\": [\"./connectors-architecture.png\", \"./google_drive_connector.png\"]\n};\n\nvar makeShortcode = function makeShortcode(name) {\n return function MDXDefaultShortcode(props) {\n console.warn(\"Component \" + name + \" was not imported, exported, or provided by MDXProvider as global scope\");\n return mdx(\"div\", props);\n };\n};\n\nvar ImageComponent = makeShortcode(\"ImageComponent\");\nvar layoutProps = {\n _frontmatter: _frontmatter\n};\nvar MDXLayout = \"wrapper\";\nreturn function MDXContent(_ref) {\n var components = _ref.components,\n props = _objectWithoutProperties(_ref, _excluded);\n\n return mdx(MDXLayout, _extends({}, layoutProps, props, {\n components: components,\n mdxType: \"MDXLayout\"\n }), mdx(\"p\", null, \"In any search application, the is an underlying ingestion pipeline that indexes the data. While developers often focus on speedy searches, they tend to overlook the aspect of data indexing. It\\u2019s important to keep in mind that easy-to-manage data ingestion is a key foundation for successful data-driven solutions.\"), mdx(\"p\", null, \"Elasticsearch (ES) has many ingestion tools, but sometimes they don\\u2019t fit all data sources. That\\u2019s where \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elastic Data Connectors\"), \" come in. 
They let you easily link custom or proprietry data to Elasticsearch. This framework is lightweight, open-source and flexible, and works both on-premise and in the cloud. In short, it\\u2019s a straightforward way to keep any content source, e.g. database, cloud storage or local file system, in sync with a search-optimized Elasticsearch index.\"), mdx(\"p\", null, \"Other ingestion solutions offered by the Enterprise Search component in the Elastic stack are \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/crawler.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Web Crawler\"), \" and \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/ingestion-apis.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Index API\"), \".\"), mdx(\"h2\", {\n \"id\": \"data-connectors-framework\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", {\n parentName: \"h2\",\n \"href\": \"#data-connectors-framework\",\n \"aria-label\": \"data connectors framework permalink\",\n \"className\": \"anchor before\"\n }, mdx(\"svg\", {\n parentName: \"a\",\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }, mdx(\"path\", {\n parentName: \"svg\",\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n }))), \"Data Connectors Framework\"), mdx(\"p\", null, mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://github.com/elastic/connectors-python\",\n \"target\": \"_blank\",\n 
\"rel\": \"nofollow noopener noreferrer\"\n }, \"elastic/connectors-python\")), mdx(\"p\", null, \"The data connectors framework makes it easier for developers to create connector clients that can sync data from other sources into Elasticsearch. The framework takes care of essential tasks like scheduling data syncs, extracting text from files, and setting up index mappings automatically. This way, developers can concentrate on integrating their chosen data source without worrying about these common tasks.\"), mdx(ImageComponent, {\n image: props.frontmatter.images[1],\n description: \"Elastic data connectors - Google Drive connector syncing data\",\n mdxType: \"ImageComponent\"\n }), mdx(\"p\", null, \"The data connectors framework can be configured to use Elasticsearch\\u2019s \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/ingest-pipelines.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"ingestion pipelines\"), \" to perform transformations on data before storing it in an index. A common use case is \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/document-enrichment.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"document enrichment with machine learning\"), \". 
For example, you can:\"), mdx(\"ul\", null, mdx(\"li\", {\n parentName: \"ul\"\n }, \"analyze text fields using a \", mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/machine-learning/8.10/ml-nlp-search-compare.html#ml-nlp-text-embedding\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Text embedding\"), \" model that will generate a dense vector representation of your data\"), mdx(\"li\", {\n parentName: \"ul\"\n }, \"run text classification for sentiment analysis\"), mdx(\"li\", {\n parentName: \"ul\"\n }, \"extract key information from text with Named Entitiy Recogintion (NER)\")), mdx(\"h2\", {\n \"id\": \"architecture\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", {\n parentName: \"h2\",\n \"href\": \"#architecture\",\n \"aria-label\": \"architecture permalink\",\n \"className\": \"anchor before\"\n }, mdx(\"svg\", {\n parentName: \"a\",\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }, mdx(\"path\", {\n parentName: \"svg\",\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n }))), \"Architecture\"), mdx(\"p\", null, \"The data connectors framework is deployed as a separate stateless service. 
You can host them yourself, or for selected native connectors run them in the Elastic Cloud natively.\"), mdx(\"p\", null, \"The framework \", mdx(\"strong\", {\n parentName: \"p\"\n }, \"connects\"), \" your third-party data source with an Elasticsearch index and keeps it in sync, so that you can focus on search and analytics with your data.\"), mdx(ImageComponent, {\n image: props.frontmatter.images[0],\n description: \"Elastic Data Connectors Architecture\",\n mdxType: \"ImageComponent\"\n }), mdx(\"h2\", {\n \"id\": \"connector-protocol\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", {\n parentName: \"h2\",\n \"href\": \"#connector-protocol\",\n \"aria-label\": \"connector protocol permalink\",\n \"className\": \"anchor before\"\n }, mdx(\"svg\", {\n parentName: \"a\",\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }, mdx(\"path\", {\n parentName: \"svg\",\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n }))), \"Connector Protocol\"), mdx(\"p\", null, \"Connectors framework relies on the \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://github.com/elastic/connectors-python/blob/main/docs/CONNECTOR_PROTOCOL.md\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Connector Protocol\"), \". All communication between connectors and other parts of the stack happens asynchronously through an Elasticsearch index. 
This comes with several benefits.\"), mdx(\"ul\", null, mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"strong\", {\n parentName: \"li\"\n }, \"Stateless deployment\"), \": The data connectors service relies on external state in an ES index\"), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"strong\", {\n parentName: \"li\"\n }, \"Fault-tolerance\"), \": The service can resume operation on a different host after a restart or a failure. Once it reestablishes connection with Elasticsearch, it will continue its normal operation.\"), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"strong\", {\n parentName: \"li\"\n }, \"Developers have control over a deployment\"), \" - This service can be easily self-hosted or run in the Elastic cloud. It only needs to be able to discover your Elasticsearch instance over the network.\")), mdx(\"p\", null, \"This setup is developer friendly and aims to make connectors service easy to deploy and manage. The framework is written in async python making this IO-bound framework lightweight, fast and efficient.\"), mdx(\"h2\", {\n \"id\": \"available-connectors\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", {\n parentName: \"h2\",\n \"href\": \"#available-connectors\",\n \"aria-label\": \"available connectors permalink\",\n \"className\": \"anchor before\"\n }, mdx(\"svg\", {\n parentName: \"a\",\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }, mdx(\"path\", {\n parentName: \"svg\",\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n }))), \"Available connectors\"), mdx(\"p\", null, \"Currently, the 
following content sources are supported as of version \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"8.10\"), \". For an up-to-date list of supported connectors check \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"the official documentation\"), \".\"), mdx(\"ul\", null, mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-azure-blob.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Azure Blob Storage Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-confluence.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Confluence Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-gmail.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Gmail Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-google-cloud.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Google Cloud Storage Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-google-drive.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Google Drive Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, 
mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-jira.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Jira Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-mongodb.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch MongoDB Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-ms-sql.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch MicrosoftSQL Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-mysql.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch MySQL Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-network-drive.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Network drive Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-onedrive.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch OneDrive Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-oracle.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Oracle Connector\")), mdx(\"li\", {\n 
parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-postgresql.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch PostgreSQL Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-s3.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch S3 Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-salesforce.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Salesforce Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-sharepoint.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch SharePoint Server Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-sharepoint-online.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch SharePoint Online Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-slack.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Slack Connector\"))), mdx(\"p\", null, \"You can check the connector implementations in: \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://github.com/elastic/connectors-python/tree/main/connectors/sources\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener 
noreferrer\"\n }, \"connectors-python/connectors/sources\"), \".\"), mdx(\"h2\", {\n \"id\": \"custom-connectors\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", {\n parentName: \"h2\",\n \"href\": \"#custom-connectors\",\n \"aria-label\": \"custom connectors permalink\",\n \"className\": \"anchor before\"\n }, mdx(\"svg\", {\n parentName: \"a\",\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }, mdx(\"path\", {\n parentName: \"svg\",\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n }))), \"Custom connectors\"), mdx(\"p\", null, \"You are not limited to the connectors included in the data connectors framework. It is easy to implement a custom data connector using the abstractions provided by the framework.\"), mdx(\"p\", null, \"All you need to do is define a custom \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"DataSource\"), \" class in async python and the framework will take care of making it compatible with common functionalities, such as scheduling data syncs, extracting text from files, and setting up index mappings automatically.\"), mdx(\"p\", null, \"Here is an example starting point for implementing \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"MyCustomDataSource\"), \". 
In order to turn this into a functional connector you need to define \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"get_default_configuration\"), \", \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"ping\"), \" and \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"get_docs\"), \" methods.\"), mdx(\"div\", {\n \"className\": \"gatsby-highlight\",\n \"data-language\": \"python\"\n }, mdx(\"pre\", {\n parentName: \"div\",\n \"className\": \"language-python\"\n }, mdx(\"code\", {\n parentName: \"pre\",\n \"className\": \"language-python\"\n }, mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"class\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token class-name\"\n }, \"MyCustomDataSource\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \"(\"), \"BaseDataSource\", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \")\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \":\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token triple-quoted-string string\"\n }, \"\\\"\\\"\\\"Connector to my custom data source\\\"\\\"\\\"\"), \"\\n\\n name \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token operator\"\n }, \"=\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token string\"\n }, \"\\\"Custom Source\\\"\"), \"\\n service_type \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token operator\"\n }, \"=\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token string\"\n }, \"\\\"custom_source\\\"\"), \"\\n\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token decorator annotation punctuation\"\n }, \"@classmethod\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n 
}, \"def\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token function\"\n }, \"get_default_configuration\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \"(\"), \"cls\", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \")\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \":\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token triple-quoted-string string\"\n }, \"\\\"\\\"\\\"Returns a dict with a default configuration\\\"\\\"\\\"\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"raise\"), \" NotImplementedError\\n\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"async\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"def\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token function\"\n }, \"ping\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \"(\"), \"self\", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \")\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \":\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token triple-quoted-string string\"\n }, \"\\\"\\\"\\\"When called, pings the backend\\n\\n If the backend has an issue, raises an exception\\n \\\"\\\"\\\"\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"raise\"), \" NotImplementedError\\n\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"async\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"def\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token function\"\n }, \"get_docs\"), mdx(\"span\", {\n parentName: 
\"code\",\n \"className\": \"token punctuation\"\n }, \"(\"), \"self\", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \",\"), \" filtering\", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token operator\"\n }, \"=\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token boolean\"\n }, \"None\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \")\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \":\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token triple-quoted-string string\"\n }, \"\\\"\\\"\\\"Returns an iterator on all documents present in the backend\\n\\n Each document is a tuple with:\\n - a mapping with the data to index\\n - a coroutine to download extra data (attachments)\\n\\n The mapping should have least an `id` field\\n and optionally a `timestamp` field in ISO 8601 UTC\\n\\n The coroutine is called if the document needs to be synced\\n and has attachments. 
It needs to return a mapping to index.\\n\\n It has two arguments: doit and timestamp\\n If doit is False, it should return None immediately.\\n If timestamp is provided, it should be used in the mapping.\\n\\n Example:\\n\\n async def get_file(doit=True, timestamp=None):\\n if not doit:\\n return\\n return {'TEXT': 'DATA', 'timestamp': timestamp,\\n 'id': 'doc-id'}\\n \\\"\\\"\\\"\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"raise\"), \" NotImplementedError\"))), mdx(\"p\", null, \"To learn more about how to write a custom connector, refer to the \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/search-labs/how-to-create-customized-connectors-for-elasticsearch\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"How to create customized connectors for Elasticsearch\"), \" blog post or check the implementation of \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://github.com/elastic/connectors-python/tree/main/connectors/sources\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"existing connectors\"), \".\"), mdx(\"p\", null, \"You can use \", mdx(\"em\", {\n parentName: \"p\"\n }, \"Customized connector\"), \" to configure and run your custom connector from Kibana. 
Navigate to: \", mdx(\"strong\", {\n parentName: \"p\"\n }, \"Search > Indices > Create a new index > Connector > Customized connector\"), \".\"), mdx(\"p\", null, \"If you want to contribute your custom connector to the open-source connectors framework, refer to the \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://github.com/elastic/connectors-python/blob/main/docs/CONTRIBUTING.md\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"How to contribute connectors guide\"), \" in the \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"connectors-python\"), \" repository.\"));\n}\n;\nMDXContent.isMDXComponent = true;","tableOfContents":{"items":[{"url":"#data-connectors-framework","title":"Data Connectors Framework"},{"url":"#architecture","title":"Architecture"},{"url":"#connector-protocol","title":"Connector Protocol"},{"url":"#available-connectors","title":"Available connectors"},{"url":"#custom-connectors","title":"Custom connectors"}]},"timeToRead":3,"frontmatter":{"title":"Elastic Data Connectors - Elasticsearch Ingestion Made Simple","date":"October 05, 2023","spoiler":"Overview of a new, lightweight data connectors framework integrated with the Elastic stack that can ingest data from any source into the Elasticsearch index.","ogimage":null,"images":[{"childImageSharp":{"gatsbyImageData":{"layout":"constrained","placeholder":{"fallback":""},"backgroundColor":"transparent","images":{"fallback":{"src":"/static/55addb8c1fa1dbb9fb3c906832de5c91/6cc44/connectors-architecture.png","srcSet":"/static/55addb8c1fa1dbb9fb3c906832de5c91/b96cf/connectors-architecture.png 750w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/439ee/connectors-architecture.png 1080w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/7afce/connectors-architecture.png 1366w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/6cc44/connectors-architecture.png 1400w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/13a12/connectors-architecture.png 
1920w","sizes":"(min-width: 1400px) 1400px, 100vw"},"sources":[{"srcSet":"/static/55addb8c1fa1dbb9fb3c906832de5c91/c4a99/connectors-architecture.webp 750w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/719a3/connectors-architecture.webp 1080w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/14fd2/connectors-architecture.webp 1366w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/a48e7/connectors-architecture.webp 1400w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/9e507/connectors-architecture.webp 1920w","type":"image/webp","sizes":"(min-width: 1400px) 1400px, 100vw"}]},"width":1400,"height":739}}},{"childImageSharp":{"gatsbyImageData":{"layout":"constrained","placeholder":{"fallback":""},"backgroundColor":"transparent","images":{"fallback":{"src":"/static/39bbda5d2268372a7b1897ac057c3a71/72368/google_drive_connector.png","srcSet":"/static/39bbda5d2268372a7b1897ac057c3a71/2479c/google_drive_connector.png 750w,\n/static/39bbda5d2268372a7b1897ac057c3a71/f45fa/google_drive_connector.png 1080w,\n/static/39bbda5d2268372a7b1897ac057c3a71/fc4d4/google_drive_connector.png 1366w,\n/static/39bbda5d2268372a7b1897ac057c3a71/72368/google_drive_connector.png 1400w,\n/static/39bbda5d2268372a7b1897ac057c3a71/8ebad/google_drive_connector.png 1920w","sizes":"(min-width: 1400px) 1400px, 100vw"},"sources":[{"srcSet":"/static/39bbda5d2268372a7b1897ac057c3a71/cc1d9/google_drive_connector.webp 750w,\n/static/39bbda5d2268372a7b1897ac057c3a71/05669/google_drive_connector.webp 1080w,\n/static/39bbda5d2268372a7b1897ac057c3a71/60efe/google_drive_connector.webp 1366w,\n/static/39bbda5d2268372a7b1897ac057c3a71/be3c0/google_drive_connector.webp 1400w,\n/static/39bbda5d2268372a7b1897ac057c3a71/7f6be/google_drive_connector.webp 1920w","type":"image/webp","sizes":"(min-width: 1400px) 1400px, 
100vw"}]},"width":1400,"height":782}}}],"blogImages":null},"fields":{"slug":"/elasticsearch-data-connectors/","category":"tech-blog"}}},"pageContext":{"slug":"/elasticsearch-data-connectors/","previous":{"fields":{"slug":"/exploring-apache-lucene-scale/","directoryName":"exploring-apache-lucene-scale","category":"tech-blog"},"frontmatter":{"title":"Exploring Apache Lucene - Part 3: Running at Scale"}},"next":null}},"staticQueryHashes":["4190863273"]} \ No newline at end of file +{"componentChunkName":"component---src-templates-tech-blog-post-js","path":"/tech-blog/elasticsearch-data-connectors/","result":{"data":{"site":{"siteMetadata":{"title":"Jedr's Blog","author":"Jedr Blaszyk","siteUrl":"https://j.blaszyk.me"}},"mdx":{"id":"8441ab06-882b-519d-b026-3e5fc2037647","body":"var _excluded = [\"components\"];\n\nfunction _extends() { _extends = Object.assign || function (target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i]; for (var key in source) { if (Object.prototype.hasOwnProperty.call(source, key)) { target[key] = source[key]; } } } return target; }; return _extends.apply(this, arguments); }\n\nfunction _objectWithoutProperties(source, excluded) { if (source == null) return {}; var target = _objectWithoutPropertiesLoose(source, excluded); var key, i; if (Object.getOwnPropertySymbols) { var sourceSymbolKeys = Object.getOwnPropertySymbols(source); for (i = 0; i < sourceSymbolKeys.length; i++) { key = sourceSymbolKeys[i]; if (excluded.indexOf(key) >= 0) continue; if (!Object.prototype.propertyIsEnumerable.call(source, key)) continue; target[key] = source[key]; } } return target; }\n\nfunction _objectWithoutPropertiesLoose(source, excluded) { if (source == null) return {}; var target = {}; var sourceKeys = Object.keys(source); var key, i; for (i = 0; i < sourceKeys.length; i++) { key = sourceKeys[i]; if (excluded.indexOf(key) >= 0) continue; target[key] = source[key]; } return target; }\n\n/* @jsxRuntime classic */\n\n/* @jsx mdx */\nvar 
_frontmatter = {\n \"title\": \"Elastic Data Connectors - Elasticsearch Ingestion Made Simple\",\n \"date\": \"2023-10-05\",\n \"spoiler\": \"Overview of a new, lightweight data connectors framework integrated with the Elastic stack that can ingest data from any source into the Elasticsearch index.\",\n \"images\": [\"./connectors-architecture.png\", \"./google_drive_connector.png\"]\n};\n\nvar makeShortcode = function makeShortcode(name) {\n return function MDXDefaultShortcode(props) {\n console.warn(\"Component \" + name + \" was not imported, exported, or provided by MDXProvider as global scope\");\n return mdx(\"div\", props);\n };\n};\n\nvar ImageComponent = makeShortcode(\"ImageComponent\");\nvar layoutProps = {\n _frontmatter: _frontmatter\n};\nvar MDXLayout = \"wrapper\";\nreturn function MDXContent(_ref) {\n var components = _ref.components,\n props = _objectWithoutProperties(_ref, _excluded);\n\n return mdx(MDXLayout, _extends({}, layoutProps, props, {\n components: components,\n mdxType: \"MDXLayout\"\n }), mdx(\"p\", null, \"In any search application, the is an underlying ingestion pipeline that indexes the data. While developers often focus on speedy searches, they tend to overlook the aspect of data indexing. It\\u2019s important to keep in mind that easy-to-manage data ingestion is a key foundation for successful data-driven solutions.\"), mdx(\"p\", null, \"Elasticsearch (ES) has many ingestion tools, but sometimes they don\\u2019t fit all data sources. That\\u2019s where \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elastic Data Connectors\"), \" come in. They let you easily link custom or proprietry data to Elasticsearch. This framework is lightweight, open-source and flexible, and works both on-premise and in the cloud. 
In short, it\\u2019s a straightforward way to keep any content source, e.g. database, cloud storage or local file system, in sync with a search-optimized Elasticsearch index.\"), mdx(\"p\", null, \"Other ingestion solutions offered by the Enterprise Search component in the Elastic stack are \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/crawler.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Web Crawler\"), \" and \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/ingestion-apis.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Index API\"), \".\"), mdx(\"h2\", {\n \"id\": \"data-connectors-framework\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", {\n parentName: \"h2\",\n \"href\": \"#data-connectors-framework\",\n \"aria-label\": \"data connectors framework permalink\",\n \"className\": \"anchor before\"\n }, mdx(\"svg\", {\n parentName: \"a\",\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }, mdx(\"path\", {\n parentName: \"svg\",\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n }))), \"Data Connectors Framework\"), mdx(\"p\", null, mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://github.com/elastic/connectors\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"elastic/connectors\")), mdx(\"p\", null, \"The data connectors framework makes it easier for developers to create connector clients 
that can sync data from other sources into Elasticsearch. The framework takes care of essential tasks like scheduling data syncs, extracting text from files, and setting up index mappings automatically. This way, developers can concentrate on integrating their chosen data source without worrying about these common tasks.\"), mdx(ImageComponent, {\n image: props.frontmatter.images[1],\n description: \"Elastic data connectors - Google Drive connector syncing data\",\n mdxType: \"ImageComponent\"\n }), mdx(\"p\", null, \"The data connectors framework can be configured to use Elasticsearch\\u2019s \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/ingest-pipelines.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"ingestion pipelines\"), \" to perform transformations on data before storing it in an index. A common use case is \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/document-enrichment.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"document enrichment with machine learning\"), \". 
For example, you can:\"), mdx(\"ul\", null, mdx(\"li\", {\n parentName: \"ul\"\n }, \"analyze text fields using a \", mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/machine-learning/8.10/ml-nlp-search-compare.html#ml-nlp-text-embedding\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Text embedding\"), \" model that will generate a dense vector representation of your data\"), mdx(\"li\", {\n parentName: \"ul\"\n }, \"run text classification for sentiment analysis\"), mdx(\"li\", {\n parentName: \"ul\"\n }, \"extract key information from text with Named Entitiy Recogintion (NER)\")), mdx(\"h2\", {\n \"id\": \"architecture\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", {\n parentName: \"h2\",\n \"href\": \"#architecture\",\n \"aria-label\": \"architecture permalink\",\n \"className\": \"anchor before\"\n }, mdx(\"svg\", {\n parentName: \"a\",\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }, mdx(\"path\", {\n parentName: \"svg\",\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n }))), \"Architecture\"), mdx(\"p\", null, \"The data connectors framework is deployed as a separate stateless service. 
You can host them yourself, or for selected native connectors run them in the Elastic Cloud natively.\"), mdx(\"p\", null, \"The framework \", mdx(\"strong\", {\n parentName: \"p\"\n }, \"connects\"), \" your third-party data source with an Elasticsearch index and keeps it in sync, so that you can focus on search and analytics with your data.\"), mdx(ImageComponent, {\n image: props.frontmatter.images[0],\n description: \"Elastic Data Connectors Architecture\",\n mdxType: \"ImageComponent\"\n }), mdx(\"h2\", {\n \"id\": \"connector-protocol\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", {\n parentName: \"h2\",\n \"href\": \"#connector-protocol\",\n \"aria-label\": \"connector protocol permalink\",\n \"className\": \"anchor before\"\n }, mdx(\"svg\", {\n parentName: \"a\",\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }, mdx(\"path\", {\n parentName: \"svg\",\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n }))), \"Connector Protocol\"), mdx(\"p\", null, \"Connectors framework relies on the \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://github.com/elastic/connectors/blob/main/docs/CONNECTOR_PROTOCOL.md\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Connector Protocol\"), \". All communication between connectors and other parts of the stack happens asynchronously through an Elasticsearch index. 
This comes with several benefits.\"), mdx(\"ul\", null, mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"strong\", {\n parentName: \"li\"\n }, \"Stateless deployment\"), \": The data connectors service relies on external state in an ES index\"), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"strong\", {\n parentName: \"li\"\n }, \"Fault-tolerance\"), \": The service can resume operation on a different host after a restart or a failure. Once it reestablishes connection with Elasticsearch, it will continue its normal operation.\"), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"strong\", {\n parentName: \"li\"\n }, \"Developers have control over a deployment\"), \" - This service can be easily self-hosted or run in the Elastic cloud. It only needs to be able to discover your Elasticsearch instance over the network.\")), mdx(\"p\", null, \"This setup is developer friendly and aims to make connectors service easy to deploy and manage. The framework is written in async python making this IO-bound framework lightweight, fast and efficient.\"), mdx(\"h2\", {\n \"id\": \"available-connectors\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", {\n parentName: \"h2\",\n \"href\": \"#available-connectors\",\n \"aria-label\": \"available connectors permalink\",\n \"className\": \"anchor before\"\n }, mdx(\"svg\", {\n parentName: \"a\",\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }, mdx(\"path\", {\n parentName: \"svg\",\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n }))), \"Available connectors\"), mdx(\"p\", null, \"Currently, the 
following content sources are supported as of version \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"8.10\"), \". For an up-to-date list of supported connectors check \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"the official documentation\"), \".\"), mdx(\"ul\", null, mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-azure-blob.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Azure Blob Storage Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-confluence.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Confluence Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-gmail.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Gmail Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-google-cloud.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Google Cloud Storage Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-google-drive.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Google Drive Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, 
mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-jira.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Jira Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-mongodb.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch MongoDB Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-ms-sql.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch MicrosoftSQL Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-mysql.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch MySQL Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-network-drive.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Network drive Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-onedrive.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch OneDrive Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-oracle.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Oracle Connector\")), mdx(\"li\", {\n 
parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-postgresql.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch PostgreSQL Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-s3.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch S3 Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-salesforce.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Salesforce Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-sharepoint.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch SharePoint Server Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-sharepoint-online.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch SharePoint Online Connector\")), mdx(\"li\", {\n parentName: \"ul\"\n }, mdx(\"a\", {\n parentName: \"li\",\n \"href\": \"https://www.elastic.co/guide/en/enterprise-search/current/connectors-slack.html\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"Elasticsearch Slack Connector\"))), mdx(\"p\", null, \"You can check the connector implementations in: \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://github.com/elastic/connectors/tree/main/connectors/sources\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener 
noreferrer\"\n }, \"connectors/sources\"), \".\"), mdx(\"h2\", {\n \"id\": \"custom-connectors\",\n \"style\": {\n \"position\": \"relative\"\n }\n }, mdx(\"a\", {\n parentName: \"h2\",\n \"href\": \"#custom-connectors\",\n \"aria-label\": \"custom connectors permalink\",\n \"className\": \"anchor before\"\n }, mdx(\"svg\", {\n parentName: \"a\",\n \"aria-hidden\": \"true\",\n \"focusable\": \"false\",\n \"height\": \"16\",\n \"version\": \"1.1\",\n \"viewBox\": \"0 0 16 16\",\n \"width\": \"16\"\n }, mdx(\"path\", {\n parentName: \"svg\",\n \"fillRule\": \"evenodd\",\n \"d\": \"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"\n }))), \"Custom connectors\"), mdx(\"p\", null, \"You are not limited to the connectors included in the data connectors framework. It is easy to implement a custom data connector using the abstractions provided by the framework.\"), mdx(\"p\", null, \"All you need to do is define a custom \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"DataSource\"), \" class in async python and the framework will take care of making it compatible with common functionalities, such as scheduling data syncs, extracting text from files, and setting up index mappings automatically.\"), mdx(\"p\", null, \"Here is an example starting point for implementing \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"MyCustomDataSource\"), \". 
In order to turn this into a functional connector you need to define \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"get_default_configuration\"), \", \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"ping\"), \" and \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"get_docs\"), \" methods.\"), mdx(\"div\", {\n \"className\": \"gatsby-highlight\",\n \"data-language\": \"python\"\n }, mdx(\"pre\", {\n parentName: \"div\",\n \"className\": \"language-python\"\n }, mdx(\"code\", {\n parentName: \"pre\",\n \"className\": \"language-python\"\n }, mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"class\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token class-name\"\n }, \"MyCustomDataSource\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \"(\"), \"BaseDataSource\", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \")\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \":\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token triple-quoted-string string\"\n }, \"\\\"\\\"\\\"Connector to my custom data source\\\"\\\"\\\"\"), \"\\n\\n name \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token operator\"\n }, \"=\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token string\"\n }, \"\\\"Custom Source\\\"\"), \"\\n service_type \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token operator\"\n }, \"=\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token string\"\n }, \"\\\"custom_source\\\"\"), \"\\n\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token decorator annotation punctuation\"\n }, \"@classmethod\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n 
}, \"def\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token function\"\n }, \"get_default_configuration\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \"(\"), \"cls\", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \")\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \":\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token triple-quoted-string string\"\n }, \"\\\"\\\"\\\"Returns a dict with a default configuration\\\"\\\"\\\"\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"raise\"), \" NotImplementedError\\n\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"async\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"def\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token function\"\n }, \"ping\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \"(\"), \"self\", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \")\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \":\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token triple-quoted-string string\"\n }, \"\\\"\\\"\\\"When called, pings the backend\\n\\n If the backend has an issue, raises an exception\\n \\\"\\\"\\\"\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"raise\"), \" NotImplementedError\\n\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"async\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"def\"), \" \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token function\"\n }, \"get_docs\"), mdx(\"span\", {\n parentName: 
\"code\",\n \"className\": \"token punctuation\"\n }, \"(\"), \"self\", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \",\"), \" filtering\", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token operator\"\n }, \"=\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token boolean\"\n }, \"None\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \")\"), mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token punctuation\"\n }, \":\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token triple-quoted-string string\"\n }, \"\\\"\\\"\\\"Returns an iterator on all documents present in the backend\\n\\n Each document is a tuple with:\\n - a mapping with the data to index\\n - a coroutine to download extra data (attachments)\\n\\n The mapping should have least an `id` field\\n and optionally a `timestamp` field in ISO 8601 UTC\\n\\n The coroutine is called if the document needs to be synced\\n and has attachments. 
It needs to return a mapping to index.\\n\\n It has two arguments: doit and timestamp\\n If doit is False, it should return None immediately.\\n If timestamp is provided, it should be used in the mapping.\\n\\n Example:\\n\\n async def get_file(doit=True, timestamp=None):\\n if not doit:\\n return\\n return {'TEXT': 'DATA', 'timestamp': timestamp,\\n 'id': 'doc-id'}\\n \\\"\\\"\\\"\"), \"\\n \", mdx(\"span\", {\n parentName: \"code\",\n \"className\": \"token keyword\"\n }, \"raise\"), \" NotImplementedError\"))), mdx(\"p\", null, \"To learn more about how to write a custom connector, refer to the \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://www.elastic.co/search-labs/how-to-create-customized-connectors-for-elasticsearch\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"How to create customized connectors for Elasticsearch\"), \" blog post or check the implementation of \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://github.com/elastic/connectors/tree/main/connectors/sources\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"existing connectors\"), \".\"), mdx(\"p\", null, \"You can use \", mdx(\"em\", {\n parentName: \"p\"\n }, \"Customized connector\"), \" to configure and run your custom connector from Kibana. 
Navigate to: \", mdx(\"strong\", {\n parentName: \"p\"\n }, \"Search > Indices > Create a new index > Connector > Customized connector\"), \".\"), mdx(\"p\", null, \"If you want to contribute your custom connector to the open-source connectors framework, refer to the \", mdx(\"a\", {\n parentName: \"p\",\n \"href\": \"https://github.com/elastic/connectors/blob/main/docs/CONTRIBUTING.md\",\n \"target\": \"_blank\",\n \"rel\": \"nofollow noopener noreferrer\"\n }, \"How to contribute connectors guide\"), \" in the \", mdx(\"code\", {\n parentName: \"p\",\n \"className\": \"language-text\"\n }, \"connectors\"), \" repository.\"));\n}\n;\nMDXContent.isMDXComponent = true;","tableOfContents":{"items":[{"url":"#data-connectors-framework","title":"Data Connectors Framework"},{"url":"#architecture","title":"Architecture"},{"url":"#connector-protocol","title":"Connector Protocol"},{"url":"#available-connectors","title":"Available connectors"},{"url":"#custom-connectors","title":"Custom connectors"}]},"timeToRead":3,"frontmatter":{"title":"Elastic Data Connectors - Elasticsearch Ingestion Made Simple","date":"October 05, 2023","spoiler":"Overview of a new, lightweight data connectors framework integrated with the Elastic stack that can ingest data from any source into the Elasticsearch index.","ogimage":null,"images":[{"childImageSharp":{"gatsbyImageData":{"layout":"constrained","placeholder":{"fallback":""},"backgroundColor":"transparent","images":{"fallback":{"src":"/static/55addb8c1fa1dbb9fb3c906832de5c91/6cc44/connectors-architecture.png","srcSet":"/static/55addb8c1fa1dbb9fb3c906832de5c91/b96cf/connectors-architecture.png 750w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/439ee/connectors-architecture.png 1080w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/7afce/connectors-architecture.png 1366w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/6cc44/connectors-architecture.png 1400w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/13a12/connectors-architecture.png 
1920w","sizes":"(min-width: 1400px) 1400px, 100vw"},"sources":[{"srcSet":"/static/55addb8c1fa1dbb9fb3c906832de5c91/c4a99/connectors-architecture.webp 750w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/719a3/connectors-architecture.webp 1080w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/14fd2/connectors-architecture.webp 1366w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/a48e7/connectors-architecture.webp 1400w,\n/static/55addb8c1fa1dbb9fb3c906832de5c91/9e507/connectors-architecture.webp 1920w","type":"image/webp","sizes":"(min-width: 1400px) 1400px, 100vw"}]},"width":1400,"height":739}}},{"childImageSharp":{"gatsbyImageData":{"layout":"constrained","placeholder":{"fallback":""},"backgroundColor":"transparent","images":{"fallback":{"src":"/static/39bbda5d2268372a7b1897ac057c3a71/72368/google_drive_connector.png","srcSet":"/static/39bbda5d2268372a7b1897ac057c3a71/2479c/google_drive_connector.png 750w,\n/static/39bbda5d2268372a7b1897ac057c3a71/f45fa/google_drive_connector.png 1080w,\n/static/39bbda5d2268372a7b1897ac057c3a71/fc4d4/google_drive_connector.png 1366w,\n/static/39bbda5d2268372a7b1897ac057c3a71/72368/google_drive_connector.png 1400w,\n/static/39bbda5d2268372a7b1897ac057c3a71/8ebad/google_drive_connector.png 1920w","sizes":"(min-width: 1400px) 1400px, 100vw"},"sources":[{"srcSet":"/static/39bbda5d2268372a7b1897ac057c3a71/cc1d9/google_drive_connector.webp 750w,\n/static/39bbda5d2268372a7b1897ac057c3a71/05669/google_drive_connector.webp 1080w,\n/static/39bbda5d2268372a7b1897ac057c3a71/60efe/google_drive_connector.webp 1366w,\n/static/39bbda5d2268372a7b1897ac057c3a71/be3c0/google_drive_connector.webp 1400w,\n/static/39bbda5d2268372a7b1897ac057c3a71/7f6be/google_drive_connector.webp 1920w","type":"image/webp","sizes":"(min-width: 1400px) 1400px, 
100vw"}]},"width":1400,"height":782}}}],"blogImages":null},"fields":{"slug":"/elasticsearch-data-connectors/","category":"tech-blog"}}},"pageContext":{"slug":"/elasticsearch-data-connectors/","previous":{"fields":{"slug":"/exploring-apache-lucene-scale/","directoryName":"exploring-apache-lucene-scale","category":"tech-blog"},"frontmatter":{"title":"Exploring Apache Lucene - Part 3: Running at Scale"}},"next":null}},"staticQueryHashes":["4190863273"]} \ No newline at end of file diff --git a/rss.xml b/rss.xml index 9fdc457d6..501d3fbe1 100644 --- a/rss.xml +++ b/rss.xml @@ -1,4 +1,4 @@ -<![CDATA[Jedr's Blog]]>https://j.blaszyk.meGatsbyJSTue, 10 Oct 2023 07:38:05 GMT<![CDATA[Cargo Bike: The Future of Sustainable Urban Mobility]]>https://j.blaszyk.me/cargo-bike-the-future-of-sustainable-urban-mobility/https://j.blaszyk.me/cargo-bike-the-future-of-sustainable-urban-mobility/Sun, 08 Oct 2023 00:00:00 GMT +<![CDATA[Jedr's Blog]]>https://j.blaszyk.meGatsbyJSSat, 14 Oct 2023 07:14:49 GMT<![CDATA[Cargo Bike: The Future of Sustainable Urban Mobility]]>https://j.blaszyk.me/cargo-bike-the-future-of-sustainable-urban-mobility/https://j.blaszyk.me/cargo-bike-the-future-of-sustainable-urban-mobility/Sun, 08 Oct 2023 00:00:00 GMT <div style="margin-top=55px; font-style: italic;">(This is an article posted to my blog. You can read it online by <a href="https://j.blaszyk.me/cargo-bike-the-future-of-sustainable-urban-mobility/">clicking here</a>.)</div> <![CDATA[Bikepacking in France - Provence and Hautes-Alpes]]>https://j.blaszyk.me/bikepacking-in-provence-france/https://j.blaszyk.me/bikepacking-in-provence-france/Sat, 09 Sep 2023 00:00:00 GMT <div style="margin-top=55px; font-style: italic;">(This is an article posted to my blog. 
You can read it online by <a href="https://j.blaszyk.me/bikepacking-in-provence-france/">clicking here</a>.)</div> diff --git a/tech-blog/elasticsearch-data-connectors/index.html b/tech-blog/elasticsearch-data-connectors/index.html index 25dde32c7..6911ebb8d 100644 --- a/tech-blog/elasticsearch-data-connectors/index.html +++ b/tech-blog/elasticsearch-data-connectors/index.html @@ -78,7 +78,7 @@ let theme = preferredTheme || (lightQuery.matches ? 'light' : 'dark'); setTheme(theme); })(); -

Elastic Data Connectors - Elasticsearch Ingestion Made Simple

Tech Blog

October 5, 2023☕️ 3 min read

In any search application, there is an underlying ingestion pipeline that indexes the data. While developers often focus on speedy searches, they tend to overlook the aspect of data indexing. It’s important to keep in mind that easy-to-manage data ingestion is a key foundation for successful data-driven solutions.

Elasticsearch (ES) has many ingestion tools, but sometimes they don’t fit all data sources. That’s where Elastic Data Connectors come in. They let you easily link custom or proprietary data to Elasticsearch. This framework is lightweight, open-source and flexible, and works both on-premise and in the cloud. In short, it’s a straightforward way to keep any content source, e.g. database, cloud storage or local file system, in sync with a search-optimized Elasticsearch index.

Other ingestion solutions offered by the Enterprise Search component in the Elastic stack are Web Crawler and Index API.

Data Connectors Framework

elastic/connectors-python

The data connectors framework makes it easier for developers to create connector clients that can sync data from other sources into Elasticsearch. The framework takes care of essential tasks like scheduling data syncs, extracting text from files, and setting up index mappings automatically. This way, developers can concentrate on integrating their chosen data source without worrying about these common tasks.

Elastic data connectors - Google Drive connector syncing data

The data connectors framework can be configured to use Elasticsearch’s ingestion pipelines to perform transformations on data before storing it in an index. A common use case is document enrichment with machine learning. For example, you can:

  • analyze text fields using a Text embedding model that will generate a dense vector representation of your data
  • run text classification for sentiment analysis
  • extract key information from text with Named Entity Recognition (NER)

Architecture

The data connectors framework is deployed as a separate stateless service. You can host them yourself, or for selected native connectors run them in the Elastic Cloud natively.

The framework connects your third-party data source with an Elasticsearch index and keeps it in sync, so that you can focus on search and analytics with your data.

Elastic Data Connectors Architecture

Connector Protocol

Connectors framework relies on the Connector Protocol. All communication between connectors and other parts of the stack happens asynchronously through an Elasticsearch index. This comes with several benefits.

  • Stateless deployment: The data connectors service relies on external state in an ES index
  • Fault-tolerance: The service can resume operation on a different host after a restart or a failure. Once it reestablishes connection with Elasticsearch, it will continue its normal operation.
  • Developers have control over a deployment - This service can be easily self-hosted or run in the Elastic cloud. It only needs to be able to discover your Elasticsearch instance over the network.

This setup is developer-friendly and aims to make the connectors service easy to deploy and manage. The framework is written in async Python, making this IO-bound framework lightweight, fast and efficient.

Available connectors

Currently, the following content sources are supported as of version 8.10. For an up-to-date list of supported connectors check the official documentation.

You can check the connector implementations in: connectors-python/connectors/sources.

Custom connectors

You are not limited to the connectors included in the data connectors framework. It is easy to implement a custom data connector using the abstractions provided by the framework.

All you need to do is define a custom DataSource class in async python and the framework will take care of making it compatible with common functionalities, such as scheduling data syncs, extracting text from files, and setting up index mappings automatically.

Here is an example starting point for implementing MyCustomDataSource. In order to turn this into a functional connector you need to define get_default_configuration, ping and get_docs methods.

class MyCustomDataSource(BaseDataSource):
+            

Elastic Data Connectors - Elasticsearch Ingestion Made Simple

Tech Blog

October 5, 2023☕️ 3 min read

In any search application, there is an underlying ingestion pipeline that indexes the data. While developers often focus on speedy searches, they tend to overlook the aspect of data indexing. It’s important to keep in mind that easy-to-manage data ingestion is a key foundation for successful data-driven solutions.

Elasticsearch (ES) has many ingestion tools, but sometimes they don’t fit all data sources. That’s where Elastic Data Connectors come in. They let you easily link custom or proprietary data to Elasticsearch. This framework is lightweight, open-source and flexible, and works both on-premise and in the cloud. In short, it’s a straightforward way to keep any content source, e.g. database, cloud storage or local file system, in sync with a search-optimized Elasticsearch index.

Other ingestion solutions offered by the Enterprise Search component in the Elastic stack are Web Crawler and Index API.

Data Connectors Framework

elastic/connectors

The data connectors framework makes it easier for developers to create connector clients that can sync data from other sources into Elasticsearch. The framework takes care of essential tasks like scheduling data syncs, extracting text from files, and setting up index mappings automatically. This way, developers can concentrate on integrating their chosen data source without worrying about these common tasks.

Elastic data connectors - Google Drive connector syncing data

The data connectors framework can be configured to use Elasticsearch’s ingestion pipelines to perform transformations on data before storing it in an index. A common use case is document enrichment with machine learning. For example, you can:

  • analyze text fields using a Text embedding model that will generate a dense vector representation of your data
  • run text classification for sentiment analysis
  • extract key information from text with Named Entity Recognition (NER)

Architecture

The data connectors framework is deployed as a separate stateless service. You can host them yourself, or for selected native connectors run them in the Elastic Cloud natively.

The framework connects your third-party data source with an Elasticsearch index and keeps it in sync, so that you can focus on search and analytics with your data.

Elastic Data Connectors Architecture

Connector Protocol

Connectors framework relies on the Connector Protocol. All communication between connectors and other parts of the stack happens asynchronously through an Elasticsearch index. This comes with several benefits.

  • Stateless deployment: The data connectors service relies on external state in an ES index
  • Fault-tolerance: The service can resume operation on a different host after a restart or a failure. Once it reestablishes connection with Elasticsearch, it will continue its normal operation.
  • Developers have control over a deployment - This service can be easily self-hosted or run in the Elastic cloud. It only needs to be able to discover your Elasticsearch instance over the network.

This setup is developer-friendly and aims to make the connectors service easy to deploy and manage. The framework is written in async Python, making this IO-bound framework lightweight, fast and efficient.

Available connectors

Currently, the following content sources are supported as of version 8.10. For an up-to-date list of supported connectors check the official documentation.

You can check the connector implementations in: connectors/sources.

Custom connectors

You are not limited to the connectors included in the data connectors framework. It is easy to implement a custom data connector using the abstractions provided by the framework.

All you need to do is define a custom DataSource class in async python and the framework will take care of making it compatible with common functionalities, such as scheduling data syncs, extracting text from files, and setting up index mappings automatically.

Here is an example starting point for implementing MyCustomDataSource. In order to turn this into a functional connector you need to define get_default_configuration, ping and get_docs methods.

class MyCustomDataSource(BaseDataSource):
     """Connector to my custom data source"""
 
     name = "Custom Source"
@@ -121,7 +121,7 @@
                return {'TEXT': 'DATA', 'timestamp': timestamp,
                        'id': 'doc-id'}
         """
-        raise NotImplementedError

To learn more about how to write a custom connector, refer to the How to create customized connectors for Elasticsearch blog post or check the implementation of existing connectors.

You can use Customized connector to configure and run your custom connector from Kibana. Navigate to: Search > Indices > Create a new index > Connector > Customized connector.

If you want to contribute your custom connector to the open-source connectors framework, refer to the How to contribute connectors guide in the connectors-python repository.

Blog by Jedr Blaszyk. Tech, cycling, photography & travelling.

Comments