From 0d4adb5f1ad6c38a828370414a584dd485165dce Mon Sep 17 00:00:00 2001
From: Amir Pourmand <pourmand1376@gmail.com>
Date: Thu, 3 Aug 2023 17:39:31 +0330
Subject: [PATCH] Add Isna Persian Dataset (#3631)

This dataset is less important than the Farsi Wikipedia dataset, so I think
[this pull
request](https://github.com/LAION-AI/Open-Assistant/pull/3629) should be
merged first.

I have uploaded the data to
[Hugging Face](https://huggingface.co/datasets/pourmand1376/isna-news)
in Open-Assistant's standard format, so it shouldn't need any further
processing.

---------

Co-authored-by: Oliver Stanley <olivergestanley@gmail.com>
---
 data/datasets/__init__.py            | 1 +
 data/datasets/fa-isna-news/README.md | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 data/datasets/fa-isna-news/README.md

diff --git a/data/datasets/__init__.py b/data/datasets/__init__.py
index 92a82179f0..7b2c077ea4 100644
--- a/data/datasets/__init__.py
+++ b/data/datasets/__init__.py
@@ -4,6 +4,7 @@
     "tv_dialogue": "sedthh/tv_dialogue",  # TV and Movie dialogues and transcripts
     "fd_dialogue": "sedthh/fd_dialogue",  # TV and Movie dialogues and transcripts from ForeverDreaming
     "tlcv2.0_oa": "pythainlp/tlcv2.0_oa",  # Thai classical literature texts
+    "fa-isna-news": "pourmand1376/isna-news",  # Isna Persian News
     "fa-wikipedia": "pourmand1376/fa-wikipedia",  # Farsi Wikipedia texts
 }
 
diff --git a/data/datasets/fa-isna-news/README.md b/data/datasets/fa-isna-news/README.md
new file mode 100644
index 0000000000..fa216bef44
--- /dev/null
+++ b/data/datasets/fa-isna-news/README.md
@@ -0,0 +1,2 @@
+This text-only dataset was crawled from [Isna news](https://isna.ir/). This is
+the biggest Farsi news agency, and thus the text is pretty clean.