Skip to content

Commit

Permalink
fix typos
Browse files Browse the repository at this point in the history
  • Loading branch information
RainRat committed Feb 29, 2024
1 parent 1db0301 commit eeb9cc3
Show file tree
Hide file tree
Showing 18 changed files with 31 additions and 31 deletions.
4 changes: 2 additions & 2 deletions backend/oasst_backend/tree_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,7 @@ def _query_need_review(

def query_prompts_need_review(self, lang: str) -> list[Message]:
"""
Select initial prompt messages with less then required rankings in active message tree
Select initial prompt messages with less than required rankings in active message tree
(active == True in message_tree_state)
"""
return self._query_need_review(
Expand All @@ -1094,7 +1094,7 @@ def query_prompts_need_review(self, lang: str) -> list[Message]:

def query_replies_need_review(self, lang: str) -> list[Message]:
"""
Select child messages (parent_id IS NOT NULL) with less then required rankings
Select child messages (parent_id IS NOT NULL) with less than required rankings
in active message tree (active == True in message_tree_state)
"""
return self._query_need_review(message_tree_state.State.GROWING, self.cfg.num_reviews_reply, False, lang)
Expand Down
2 changes: 1 addition & 1 deletion data/datasets/TSSB-3M/generate_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def clean(text):


def clean_PII(text):
# Remove sign-off messege generated by `git commit --signoff`, eg. "Signed-off-by: user_name <[email protected]>"
# Remove sign-off message generated by `git commit --signoff`, e.g. "Signed-off-by: user_name <[email protected]>"
signoff_index = text.rfind("\n\nSigned-off-by:")
if signoff_index != -1:
# Remove the sign-off string from the commit message
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"id": "K9sCPQzIb278"
},
"source": [
"### DOWLOAD THE DATASET"
"### DOWNLOAD THE DATASET"
]
},
{
Expand Down Expand Up @@ -156,7 +156,7 @@
"id": "3MxfnNxX2n0m"
},
"source": [
"### GENERATE THE SUMMARIES AND ANOTATE THE DATASET"
"### GENERATE THE SUMMARIES AND ANNOTATE THE DATASET"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion data/datasets/recipes/tasty_recipes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@
" for i, instruction in enumerate(ingredient_and_instructions[row[\"slug\"]][\"instructions\"]):\n",
" instructions += f\"\\n{i+1}. {convert_fraction_unicode_chars_to_strings(instruction['display_text'])}\"\n",
"\n",
" # Constuct the full response\n",
" # Construct the full response\n",
" response = f\"\"\"Here's a recipe for {recipe_name}:\n",
"\n",
"Ingredients:\n",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Developper console script used to generate the associated json file.
* Developer console script used to generate the associated json file.
* Wikipedia URL : https://en.wikipedia.org/wiki/List_of_suicide_crisis_lines
* Author : Lucas Oulieu
*/
Expand Down
2 changes: 1 addition & 1 deletion data/datasets/tv_dialogue/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ How's it going?
on Huggingface!
They are examples on Huggingface.
CUT OUT TO ANOTHER SCENCE
CUT OUT TO ANOTHER SCENE
We are somewhere else
[PERSON 1 (v.o)] I wonder where we are?
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/architecture/inference.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ The inference server is built around [FastAPI](https://fastapi.tiangolo.com/).
for any other currently pending messages in the chat to
`inference.MessageState.cancelled`.
3. After updating the `message` table, we create a RedisQueue for this
specific message and enque the message.
specific message and enqueue the message.
4. Finally, we return an `inference.MessageRead` (a Pydantic model) to the
client. This is the object that contains the needed `message_id`.

Expand Down
2 changes: 1 addition & 1 deletion inference/server/oasst_inference_server/compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ async def run_compliance_check(websocket: fastapi.WebSocket, worker_id: str, wor
Run a compliance check for the given worker:
- Find a suitable compliance check assistant message
- Task the worker with generating a response with the same context
- Compare the respons against the existing completed message
- Compare the response against the existing completed message
- Update the database with the outcome
"""
async with deps.manual_create_session() as session:
Expand Down
2 changes: 1 addition & 1 deletion model/model_eval/manual/create_synth_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def main():
reply_texts.add(m.text)

if len(unique_replies) < 2:
print("Skipping enty with < 2 unique replies")
print("Skipping entry with < 2 unique replies")
continue

prompt_message = ExportMessageNode(
Expand Down
2 changes: 1 addition & 1 deletion model/model_training/custom_datasets/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def system_tag(

shuffle(properties)

# ensure that potentially multi-line conext field comes last
# ensure that potentially multi-line context field comes last
if self.context:
properties.append(("context", self.context))

Expand Down
2 changes: 1 addition & 1 deletion model/model_training/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


def freeze_top_n_layers(model, target_layers):
# its possible we can simply detect which module is a ModuleList
# it's possible we can simply detect which module is a ModuleList
# and simply freeze the module without doing string parsing
for name, param in model.named_parameters():
if "embed" in name:
Expand Down
2 changes: 1 addition & 1 deletion model/model_training/models/patching_falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def falcon_forward_with_flash_attn(
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
"""
head_mask, alibi & output_attention are not supported.
Reference to the original `FalconAttention.forwad()` method which this patch replaces:
Reference to the original `FalconAttention.forward()` method which this patch replaces:
https://github.com/huggingface/transformers/blob/c965d302791cf935d6ea7776428749be678cf509/src/transformers/models/falcon/modeling_falcon.py#L281
"""

Expand Down
4 changes: 2 additions & 2 deletions notebooks/TSSB-3M-bugs-dataset/TSSB-3M-bugs_dataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -707,10 +707,10 @@
"\n",
"g = Github()\n",
"\n",
"# TO DO, find a way to get a commmit from SHA\n",
"# TO DO, find a way to get a commit from SHA\n",
"# 1. Use GitHub API\n",
"# 2. Download repos with their history\n",
"# 3. Web scaping"
"# 3. Web scraping"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@
},
"outputs": [],
"source": [
"# Make grammar erros (more like: change random words into words of similar meaning)\n",
"# Make grammar errors (more like: change random words into words of similar meaning)\n",
"import nltk\n",
"from nltk.corpus import wordnet\n",
"import random\n",
Expand Down
4 changes: 2 additions & 2 deletions notebooks/data-augmentation/unified-qa/unified-qa.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1004,7 +1004,7 @@
"metadata": {},
"outputs": [],
"source": [
"random.seed(20) # for reproduciablity"
"random.seed(20) # for reproducibility"
]
},
{
Expand Down Expand Up @@ -1038,7 +1038,7 @@
" answer = item.Answer\n",
" if question == np.nan or answer == np.nan:\n",
" print(\"Skipped\")\n",
" # get a random conversation generatore function\n",
" # get a random conversation generator function\n",
" conv_func = random.choice(conv_funcs)\n",
" try:\n",
" conv_list = conv_func(question, answer)\n",
Expand Down
14 changes: 7 additions & 7 deletions notebooks/data-augmentation/wikidata-qa/wikidata.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,7 @@
" \"{sub} is used mostly for {a}.\",\n",
" \"{name} is mostly known for {a}.\",\n",
" ],\n",
" \"P487\": [\"{a}\", \"The {name} emoji is {a}.\", \"The {a} character repesents {name}.\"],\n",
" \"P487\": [\"{a}\", \"The {name} emoji is {a}.\", \"The {a} character represents {name}.\"],\n",
" \"P509\": [\"{name} died of {a}.\", \"The cause of {pos} death was {a}.\"],\n",
" \"P527\": [\"{name} are made of {a}.\", \"They are made of {a}.\"],\n",
" \"P569\": [\"{name} was born on {a}.\", \"{pos} birthday is on the {a}.\"],\n",
Expand All @@ -828,12 +828,12 @@
" ],\n",
" \"P580\": [\"{name} started in {a}.\", \"{name} first started at {a}.\"],\n",
" \"P582\": [\"{name} ended in {a}.\", \"{name} lasted until {a}.\"],\n",
" \"P625\": [\"{name} is lcoated at {a}.\", \"The coordinates for {name} are {a}.\", \"{pos} GPS location is {a}.\"],\n",
" \"P625\": [\"{name} is located at {a}.\", \"The coordinates for {name} are {a}.\", \"{pos} GPS location is {a}.\"],\n",
" \"P837\": [\"{name} is celebrated on {a}.\", \"{name} is on {a}.\"],\n",
" \"P856\": [\n",
" \"The URL for {name} is: {a}\",\n",
" \"See {a}\",\n",
" \"The URL of {pos} webiste is {a}\",\n",
" \"The URL of {pos} website is {a}\",\n",
" \"{pos} web address is: {a}\",\n",
" ],\n",
" \"P973\": [\n",
Expand All @@ -855,7 +855,7 @@
" \"P2043\": [\"{name} is {a} long.\", \"{sub} has a length of {a}.\"],\n",
" \"P2044\": [\"{name} is {a} tall.\", \"{name} is {a} above sea level.\", \"{pos} elevation is {a}.\"],\n",
" \"P2046\": [\"{name}'s area is {a}\", \"{pos} area is {a}.\"],\n",
" \"P2049\": [\"{name}'s widht is {a}.\", \"{name} is {a} wide.\"],\n",
" \"P2049\": [\"{name}'s width is {a}.\", \"{name} is {a} wide.\"],\n",
" \"P2250\": [\"{name} have a life expectancy of {a}.\", \"{pos} life expectancy is about {a}.\"],\n",
" \"P2283\": [\n",
" \"{name} uses {a} to work.\",\n",
Expand Down Expand Up @@ -887,20 +887,20 @@
" \"{pos} {l} children are {a}.\",\n",
" ],\n",
" \"P50\": [\"{name} was co-written by {a}.\", \"The authors of {name} are {a}.\"],\n",
" \"P57\": [\"{name} was direcrted by the following people: {a}.\", \"{a} were the directors of {name}.\"],\n",
" \"P57\": [\"{name} was directed by the following people: {a}.\", \"{a} were the directors of {name}.\"],\n",
" \"P61\": [\"{pos} inventors are {a}.\", \"{name} was discovered by {a}.\"],\n",
" \"P106\": [\"{name} has multiple occupations: {a}.\", \"{name}'s job titles are: {a}.\"],\n",
" \"P169\": [\"{name} is the CEO of multiple companies, such as {a}.\", \"{sub} is the CEO at {a}.\"],\n",
" \"P225\": [\"The taxon names for {name} are {a}.\", \"The proper scientific terms for {name} are {a}.\"],\n",
" \"P246\": [\"The elements of {name} are {a}.\", \"The symbols for {name} are {a}.\"],\n",
" \"P274\": [\"The formulas for {name} are {a}.\", \"The chemical formulas of the compound {name} are {a}.\"],\n",
" \"P487\": [\"The {name} emojis are {a}.\", \"The characters {a} repesent {name}.\"],\n",
" \"P487\": [\"The {name} emojis are {a}.\", \"The characters {a} represent {name}.\"],\n",
" \"P527\": [\"The ingredients of {name} are {a}.\", \"{a} are all parts needed for {name}.\"],\n",
" \"P575\": [\n",
" \"Sources disagree on the exact date, it is said that {name} was invented in {a}.\",\n",
" \"{name} was discovered multiple times at {a}.\",\n",
" ],\n",
" \"P856\": [\"The URLs for {name} are: {a}\", \"See {a}\", \"The URLs of {pos} webiste are {a}\"],\n",
" \"P856\": [\"The URLs for {name} are: {a}\", \"See {a}\", \"The URLs of {pos} website are {a}\"],\n",
" \"P625\": [\n",
" \"{name} can be found under the following GPS locations: {a}.\",\n",
" \"The coordinates for {name} are {a}.\",\n",
Expand Down
8 changes: 4 additions & 4 deletions notebooks/detoxify-evaluation/detoxify-evaluation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -327,11 +327,11 @@
"\n",
"| Model name | Not obviously toxic| Not obviously non-toxic | Obviously toxic| Obviously non-toxic|\n",
"| :---: | :---: | :---: |:---: | :---: |\n",
"|original| failed at all, easily accepted racist, sexist overally toxic prompts that were well formulated |Very sensitive on swear words, failed to reckognize context| good performance|good performance|\n",
"|unbiased|Managed to find some hidden toxicity but not on all sentences| Very sensitive explicit language but shown ability to recognize context| Did well but failed to reckognize some gender stereotype mockery | good performance\n",
"|multilingual|Managed to find some hidden toxicity but not on all sentences| Very sensitive explicit language but shown ability to recognize context| Did well but failed to reckognize some gender stereotype mockery | good performance\n",
"|original| failed at all, easily accepted racist, sexist, overall toxic prompts that were well formulated |Very sensitive to swear words, failed to recognize context| good performance|good performance|\n",
"|unbiased|Managed to find some hidden toxicity but not on all sentences| Very sensitive to explicit language but shown ability to recognize context| Did well but failed to recognize some gender stereotype mockery | good performance\n",
"|multilingual|Managed to find some hidden toxicity but not on all sentences| Very sensitive to explicit language but shown ability to recognize context| Did well but failed to recognize some gender stereotype mockery | good performance\n",
"\n",
"Subjectivly 'unbiased' looks like the best performing model. \n",
"Subjectively 'unbiased' looks like the best performing model. \n",
"\n",
"I don't think it would do well as a security layer in a live version of open assistant unless we do some finetuning first, because it can be fooled to pass toxicity if it's presented in formal language. \n",
"\n",
Expand Down
2 changes: 1 addition & 1 deletion website/src/lib/oasst_api_client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ export class OasstApiClient {
}

/**
* Modify a message's content and save it's previous content as a revision
* Modify a message's content and save its previous content as a revision
*/
async edit_message(message_id: string, user: BackendUserCore, new_content: string) {
return this.post<void>(`/api/v1/messages/${message_id}/edit`, {
Expand Down

0 comments on commit eeb9cc3

Please sign in to comment.