Skip to content

Commit

Permalink
regular expressions update
Browse files Browse the repository at this point in the history
  • Loading branch information
ddmitov committed Sep 26, 2023
1 parent 9fe8ff3 commit cc8b35b
Showing 1 changed file with 38 additions and 11 deletions.
49 changes: 38 additions & 11 deletions epigraphista/javascript/epidoc_text_converter.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,24 @@ function convertLeidenToEpidoc (text) {
// ///////////////////////// //

// <<exemplum>> //
// visual explanation:
// https://regexper.com/#%3C%3C%28%28%3F%3A%5Ba-z%5D%7Cs%29%2B%29%3E%3E
text = text.replace(
RegExp('<<((?:[' + unicodeBlocks + ']|s)+)>>', 'g'),
'<add place="overstrike">$1</add>'
)

// [[exemplum]] //
// visual explanation:
// https://regexper.com/#%5C%5B%5C%5B%28%28%3F%3A%5Ba-z%5D%7Cs%29%2B%29%5C%5D%5C%5D
text = text.replace(
RegExp('\\[\\[((?:[' + unicodeBlocks + ']|s)+)\\]\\]', 'g'),
'<del rend="erasure">$1</del>'
)

// ((exemplum)) //
// visual explanation:
// https://regexper.com/#%5C%28%5C%28%28%28%3F%3A%5Ba-z%5D%7Cs%29%2B%29%5C%29%5C%29
text = text.replace(
RegExp('\\(\\(((?:[' + unicodeBlocks + ']|s)+)\\)\\)', 'g'),
'<g type="$1"/>'
Expand All @@ -61,44 +67,57 @@ function convertLeidenToEpidoc (text) {
// ///////////////////////// //

// <exemplum> //
// '<exemplum>' will match, but '<<exemplum>>' will NOT match.
// visual explanation:
// https://regexper.com/#%28%5B%5E%3C%5D%29%3C%28%28%3F%3A%5Ba-z%5D%7Cs%29%2B%29%3E%28%5B%5E%3E%5D%29
text = text.replace(
RegExp('([^<]){0,1}<((?:[' + unicodeBlocks + ']|s)+)>([^>]|$)', 'g'),
RegExp('([^<])<((?:[' + unicodeBlocks + ']|s)+)>([^>])', 'g'),
'$1<supplied reason="omitted">$2</supplied>$3'
)

// {exemplum} //
// visual explanation:
// https://regexper.com/#%7B%28%28%3F%3A%5Ba-z%5D%7Cs%29%2B%29%7D
text = text.replace(
RegExp('{((?:[' + unicodeBlocks + ']|s)+)}', 'g'),
'<sic>$1</sic>'
)

// [c.2] //
// visual explanation:
// https://regexper.com/#%5C%5Bc%5C.%28%28%3F%3A%5Cd%29%2B%29%5C%5D
text = text.replace(
/\[c\.((?:\d)+)\]/g,
'<gap reason="lost" extent="$1" unit="character"/>'
)

// [-] //
// visual explanation:
// https://regexper.com/#%5C%5B-%5C%5D
text = text.replace(
/\[-\]/g,
'<name><gap reason="lost" extent="unknown" unit="character"/></name>'
)

// [- -] //
// visual explanation:
// https://regexper.com/#%28%5E%7C%5Cn%29%7B1%7D%28%5Cs%29%7B0%2C%7D%5C%5B-%5Cs-%5C%5D%28%5Cs%29%7B0%2C%7D%28%5Cn%7C%24%29%7B1%7D
text = text.replace(
/(^|\n){1}([\s]){0,}\[\-\s\-\](\s){0,}(\n|$){1}/g,
/(^|\n){1}(\s){0,}\[-\s-\](\s){0,}(\n|$){1}/g,
'$1<gap reason="lost" extent="1" unit="line"/>\n'
)

// [- -] ? //
// visual explanation:
// https://regexper.com/#%28%5E%7C%5Cn%29%7B1%7D%28%5B%5Cs%5D%29%7B0%2C%7D%5C%5B-%5Cs-%5C%5D%5Cs%5C%3F%28%5Cn%7C%24%29%7B1%7D
text = text.replace(
/(^|\n){1}([\s]){0,}\[\-\s\-\]\s\?(\n|$){1}/g,
/(^|\n){1}([\s]){0,}\[-\s-\]\s\?(\n|$){1}/g,
'$1<gap reason="lost" extent="unknown" unit="line"/>'
)

// [--] //
const lostCharactersRegExp = /([^\[]){0,1}\[([\-]+)\]([^\]]){0,1}/
// visual explanation:
// https://regexper.com/#%28%5B%5E%5B%5D%29%5C%5B%28%5B-%5D%2B%29%5C%5D%28%5B%5E%5C%5D%5D%29
const lostCharactersRegExp = /([^[])\[([-]+)\]([^\]])/

while (text.match(lostCharactersRegExp)) {
text = text.replace(
Expand All @@ -109,8 +128,10 @@ function convertLeidenToEpidoc (text) {
)
}

// [. .] //
const lostRegExp = /\[([\.\s]+)\]/
// [..] //
// visual explanation:
// https://regexper.com/#%5C%5B%28%5B.%5D%2B%29%5C%5D
const lostRegExp = /\[([.]+)\]/

while (text.match(lostRegExp)) {
// Brackets are not counted!
Expand All @@ -126,12 +147,16 @@ function convertLeidenToEpidoc (text) {
text = text.replace(/\(!\)/g, '<note>!</note>')

// (scil. exemplum) //
// visual explanation:
// https://regexper.com/#%5C%28scil%5C.%5Cs%28%28%3F%3A%5Ba-z%5D%7Cs%29%2B%29%5C%29
text = text.replace(
RegExp('\\(scil. ((?:[' + unicodeBlocks + ']|s)+)\\)', 'g'),
RegExp('\\(scil\\.\\s((?:[' + unicodeBlocks + ']|s)+)\\)', 'g'),
'<supplied reason="subaudible">$1</supplied>'
)

// exemplum(- -) //
// visual explanation:
// https://regexper.com/#%28%5CS%2B%29%5C%28%28-%5Cs*%29%2B%5C%29
text = text.replace(/(\S+)\((-\s*)+\)/g, '<abbr>$1</abbr>')

// (- -) //
Expand Down Expand Up @@ -279,7 +304,7 @@ function convertLeidenToEpidoc (text) {
// 6. [c.2]
// 7. [-]
// 8. [--]
// 9. [. .]
// 9. [..]
// 10. (!)
// 11. (scil. exemplum)
// 12. exemplum(- -)
Expand All @@ -300,9 +325,11 @@ function convertLeidenToEpidoc (text) {
// 27. ++
// 28. ...

// Paste in the 'Inscription Text' textarea without the comments:
// Paste in the 'Inscription Text' textarea
// without the comment mark and with nothing else on the line:
// [- -]
// [- -] ?

// Paste in the 'Inscription Text' textarea and add a new line below:
// Paste in the 'Inscription Text' textarea
// without the comment mark and add a new line below:
// exempl-

0 comments on commit cc8b35b

Please sign in to comment.