Skip to content

Commit 6080ceb

Browse files
committed
initial working migration
1 parent 9f98c84 commit 6080ceb

File tree

83 files changed

+42210
-6
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+42210
-6
lines changed

nuve.client/AnalysisHelper.cs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Linq;
5+
using System.Text;
6+
using System.Text.RegularExpressions;
7+
using Nuve.Lang;
8+
using Nuve.Morphologic.Structure;
9+
10+
namespace Nuve.Client
11+
{
12+
/// <summary>
/// Helpers for running morphological analysis over batches of words and for
/// reporting the results on the console or in text files.
/// </summary>
internal class AnalysisHelper
{
    /// <summary>
    /// Analyzes each word with <paramref name="language"/> and prints, per word,
    /// the number of solutions followed by every solution on its own line.
    /// </summary>
    /// <param name="language">Language whose <c>Analyze</c> method is applied to each word.</param>
    /// <param name="words">Words to analyze; enumerated once.</param>
    public static void Analyze(Language language, IEnumerable<string> words)
    {
        foreach (string word in words)
        {
            IList<Word> solutions = language.Analyze(word);
            Console.WriteLine("\n{0} için {1} çözüm bulundu:", word, solutions.Count);
            foreach (Word solution in solutions)
            {
                Console.WriteLine($"\t{solution}\n");
            }
        }
    }

    /// <summary>
    /// Analyzes each word and writes one line per word to
    /// <paramref name="undefinedOutputFilename"/>: the word itself followed by
    /// every solution, separated by tab characters.
    /// </summary>
    /// <param name="analyzer">Analyzer applied to each word.</param>
    /// <param name="words">Words to analyze; enumerated once.</param>
    /// <param name="undefinedOutputFilename">Path of the file to (over)write.</param>
    public static void AnalyzeTokensToFile(WordAnalyzer analyzer, IEnumerable<string> words,
        string undefinedOutputFilename)
    {
        var lines = new List<string>();
        foreach (string word in words)
        {
            // A builder avoids re-allocating the whole line for every appended solution.
            var line = new StringBuilder(word);
            foreach (Word solution in analyzer.Analyze(word))
            {
                line.Append('\t').Append(solution);
            }
            lines.Add(line.ToString());
        }
        File.WriteAllLines(undefinedOutputFilename, lines);
    }

    /// <summary>
    /// Reads <paramref name="inputFilename"/> as UTF-8, analyzes every line, and writes
    /// the lines that produced no solution to <paramref name="undefinedOutputFilename"/>.
    /// </summary>
    /// <param name="analyzer">Analyzer applied to each input line.</param>
    /// <param name="inputFilename">Path of the UTF-8 text file holding one entry per line.</param>
    /// <param name="undefinedOutputFilename">Path of the file receiving the unanalyzable lines.</param>
    public static void Analyze(WordAnalyzer analyzer, string inputFilename, string undefinedOutputFilename)
    {
        // ReadAllLines loads the whole input before anything is written, so the
        // output path may safely be the same as the input path.
        List<string> undefined = File.ReadAllLines(inputFilename, Encoding.UTF8)
            .Where(line => !analyzer.Analyze(line).Any())
            .ToList();
        File.WriteAllLines(undefinedOutputFilename, undefined);
    }

    /// <summary>
    /// Reads <paramref name="filename"/> as UTF-8 and splits its text on runs of
    /// non-word characters.
    /// </summary>
    /// <param name="filename">Path of the text file to tokenize.</param>
    /// <returns>The non-empty tokens in document order; empty when the file has no word characters.</returns>
    public static string[] Tokenize(string filename)
    {
        string text = File.ReadAllText(filename, Encoding.UTF8);

        // Regex.Split yields an empty string when the input starts or ends with a
        // delimiter match (and for empty input); those are not tokens, so drop them.
        return Regex.Split(text, @"\W+")
            .Where(token => token.Length > 0)
            .ToArray();
    }
}
67+
}

nuve.client/Program.cs

Lines changed: 105 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,108 @@
1-

2-
// See https://aka.ms/new-console-template for more information
3-
using nuve;
1+
using System;
2+
using System.Linq;
3+
using Nuve.Lang;
4+
using Nuve.Reader;
45

5-
Console.WriteLine("Hello, World!");
6+
namespace Nuve.Client
7+
{
8+
/// <summary>
/// Console entry point with a handful of small demos of the Nuve API.
/// Enable a demo by uncommenting its call in <see cref="Main"/>.
/// </summary>
public class Program
{
    // Path used by ExternalLanguageReading when no directory is supplied.
    private const string DefaultExternalLanguageDirectory =
        @"C:\Users\harun_000\Dropbox\nuve\nuve-studio\lang\tr-TR";

    // Shared language instance; every demo reuses it instead of creating its own.
    private static readonly Language Turkish = LanguageFactory.Create(LanguageType.Turkish);

    private static void Main(string[] args)
    {
        //Benchmarker.TestWithAMillionTokens(Turkish.Analyze);
        //Benchmarker.TestWithAMillionWords(Turkish.Analyze);

        //GitHubReadmeExamples();

        AnalysisHelper.Analyze(Turkish, new[] { "su", "eşkâli" });

        //Generation();

        //AnalysisAndStemming();

        //SentenceSegmentation();

        //ExternalLanguageReading();
    }

    /// <summary>
    /// Prints every analysis of <paramref name="word"/> together with its surface,
    /// the surface of its stem, and its root.
    /// </summary>
    private static void PrintAnalysis(Language language, string word)
    {
        foreach (var solution in language.Analyze(word))
        {
            Console.WriteLine("\t{0}", solution);
            Console.WriteLine("\toriginal:{0} stem:{1} root:{2}\n",
                solution.GetSurface(),
                solution.GetStem().GetSurface(),
                solution.Root); //Stemming
        }
    }

    /// <summary>
    /// Demo: analyzes a word, then generates words from morpheme ids and from an
    /// analysis string, printing the resulting surfaces.
    /// </summary>
    private static void GitHubReadmeExamples()
    {
        PrintAnalysis(Turkish, "yolsuzu");

        //Method 1: Specify the ids of the morphemes that constitute the word
        var word1 = Turkish.Generate("kitap/ISIM", "IC_COGUL_lAr", "IC_SAHIPLIK_BEN_(U)m",
            "IC_HAL_BULUNMA_DA", "IC_AITLIK_ki", "IC_COGUL_lAr", "IC_HAL_AYRILMA_DAn");

        //Method 2: Specify the string representation of the analysis of the word.
        var analysis = "kitap/ISIM IC_COGUL_lAr IC_SAHIPLIK_BEN_(U)m";
        var word2 = Turkish.GetWord(analysis);

        Console.WriteLine(word1.GetSurface());
        Console.WriteLine(word2.GetSurface());
    }

    /// <summary>
    /// Demo: prints the lexical form of the root of every analysis of a word.
    /// </summary>
    private static void AnalysisAndStemming()
    {
        var stems = Turkish.Analyze("ehemmiyetsiz").Select(s => s.Root.LexicalForm).ToList();

        foreach (var stem in stems)
        {
            Console.WriteLine("\t{0}", stem);
        }
    }

    /// <summary>
    /// Demo: prints the surfaces reported by <c>GetSurfacesAfterEachPhase</c>
    /// for the first analysis of a word.
    /// </summary>
    private static void Generation()
    {
        const string word = "suyu";
        var solutions = Turkish.Analyze(word);

        // Indexing an empty result would throw; report it instead.
        if (solutions.Count == 0)
        {
            Console.WriteLine("No analysis found for '{0}'.", word);
            return;
        }

        foreach (var surface in solutions[0].GetSurfacesAfterEachPhase())
        {
            Console.WriteLine(surface);
        }
    }

    /// <summary>
    /// Demo: loads a language definition from a directory on disk, prints its
    /// culture code and the analyses of a sample word.
    /// </summary>
    /// <param name="languageDirectory">
    /// Directory passed to <c>LanguageReader</c>; defaults to the original
    /// developer-machine path, so pass an explicit directory on other machines.
    /// </param>
    private static void ExternalLanguageReading(string languageDirectory = DefaultExternalLanguageDirectory)
    {
        // NOTE(review): assumes LanguageReader.Read() returns a Language - confirm.
        var tr = new LanguageReader(languageDirectory).Read();
        Console.WriteLine(tr.Type.CultureCode);

        PrintAnalysis(tr, "yolsuzu");
    }

    // Disabled sentence-segmentation demo, kept for reference.
    // private static void SentenceSegmentation()
    // {
    //     var paragraph = "Prof. Dr. Ahmet Bey 1.6 oranında artış var dedi 2. kez. E-posta adresi [email protected] imiş! Doğru mu?";
    //     ITokenizer tokenizer = new ClassicTokenizer(true);
    //     SentenceSegmenter segmenter = new TokenBasedSentenceSegmenter(tokenizer);
    //     var sentences = segmenter.GetSentences(paragraph);
    //     foreach (string sentence in sentences)
    //     {
    //         Console.WriteLine(sentence);
    //     }
    // }
}
108+
}

nuve/Condition/ConditionBase.cs

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Nuve.Morphologic.Structure;
4+
using Nuve.Orthographic;
5+
using Nuve.Reader;
6+
7+
namespace Nuve.Condition
8+
{
9+
/// <summary>
/// Names the morpheme a condition is evaluated against, relative to the
/// allomorph being tested. The per-member notes describe how
/// <c>ConditionBase.TryGetOperandMorpheme</c> resolves each value.
/// </summary>
public enum Position
{
    /// <summary>Resolved to the allomorph's <c>First</c>.</summary>
    First,

    /// <summary>Resolved to the allomorph's <c>Previous</c>.</summary>
    Previous,

    /// <summary>Resolved to the allomorph's <c>Next</c>.</summary>
    Next,

    /// <summary>Resolved to the allomorph itself.</summary>
    This,

    /// <summary>Resolved to the allomorph itself (same handling as <see cref="This"/>).</summary>
    Source,

    /// <summary>Resolved to the allomorph's <c>Next</c> (same handling as <see cref="Next"/>).</summary>
    Target,

    /// <summary>Not resolved by <c>TryGetOperandMorpheme</c>, which throws for this value.</summary>
    BeforeTarget,

    /// <summary>Resolved to the allomorph's <c>Next.Next</c>.</summary>
    AfterTarget,

    /// <summary>Resolved to the allomorph's <c>Previous</c> (same handling as <see cref="Previous"/>).</summary>
    BeforeSource,

    /// <summary>Not resolved by <c>TryGetOperandMorpheme</c>, which throws for this value.</summary>
    Last
}
22+
23+
/// <summary>
/// Base class for conditions that are evaluated against an allomorph, or against
/// a neighbouring morpheme selected by a <see cref="Nuve.Condition.Position"/>.
/// </summary>
public abstract class ConditionBase
{
    protected readonly Alphabet Alphabet;
    protected readonly string Operand;
    protected readonly Position Position;

    /// <summary>
    /// Parses <paramref name="position"/> and stores the operand and alphabet.
    /// </summary>
    /// <param name="position">Name of a <see cref="Nuve.Condition.Position"/> member (case-sensitive).</param>
    /// <param name="operand">Raw operand text; its meaning is defined by the subclass.</param>
    /// <param name="alphabet">Alphabet made available to subclasses.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="position"/> does not denote a declared member of the enum.
    /// </exception>
    protected ConditionBase(string position, string operand, Alphabet alphabet)
    {
        // Enum.TryParse also succeeds for numeric text such as "42" that maps to no
        // declared member; IsDefined rejects those here instead of letting them
        // surface later from the switch in TryGetOperandMorpheme.
        if (!Enum.TryParse(position, out Position parsed) || !Enum.IsDefined(typeof(Position), parsed))
        {
            throw new ArgumentException("Invalid Morpheme Location: " + position);
        }

        Position = parsed;
        Operand = operand;
        Alphabet = alphabet;
    }

    /// <summary>
    /// Resolves the morpheme this condition applies to, relative to <paramref name="allomorph"/>.
    /// </summary>
    /// <param name="allomorph">The allomorph the condition is being evaluated for.</param>
    /// <param name="operand">
    /// The resolved neighbour. Only meaningful when the method returns <c>true</c>.
    /// </param>
    /// <returns><c>true</c> if the requested morpheme exists; otherwise <c>false</c>.</returns>
    /// <exception cref="ArgumentException">
    /// The configured position is not handled here
    /// (<see cref="Nuve.Condition.Position.BeforeTarget"/> and <see cref="Nuve.Condition.Position.Last"/>).
    /// </exception>
    protected bool TryGetOperandMorpheme(Allomorph allomorph, out Allomorph operand)
    {
        switch (Position)
        {
            case Position.Next:
            case Position.Target:
                operand = allomorph.Next;
                return allomorph.HasNext;

            case Position.Previous:
            case Position.BeforeSource:
                operand = allomorph.Previous;
                return allomorph.HasPrevious;

            case Position.This:
            case Position.Source:
                operand = allomorph;
                return true;

            case Position.First:
                operand = allomorph.First;
                return true;

            case Position.AfterTarget:
                // Two steps forward: only dereference Next when it is reported present.
                if (!allomorph.HasNext)
                {
                    operand = null;
                    return false;
                }
                operand = allomorph.Next.Next;
                return allomorph.Next.HasNext;

            default:
                throw new ArgumentException($"Invalid Argument : {Position}");
        }
    }

    /// <summary>
    /// Evaluates this condition for <paramref name="allomorph"/>.
    /// </summary>
    public abstract bool IsTrueFor(Allomorph allomorph);

    /// <summary>
    /// Splits <paramref name="operand"/> on commas and spaces into a set of labels,
    /// discarding empty entries and duplicates.
    /// </summary>
    protected HashSet<string> ParseLabels(string operand)
    {
        char[] separators = { ',', ' ' };
        return new HashSet<string>(operand.Split(separators, StringSplitOptions.RemoveEmptyEntries));
    }

    /// <summary>Returns the operand prefixed with <c>"operand: "</c>.</summary>
    public override string ToString()
    {
        return "operand: " + Operand;
    }
}
89+
}

nuve/Condition/ConditionContainer.cs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Nuve.Morphologic.Structure;
4+
5+
namespace Nuve.Condition
6+
{
7+
/// <summary>
/// A group of conditions combined with either "And" (all must hold) or
/// "Or" (at least one must hold). An empty container is always true.
/// </summary>
public class ConditionContainer
{
    private readonly IList<ConditionBase> _conditions;

    // true: every condition must hold ("And"); false: any one suffices ("Or").
    private readonly bool _flag;

    /// <summary>
    /// Creates a container over <paramref name="conditions"/>.
    /// </summary>
    /// <param name="conditions">The conditions to combine; may be empty but not null.</param>
    /// <param name="flag">
    /// <c>"And"</c> or <c>"Or"</c> (case-sensitive). Null or empty is treated as <c>"Or"</c>.
    /// </param>
    /// <exception cref="ArgumentException"><paramref name="flag"/> is any other value.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="conditions"/> is null.</exception>
    public ConditionContainer(IList<ConditionBase> conditions, string flag)
    {
        if (String.IsNullOrEmpty(flag) || flag == "Or")
        {
            _flag = false;
        }
        else if (flag == "And")
        {
            _flag = true;
        }
        else
        {
            // Turkish: "Invalid Flag value, must be And or Or :)". Written with
            // Unicode escapes so the text survives regardless of the source file's
            // encoding (the literal had been reduced to U+FFFD replacement characters).
            throw new ArgumentException("Ge\u00e7ersiz Flag de\u011feri, And veya Or olmal\u0131 :)");
        }

        // Fail here rather than with a NullReferenceException on the first IsEmpty/IsTrue call.
        if (conditions == null)
        {
            throw new ArgumentNullException(nameof(conditions));
        }

        _conditions = conditions;
    }

    /// <summary>Gets whether the container holds no conditions.</summary>
    public bool IsEmpty
    {
        get { return _conditions.Count == 0; }
    }

    /// <summary>
    /// Evaluates the container for <paramref name="allomorph"/>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the container is empty; otherwise the "And" or "Or"
    /// combination of the individual conditions, evaluated in list order with
    /// short-circuiting.
    /// </returns>
    public bool IsTrue(Allomorph allomorph)
    {
        if (IsEmpty)
        {
            return true;
        }

        return _flag ? AreAllConditionsTrue(allomorph) : IsAnyConditionTrue(allomorph);
    }

    /// <summary>Creates a container with no conditions, which is true for every allomorph.</summary>
    public static ConditionContainer EmptyContainer()
    {
        return new ConditionContainer(new List<ConditionBase>().AsReadOnly(), "");
    }

    // "And": stops at the first condition that fails.
    private bool AreAllConditionsTrue(Allomorph allomorph)
    {
        foreach (ConditionBase condition in _conditions)
        {
            if (!condition.IsTrueFor(allomorph))
            {
                return false;
            }
        }
        return true;
    }

    // "Or": stops at the first condition that holds.
    private bool IsAnyConditionTrue(Allomorph allomorph)
    {
        foreach (ConditionBase condition in _conditions)
        {
            if (condition.IsTrueFor(allomorph))
            {
                return true;
            }
        }
        return false;
    }
}
74+
}

0 commit comments

Comments
 (0)