Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port POI patches #1434

Merged
merged 9 commits into from
Nov 11, 2024
13 changes: 13 additions & 0 deletions OpenXmlFormats/Spreadsheet/Sheet.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10726,6 +10726,19 @@ public void SetHyperlinkArray(CT_Hyperlink[] array)
{
hyperlinkField = new List<CT_Hyperlink>(array);
}

/// <summary>
/// Returns the number of hyperlinks currently held in the backing list,
/// or 0 when the backing list has not been created.
/// </summary>
/// <returns>The hyperlink count; never negative.</returns>
public int SizeOfHyperlinkArray()
{
    if (this.hyperlinkField == null)
    {
        return 0;
    }

    return this.hyperlinkField.Count;
}

/// <summary>
/// Removes the hyperlink at the given position from the backing list.
/// Does nothing when the backing list has not been created.
/// </summary>
/// <param name="index">Zero-based position of the hyperlink to remove.</param>
/// <exception cref="System.ArgumentOutOfRangeException">
/// Thrown by <c>List&lt;T&gt;.RemoveAt</c> when <paramref name="index"/> is outside the list.
/// </exception>
public void RemoveHyperlink(int index)
{
    // Guard the same backing field that is mutated below (and that
    // SizeOfHyperlinkArray inspects) rather than going through the property.
    if (this.hyperlinkField == null)
    {
        return;
    }

    this.hyperlinkField.RemoveAt(index);
}

[XmlElement("hyperlink", IsNullable = false)]
public List<CT_Hyperlink> hyperlink
{
Expand Down
12 changes: 8 additions & 4 deletions OpenXmlFormats/Spreadsheet/Sheet/CT_Worksheet.cs
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,12 @@ public bool IsSetDimension()
{
return this.dimensionField != null;
}

/// <summary>
/// Clears the hyperlinks element by resetting its backing field to null.
/// </summary>
public void UnsetHyperlinks()
{
    hyperlinksField = null;
}

public CT_SheetProtection AddNewSheetProtection()
{
this.sheetProtectionField = new CT_SheetProtection();
Expand Down Expand Up @@ -483,10 +489,7 @@ public bool IsSetPageMargins()
{
return this.pageMarginsField != null;
}
/// <summary>
/// Reports whether the hyperlinks element is present.
/// </summary>
/// <returns><c>true</c> when the hyperlinks backing field is not null.</returns>
public bool IsSetHyperLinks()
{
    bool isSet = hyperlinksField != null;
    return isSet;
}

public bool IsSetSheetPr()
{
return this.sheetPrField != null;
Expand Down Expand Up @@ -1014,6 +1017,7 @@ public CT_IgnoredErrors AddNewIgnoredErrors()
this.ignoredErrorsField = new CT_IgnoredErrors();
return this.ignoredErrorsField;
}

}

}
89 changes: 57 additions & 32 deletions main/SS/Formula/FormulaParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ namespace NPOI.SS.Formula
using NPOI.SS.UserModel;
using NPOI.SS.Util;
using NPOI.Util;
using SixLabors.Fonts.Unicode;

/// <summary>
/// Specific exception thrown when a supplied formula does not Parse properly.
Expand Down Expand Up @@ -63,6 +64,7 @@ public class FormulaParser
{
private String _formulaString;
private int _formulaLength;
/** points at the next character to be read (after the {@link #look} codepoint) */
private int _pointer;

private ParseNode _rootNode;
Expand All @@ -72,10 +74,10 @@ public class FormulaParser
private const char LF = '\n'; // Normally just XSSF

/**
* Lookahead Character.
* Lookahead unicode codepoint.
* Gets value '\0' when the input string is exhausted
*/
private char look;
private int look;

/**
* Tracks whether the run of whitespace preceding "look" could be an
Expand Down Expand Up @@ -183,7 +185,7 @@ private void GetChar()
}
if (_pointer < _formulaLength)
{
look = _formulaString[_pointer];
look = _formulaString.CodePointAt(_pointer);
}
else
{
Expand All @@ -192,7 +194,7 @@ private void GetChar()
look = (char)0;
_inIntersection = false;
}
_pointer++;
_pointer+= StringUtil.CharCount(look);
//Console.WriteLine("Got char: "+ look);
}

Expand All @@ -208,37 +210,47 @@ private Exception expected(String s)
}
else
{
msg = "Parse error near char " + (_pointer - 1) + " '" + look + "'"
msg = "Parse error near char " + (_pointer - 1) + " '" + char.ConvertFromUtf32(look) + "'"
+ " in specified formula '" + _formulaString + "'. Expected "
+ s;
}
return new FormulaParseException(msg);
}

/**
 * Recognize an Alpha Character.
 * Accepts a Unicode code point and returns true when it is a letter
 * (as decided by IsLetter), a dollar sign or an underscore.
 */
private static bool IsAlpha(int c)
{
    return IsLetter(c) || c == '$' || c == '_';
}

/** Recognize a letter; c is a Unicode code point classified via CodePoint.IsLetter */
private static bool IsLetter(int c)
{
    CodePoint codePoint = new CodePoint(c);
    return CodePoint.IsLetter(codePoint);
}

/**
 * Recognize a Decimal Digit.
 * Accepts a Unicode code point and delegates the classification to CodePoint.IsDigit.
 */
private static bool IsDigit(int c)
{
    return CodePoint.IsDigit(new CodePoint(c));
}

/**
 * Recognize an Alphanumeric.
 * True when the code point satisfies either IsAlpha or IsDigit.
 */
private static bool IsAlNum(int c)
{
    return IsAlpha(c) || IsDigit(c);
}

/**
 * Recognize White Space.
 * True when the code point is a space or equals one of the TAB, CR or LF constants.
 */
private static bool IsWhite(int c)
{
    return c == ' ' || c == TAB || c == CR || c == LF;
}

/** Recognize a letter or digit; c is a Unicode code point classified via CodePoint.IsLetterOrDigit */
private static bool IsLetterOrDigit(int c)
{
    CodePoint codePoint = new CodePoint(c);
    return CodePoint.IsLetterOrDigit(codePoint);
}

/** Skip Over Leading White Space */
private void SkipWhite()
{
Expand All @@ -253,7 +265,7 @@ private void SkipWhite()
* unchecked exception. This method does <b>not</b> consume whitespace (before or after the
* matched character).
*/
private void Match(char x)
private void Match(int x)
{
if (look != x)
{
Expand All @@ -268,9 +280,9 @@ private String ParseUnquotedIdentifier()
throw expected("unquoted identifier");
}
StringBuilder sb = new StringBuilder();
while (Char.IsLetterOrDigit(look) || look == '.')
while (IsLetterOrDigit(look) || look == '.')
{
sb.Append(look);
sb.Append(char.ConvertFromUtf32(look));
GetChar();
}
if (sb.Length < 1)
Expand All @@ -285,9 +297,9 @@ private String GetNum()
{
StringBuilder value = new StringBuilder();

while (IsDigit(this.look))
while (IsDigit(look))
{
value.Append(this.look);
value.Append(char.ConvertFromUtf32(look));
GetChar();
}
return value.Length == 0 ? null : value.ToString();
Expand Down Expand Up @@ -383,9 +395,15 @@ private static bool NeedsMemFunc(ParseNode root)
*
* @return <c>true</c> if the specified character may be used in a defined name
*/
private static bool IsValidDefinedNameChar(char ch)
private static bool IsValidDefinedNameChar(int ch)
{
if (Char.IsLetterOrDigit(ch))
if (IsLetterOrDigit(ch))
{
return true;
}
// the sheet naming rules are vague on whether unicode characters are allowed
// assume they're allowed.
if (ch > 128)
{
return true;
}
Expand All @@ -397,6 +415,7 @@ private static bool IsValidDefinedNameChar(char ch)
case '\\': // of all things
return true;
}
// includes special non-name control characters like ! $ : , ( ) [ ] and space
return false;
}
/**
Expand Down Expand Up @@ -982,7 +1001,7 @@ private String ParseAsColumnQuantifier()
StringBuilder name = new StringBuilder();
while (look != ']')
{
name.Append(look);
name.Append(char.ConvertFromUtf32(look));
GetChar();
}
Match(']');
Expand Down Expand Up @@ -1030,7 +1049,7 @@ private ParseNode ParseNonRange(int savePointer)
{
ResetPointer(savePointer);

if (Char.IsDigit(look))
if (IsDigit(look))
{
return new ParseNode(ParseNumber());
}
Expand Down Expand Up @@ -1083,13 +1102,13 @@ private String ParseAsName()
StringBuilder sb = new StringBuilder();

// defined names may begin with a letter or underscore or backslash
if (!char.IsLetter(look) && look != '_' && look != '\\')
if (!IsLetter(look) && look != '_' && look != '\\')
{
throw expected("number, string, defined name, or data table");
}
while (IsValidDefinedNameChar(look))
{
sb.Append(look);
sb.Append(char.ConvertFromUtf32(look));
GetChar();
}
SkipWhite();
Expand Down Expand Up @@ -1400,7 +1419,7 @@ private String GetBookName()
GetChar();
while (look != ']')
{
sb.Append(look);
sb.Append(char.ConvertFromUtf32(look));
GetChar();
}
GetChar();
Expand Down Expand Up @@ -1435,7 +1454,7 @@ private SheetIdentifier ParseSheetName()
bool done = look == '\'';
while (!done)
{
sb.Append(look);
sb.Append(char.ConvertFromUtf32(look));
GetChar();
if (look == '\'')
{
Expand All @@ -1461,13 +1480,13 @@ private SheetIdentifier ParseSheetName()
}

// unquoted sheet names must start with underscore or a letter
if (look == '_' || Char.IsLetter(look))
if (look == '_' || IsLetter(look))
{
StringBuilder sb = new StringBuilder();
// can concatenate idens with dots
while (IsUnquotedSheetNameChar(look))
{
sb.Append(look);
sb.Append(char.ConvertFromUtf32(look));
GetChar();
}
NameIdentifier iden = new NameIdentifier(sb.ToString(), false);
Expand Down Expand Up @@ -1511,9 +1530,15 @@ private SheetIdentifier ParseSheetRange(String bookname, NameIdentifier sheet1Na
/**
* very similar to {@link SheetNameFormatter#isSpecialChar(char)}
*/
private bool IsUnquotedSheetNameChar(char ch)
private bool IsUnquotedSheetNameChar(int ch)
{
if (Char.IsLetterOrDigit(ch))
if (IsLetterOrDigit(ch))
{
return true;
}
// the sheet naming rules are vague on whether unicode characters are allowed
// assume they're allowed.
if (ch > 128)
{
return true;
}
Expand All @@ -1530,7 +1555,7 @@ private void ResetPointer(int ptr)
_pointer = ptr;
if (_pointer <= _formulaLength)
{
look = _formulaString[_pointer - 1];
look = _formulaString.CodePointAt(_pointer - StringUtil.CharCount(look));
}
else
{
Expand Down Expand Up @@ -1761,7 +1786,7 @@ private void ValidateNumArgs(int numArgs, FunctionMetadata fm)
}
}

/**
 * Recognize a character that ends a function argument.
 * True when the code point is a comma or a closing parenthesis.
 */
private static bool IsArgumentDelimiter(int ch)
{
    return ch == ',' || ch == ')';
}
Expand Down Expand Up @@ -1875,7 +1900,7 @@ private ParseNode ParseSimpleFactor()
}
// named ranges and tables can start with underscore or backslash
// see https://support.office.com/en-us/article/Define-and-use-names-in-formulas-4d0f13ac-53b7-422e-afd2-abd7ff379c64?ui=en-US&rs=en-US&ad=US#bmsyntax_rules_for_names
if (IsAlpha(look) || Char.IsDigit(look) || look == '\'' || look == '[' || look == '_' || look == '\\')
if (IsAlpha(look) || IsDigit(look) || look == '\'' || look == '[' || look == '_' || look == '\\')
{
return ParseRangeExpression();
}
Expand Down Expand Up @@ -2237,7 +2262,7 @@ private String ParseStringLiteral()
break;
}
}
Token.Append(look);
Token.Append(char.ConvertFromUtf32(look));
GetChar();
}
return Token.ToString();
Expand Down
24 changes: 20 additions & 4 deletions main/Util/StringUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,17 @@ namespace NPOI.Util
/// @since May 10, 2002
/// @version 1.0
/// </summary>
public class StringUtil
public static class StringUtil
{
private static Encoding ISO_8859_1 = Encoding.GetEncoding("ISO-8859-1");
private static Encoding UTF16LE = Encoding.Unicode;
private static Dictionary<int, int> msCodepointToUnicode;
/**
* Constructor for the StringUtil object
*/
private StringUtil()
{
}
//private StringUtil()
//{
//}

/// <summary>
/// Given a byte array of 16-bit unicode characters in Little Endian
Expand Down Expand Up @@ -877,5 +877,21 @@ public static int CountMatches(string haystack, char needle)
}
return count;
}

/// <summary>
/// Returns the Unicode code point that starts at <paramref name="index"/> in
/// <paramref name="text"/>. A non-surrogate char is returned as-is; a valid
/// surrogate pair starting at <paramref name="index"/> is combined into one
/// code point.
/// </summary>
/// <param name="text">The UTF-16 string to read from.</param>
/// <param name="index">Zero-based char index of the first UTF-16 code unit to decode.</param>
/// <returns>The code point value.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
/// <exception cref="ArgumentException">
/// The char at <paramref name="index"/> is a surrogate that is not the start of a valid pair.
/// </exception>
public static int CodePointAt(this string text, int index)
{
    // An extension method can be invoked on a null receiver; fail with a clear error.
    if (text == null)
    {
        throw new ArgumentNullException(nameof(text));
    }

    char first = text[index];
    if (!char.IsSurrogate(first))
    {
        return first;
    }

    if (index + 1 < text.Length && char.IsSurrogatePair(first, text[index + 1]))
    {
        return char.ConvertToUtf32(first, text[index + 1]);
    }

    // ArgumentException derives from Exception, so existing catch blocks keep working;
    // the message text is unchanged.
    throw new ArgumentException("String was not well-formed UTF-16.");
}
}
}
Loading
Loading