Skip to content

Commit 17a7df5

Browse files
Merge pull request xceedsoftware#102 from VictorLoktev/master
Added check for restricted XML characters.
2 parents 81e1bc7 + 86d7b32 commit 17a7df5

File tree

6 files changed

+203
-22
lines changed

6 files changed

+203
-22
lines changed

DocX/HelperFunctions.cs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,18 @@ internal static class HelperFunctions
1717
public const string DOCUMENT_DOCUMENTTYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";
1818
public const string TEMPLATE_DOCUMENTTYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml";
1919

20-
public static bool IsNullOrWhiteSpace(this string value)
20+
/// <summary>
21+
/// Characters matched by the XML 1.1 RestrictedChar production: [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]. FormatInput skips any character found in this list instead of appending it to the run text.
22+
/// See: https://www.w3.org/TR/xml11/#sec-xml11
23+
/// </summary>
24+
public static readonly char[] RestrictedXmlChar = new char[] {
25+
'\x1','\x2','\x3','\x4','\x5','\x6','\x7','\x8','\xb','\xc','\xe','\xf',
26+
'\x10','\x11','\x12','\x13','\x14','\x15','\x16','\x17','\x18','\x19','\x1a','\x1b','\x1c','\x1d','\x1e','\x1f',
27+
'\x7f','\x80','\x81','\x82','\x83','\x84','\x86','\x87','\x88','\x89','\x8a','\x8b','\x8c','\x8d','\x8e','\x8f',
28+
'\x90','\x91','\x92','\x93','\x94','\x95','\x96','\x97','\x98','\x99','\x9a','\x9b','\x9c','\x9d','\x9e','\x9f'
29+
};
30+
31+
public static bool IsNullOrWhiteSpace(this string value)
2132
{
2233
if (value == null) return true;
2334
return string.IsNullOrEmpty(value.Trim());
@@ -589,7 +600,15 @@ internal static List<XElement> FormatInput(string text, XElement rPr)
589600
break;
590601

591602
default:
592-
sb.Append(c);
603+
// Check the character against restricted list:
604+
// RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]
605+
// See https://www.w3.org/TR/xml11/#sec-xml11
606+
if( RestrictedXmlChar.Contains( c ) )
607+
{
608+
// skip the character
609+
}
610+
else
611+
sb.Append(c);
593612
break;
594613
}
595614

DocX/Paragraph.cs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,7 +1097,7 @@ public Paragraph InsertHyperlink(Hyperlink h, int index = 0)
10971097
XElement h_xml;
10981098
if (index == 0)
10991099
{
1100-
// Add this hyperlink as the last element.
1100+
// Add this hyperlink as the first element.
11011101
Xml.AddFirst(h.Xml);
11021102

11031103
// Extract the picture back out of the DOM.
@@ -1138,10 +1138,12 @@ public Paragraph InsertHyperlink(Hyperlink h, int index = 0)
11381138
h_xml = (XElement)run.Xml.NextNode;
11391139
}
11401140

1141-
h_xml.SetAttributeValue(DocX.r + "id", Id);
1142-
}
1141+
}
1142+
h_xml.SetAttributeValue( DocX.r + "id", Id );
11431143

1144-
return this;
1144+
this.runs = Xml.Elements().Last().Elements( XName.Get( "r", DocX.w.NamespaceName ) ).ToList();
1145+
1146+
return this;
11451147
}
11461148

11471149
/// <summary>

DocX/Table.cs

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -785,19 +785,29 @@ public TableDesign Design
785785

786786
if (design == TableDesign.Custom)
787787
{
788-
if (string.IsNullOrEmpty(_customTableDesignName))
789-
{
790-
design = TableDesign.None;
791-
if (style != null)
792-
style.Remove();
793-
794-
}
795-
else
796-
{
797-
val.Value = _customTableDesignName;
798-
}
799-
}
800-
else
788+
#region Code is commented out
789+
// This code causes a problem while copying a table.
790+
// Look at Test_Clone_Table_Twice method in test.
791+
//Example:
792+
//Table tab1 = doc.Tables[ 0 ];
793+
//Table tab2 = doc.InsertTable( tab1 );
794+
//Table tab3 = doc.InsertTable( tab2 ); - here we have exception at "var styleElement =" line below in this method
795+
// The source of the problem is that the commented-out code loses the "<w:tblStyle w:val="a3"/>" element.
796+
797+
//if (string.IsNullOrEmpty(_customTableDesignName))
798+
//{
799+
// design = TableDesign.None;
800+
// if (style != null)
801+
// style.Remove();
802+
803+
//}
804+
//else
805+
//{
806+
// val.Value = _customTableDesignName;
807+
//}
808+
#endregion
809+
}
810+
else
801811
{
802812
switch (design)
803813
{
@@ -1132,9 +1142,10 @@ from e in external_style_doc.Descendants()
11321142
let styleId = e.Attribute(XName.Get("styleId", DocX.w.NamespaceName))
11331143
where (styleId != null && styleId.Value == val.Value)
11341144
select e
1135-
).First();
1145+
).FirstOrDefault();
11361146

1137-
Document.styles.Element(XName.Get("styles", DocX.w.NamespaceName)).Add(styleElement);
1147+
if( styleElement != null )
1148+
Document.styles.Element(XName.Get("styles", DocX.w.NamespaceName)).Add(styleElement);
11381149
}
11391150
}
11401151
}

DocX/bin/Release/DocX.XML

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

DocX/bin/Release/DocX.dll

0 Bytes
Binary file not shown.

UnitTests/DocXUnitTests.cs

Lines changed: 144 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,28 @@ public void TableWithSpecifiedWidths()
150150
}
151151
}
152152

153+
[Test]
154+
public void Test_InvalidCharacter()
155+
{
156+
using( var output = File.Open( Path.Combine( _directoryWithFiles, "InvalidCharacters.docx" ), FileMode.Create ) )
157+
{
158+
using( var doc = DocX.Create( output ) )
159+
{
160+
doc.InsertParagraph( "\b" );
161+
Exception ex = null;
162+
try
163+
{
164+
doc.Save();
165+
}
166+
catch( Exception e )
167+
{
168+
ex = e;
169+
}
170+
Assert.IsTrue( ex == null );
171+
}
172+
}
173+
}
174+
153175
/// <summary>
154176
/// TextRemove should not remove empty paragraphs in case the paragraph is alone in the cell.
155177
/// In the rest cases empty paragraph may be removed.
@@ -274,6 +296,29 @@ public void Test_Table_InsertRow_Keeps_Formatting()
274296
}
275297
}
276298

299+
[Test]
300+
public void Test_Clone_Table_Twice()
301+
{
302+
using( var input = File.Open( Path.Combine( _directoryWithFiles, "TableSpecifiedHeights.docx" ), FileMode.Open ) )
303+
{
304+
using( var doc = DocX.Load( input ) )
305+
{
306+
// Make sure content of the file is ok for test
307+
Assert.IsTrue( doc.Tables.Count == 1 );
308+
309+
Table tab1 = doc.Tables[ 0 ];
310+
doc.InsertParagraph( "" );
311+
Table tab2 = doc.InsertTable( tab1 );
312+
Assert.IsTrue( doc.Tables.Count == 2 );
313+
doc.InsertParagraph( "" );
314+
Table tab3 = doc.InsertTable( tab2 );
315+
Assert.IsTrue( doc.Tables.Count == 3 );
316+
317+
doc.SaveAs( Path.Combine( _directoryWithFiles, "TwoClonedTables.docx" ) );
318+
}
319+
}
320+
}
321+
277322
public string ReplaceFunc(string findStr)
278323
{
279324
var testPatterns = new Dictionary<string, string>
@@ -803,7 +848,105 @@ public void Test_Insert_Picture()
803848
}
804849
}
805850

806-
[Test]
851+
/// <summary>
852+
/// This test fills two tables with hyperlinks.
853+
/// </summary>
854+
[Test]
855+
public void Test_Insert_Hyperlink_In_Tables()
856+
{
857+
using( var input = File.Open( Path.Combine( _directoryWithFiles, "TableSpecifiedHeights.docx" ), FileMode.Open ) )
858+
{
859+
using( var doc = DocX.Load( input ) )
860+
{
861+
// Make sure content of the file is ok for test
862+
Assert.IsTrue( doc.Tables.Count > 0 );
863+
Table tab1 = doc.Tables[ 0 ];
864+
Assert.IsTrue( tab1.RowCount > 0 );
865+
Assert.IsTrue( tab1.Rows[0].ColumnCount > 0 );
866+
doc.InsertParagraph( "" );
867+
Table tab2 = doc.InsertTable( tab1 );
868+
Assert.IsTrue( tab2.RowCount > 0 );
869+
870+
Row row1 = tab1.Rows[ 0 ];
871+
Row row2 = tab2.Rows[ 0 ];
872+
873+
// Insert hyperlinks 10 times, alternating between the two tables
874+
for( int index = 0; index < 10; index++ )
875+
{
876+
Row newRow1 = tab1.InsertRow( row1 );
877+
Row newRow2 = tab2.InsertRow( row2 );
878+
879+
Hyperlink h1 = doc.AddHyperlink(
880+
string.Format( "Table {0}, Row {1}. Google searches for {0} {1}", 1, index + 1 ),
881+
new Uri( string.Format( "https://www.google.com/search?q=Table{0}Row{1}", 1, index + 1 ) ) );
882+
newRow1.Cells[ 0 ].Paragraphs[ 0 ].InsertHyperlink( h1 );
883+
884+
Hyperlink h2 = doc.AddHyperlink(
885+
string.Format( "Table {0}, Row {1}. Google searches for {0} {1}", 2, index + 1 ),
886+
new Uri( string.Format( "https://www.google.com/search?q=Table{0}Row{1}", 2, index + 1 ) ) );
887+
newRow2.Cells[ 0 ].Paragraphs[ 0 ].InsertHyperlink( h2 );
888+
889+
}
890+
// Make sure the links are OK and in the right order
891+
for( int index = 0; index < doc.Hyperlinks.Count; index++ )
892+
{
893+
Hyperlink h = doc.Hyperlinks[ index ];
894+
string text = string.Format( "Table {0}, Row {1}. Google searches for {0} {1}", ( index / 10 ) + 1, ( index ) % 10 + 1 );
895+
string uri = string.Format( "https://www.google.com/search?q=Table{0}Row{1}", ( index / 10 ) + 1, ( index ) % 10 + 1 );
896+
Assert.IsTrue( string.Compare( h.Text, text ) == 0 );
897+
Assert.IsTrue( h.Uri != null );
898+
Assert.IsTrue( string.Compare( h.Uri.ToString(), uri ) == 0 );
899+
}
900+
doc.SaveAs( Path.Combine( _directoryDocuments, "Test_Insert_Hyperlink_In_Tables.docx" ) );
901+
}
902+
}
903+
}
904+
905+
/// <summary>
906+
/// This test creates two files. The first uses InsertHyperlink. The second uses AppendHyperlink.
907+
/// Both hyperlink collections should be equal to each other.
908+
/// We need to be sure the bug in InsertHyperlink is fixed (the id attribute in the hyperlink was empty and the order of inserted hyperlinks was broken).
909+
/// </summary>
910+
[Test]
911+
public void Test_Compare_InsertHyperlink_And_AppendHyperLinks()
912+
{
913+
string fileName1 = Path.Combine( _directoryDocuments, "Test_InsertHyperLinks.docx" );
914+
string fileName2 = Path.Combine( _directoryDocuments, "Test_AppendHyperlinks.docx" );
915+
using( DocX document1 = DocX.Create( fileName1 ) )
916+
{
917+
using( DocX document2 = DocX.Create( fileName2 ) )
918+
{
919+
for( int index = 0; index < 10; index++ )
920+
{
921+
Hyperlink h = document1.AddHyperlink(
922+
string.Format( "Google searches for {0}", index + 1 ),
923+
new Uri( string.Format( "https://www.google.com/search?q={0}", index + 1 ) ) );
924+
document1.InsertParagraph( "" ).InsertHyperlink( h );
925+
}
926+
document1.Save();
927+
928+
for( int index = 0; index < 10; index++ )
929+
{
930+
Hyperlink h = document2.AddHyperlink(
931+
string.Format( "Google searches for {0}", index + 1 ),
932+
new Uri( string.Format( "https://www.google.com/search?q={0}", index + 1 ) ) );
933+
document2.InsertParagraph( "" ).AppendHyperlink( h );
934+
}
935+
document2.Save();
936+
937+
Assert.IsTrue( document1.Hyperlinks.Count == document2.Hyperlinks.Count );
938+
for( int index = 0; index < document1.Hyperlinks.Count; index++ )
939+
{
940+
Hyperlink h1 = document1.Hyperlinks[ index ];
941+
Hyperlink h2 = document2.Hyperlinks[ index ];
942+
Assert.IsTrue( string.Compare( h1.Text, h2.Text ) == 0 );
943+
Assert.IsTrue( string.Compare( h1.Uri.ToString(), h2.Uri.ToString() ) == 0 );
944+
}
945+
}
946+
}
947+
}
948+
949+
[Test]
807950
public void Test_Insert_Hyperlink()
808951
{
809952
// Load test document.

0 commit comments

Comments
 (0)