XML Deserialisation & Malformed XML [modified]
-
Hello all, I have a curious problem relating to XML deserialisation where the deserialiser allows malformed XML to pass through without issue. This needs some further information, so without further ado: I have the following simple schema:
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" attributeFormDefault="unqualified">
<xs:element name="TestSchema">
<xs:complexType>
<xs:sequence>
<xs:element name="Widgets" type="xs:string" nillable="false"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>
From this I have created .NET 2 C# classes using the XSD tool; this results in the following class:
using System.Xml.Serialization;
/// <summary>
/// Serialisation class for the <c>TestSchema</c> root element, as emitted by the
/// xsd tool. It exposes the single string element <c>Widgets</c>.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("xsd", "2.0.50727.42")]
[System.Serializable]
[System.Diagnostics.DebuggerStepThrough]
[System.ComponentModel.DesignerCategory("code")]
[System.Xml.Serialization.XmlType(AnonymousType = true)]
[System.Xml.Serialization.XmlRoot(Namespace = "", IsNullable = false)]
public partial class TestSchema
{
    // Backing store for the Widgets property.
    private string widgetsField;

    /// <remarks/>
    public string Widgets
    {
        get
        {
            return widgetsField;
        }
        set
        {
            widgetsField = value;
        }
    }
}
This is then added to a simple test project that looks like this:
/// <summary>
/// Deserialises a hard-coded XML document into a <c>TestSchema</c> instance,
/// prints the resulting object's hash code and then waits for a line of
/// console input.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Input XML. <LaLa> is deliberately an element the schema does not declare.
    string rawXml = "<TestSchema>"
        + "<Widgets>A value for widgets</Widgets>"
        + "<LaLa>An element that doesnt exist in the schema</LaLa>"
        + "</TestSchema>";

    // Deserialise. The text is handed to the serialiser through a StringReader,
    // so no byte-encoding round trip is needed, and the using block releases
    // the reader even when Deserialize throws.
    XmlSerializer ser = new XmlSerializer(typeof(TestSchema));
    TestSchema obj;
    using (TextReader reader = new StringReader(rawXml))
    {
        obj = (TestSchema)ser.Deserialize(reader);
    }

    // Output
    Console.WriteLine(obj.GetHashCode());
    Console.ReadLine();
}
Now all this works, however I wouldn't expect it to! The more eagle-eyed will have noticed the following line in the code above:
rawXml += "<LaLa>An element that doesnt exist in the schema</LaLa>";
When the deserialiser is processing the XML string, it seems that a
-
Hello all, I have a curious problem relating to XML deserialisation where the deserialiser allows malformed XML to pass through without issue. This needs some further information, so without further ado: I have the following simple schema:
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" attributeFormDefault="unqualified">
<xs:element name="TestSchema">
<xs:complexType>
<xs:sequence>
<xs:element name="Widgets" type="xs:string" nillable="false"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>
From this I have created .NET 2 C# classes using the XSD tool; this results in the following class:
using System.Xml.Serialization;
/// <summary>
/// Serialisation class for the <c>TestSchema</c> root element, as emitted by the
/// xsd tool. It exposes the single string element <c>Widgets</c>.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("xsd", "2.0.50727.42")]
[System.Serializable]
[System.Diagnostics.DebuggerStepThrough]
[System.ComponentModel.DesignerCategory("code")]
[System.Xml.Serialization.XmlType(AnonymousType = true)]
[System.Xml.Serialization.XmlRoot(Namespace = "", IsNullable = false)]
public partial class TestSchema
{
    // Backing store for the Widgets property.
    private string widgetsField;

    /// <remarks/>
    public string Widgets
    {
        get
        {
            return widgetsField;
        }
        set
        {
            widgetsField = value;
        }
    }
}
This is then added to a simple test project that looks like this:
/// <summary>
/// Deserialises a hard-coded XML document into a <c>TestSchema</c> instance,
/// prints the resulting object's hash code and then waits for a line of
/// console input.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Input XML. <LaLa> is deliberately an element the schema does not declare.
    string rawXml = "<TestSchema>"
        + "<Widgets>A value for widgets</Widgets>"
        + "<LaLa>An element that doesnt exist in the schema</LaLa>"
        + "</TestSchema>";

    // Deserialise. The text is handed to the serialiser through a StringReader,
    // so no byte-encoding round trip is needed, and the using block releases
    // the reader even when Deserialize throws.
    XmlSerializer ser = new XmlSerializer(typeof(TestSchema));
    TestSchema obj;
    using (TextReader reader = new StringReader(rawXml))
    {
        obj = (TestSchema)ser.Deserialize(reader);
    }

    // Output
    Console.WriteLine(obj.GetHashCode());
    Console.ReadLine();
}
Now all this works, however I wouldn't expect it to! The more eagle-eyed will have noticed the following line in the code above:
rawXml += "<LaLa>An element that doesnt exist in the schema</LaLa>";
When the deserialiser is processing the XML string, it seems that a
It would seem I have found the solution, a tweak to the code above and malformed XML is no longer allowed:
/// <summary>
/// Deserialises a hard-coded XML document into a <c>TestSchema</c> instance
/// while supplying an <c>XmlDeserializationEvents</c> instance, so that the
/// four handler methods are called back for content the serialiser does not
/// recognise. Prints the resulting object's hash code and then waits for a
/// line of console input.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Input XML. <LaLa> is deliberately an element the schema does not declare.
    string rawXml = "<TestSchema>"
        + "<Widgets>A value for widgets</Widgets>"
        + "<LaLa>Hold on whats this an element that doesnt exist in the schema</LaLa>"
        + "</TestSchema>";

    XmlSerializer s = new XmlSerializer(typeof(TestSchema));

    // Route each deserialisation event to its handler.
    XmlDeserializationEvents deserializationEvents = new XmlDeserializationEvents();
    deserializationEvents.OnUnknownAttribute = new XmlAttributeEventHandler(UnknownAttributeEventHandler);
    deserializationEvents.OnUnknownElement = new XmlElementEventHandler(UnknownElementEventHandler);
    deserializationEvents.OnUnknownNode = new XmlNodeEventHandler(UnknownNodeEventHandler);
    deserializationEvents.OnUnreferencedObject = new UnreferencedObjectEventHandler(UnreferencedObjEventHandler);

    // Deserialise. Reading straight from the string avoids the byte-encoding
    // round trip; the using block closes the reader even when Deserialize throws.
    TestSchema obj;
    using (XmlReader reader = new XmlTextReader(new StringReader(rawXml)))
    {
        obj = (TestSchema)s.Deserialize(reader, deserializationEvents);
    }

    // Output
    Console.WriteLine(obj.GetHashCode());
    Console.ReadLine();
}
/// <summary>
/// Callback matching <c>XmlAttributeEventHandler</c>. Placeholder: the body is
/// intentionally empty and is where the condition would be handled.
/// </summary>
private static void UnknownAttributeEventHandler(object sender, XmlAttributeEventArgs e)
{
    // handle event condition
}

/// <summary>
/// Callback matching <c>XmlElementEventHandler</c>. Placeholder: the body is
/// intentionally empty and is where the condition would be handled.
/// </summary>
private static void UnknownElementEventHandler(object sender, XmlElementEventArgs e)
{
    // handle event condition
}

/// <summary>
/// Callback matching <c>XmlNodeEventHandler</c>. Placeholder: the body is
/// intentionally empty and is where the condition would be handled.
/// </summary>
private static void UnknownNodeEventHandler(object sender, XmlNodeEventArgs e)
{
    // handle event condition
}

/// <summary>
/// Callback matching <c>UnreferencedObjectEventHandler</c>. Placeholder: the
/// body is intentionally empty and is where the condition would be handled.
/// </summary>
private static void UnreferencedObjEventHandler(object sender, UnreferencedObjectEventArgs e)
{
    // handle event condition
}
As you can see in the code above, the XmlSerializer.Deserialize can also take an instance of XmlDeserializationEvents. If this is setup and passed in these events are created and from there I can handle the error condition.
-
It would seem I have found the solution, a tweak to the code above and malformed XML is no longer allowed:
/// <summary>
/// Deserialises a hard-coded XML document into a <c>TestSchema</c> instance
/// while supplying an <c>XmlDeserializationEvents</c> instance, so that the
/// four handler methods are called back for content the serialiser does not
/// recognise. Prints the resulting object's hash code and then waits for a
/// line of console input.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Input XML. <LaLa> is deliberately an element the schema does not declare.
    string rawXml = "<TestSchema>"
        + "<Widgets>A value for widgets</Widgets>"
        + "<LaLa>Hold on whats this an element that doesnt exist in the schema</LaLa>"
        + "</TestSchema>";

    XmlSerializer s = new XmlSerializer(typeof(TestSchema));

    // Route each deserialisation event to its handler.
    XmlDeserializationEvents deserializationEvents = new XmlDeserializationEvents();
    deserializationEvents.OnUnknownAttribute = new XmlAttributeEventHandler(UnknownAttributeEventHandler);
    deserializationEvents.OnUnknownElement = new XmlElementEventHandler(UnknownElementEventHandler);
    deserializationEvents.OnUnknownNode = new XmlNodeEventHandler(UnknownNodeEventHandler);
    deserializationEvents.OnUnreferencedObject = new UnreferencedObjectEventHandler(UnreferencedObjEventHandler);

    // Deserialise. Reading straight from the string avoids the byte-encoding
    // round trip; the using block closes the reader even when Deserialize throws.
    TestSchema obj;
    using (XmlReader reader = new XmlTextReader(new StringReader(rawXml)))
    {
        obj = (TestSchema)s.Deserialize(reader, deserializationEvents);
    }

    // Output
    Console.WriteLine(obj.GetHashCode());
    Console.ReadLine();
}
/// <summary>
/// Callback matching <c>XmlAttributeEventHandler</c>. Placeholder: the body is
/// intentionally empty and is where the condition would be handled.
/// </summary>
private static void UnknownAttributeEventHandler(object sender, XmlAttributeEventArgs e)
{
    // handle event condition
}

/// <summary>
/// Callback matching <c>XmlElementEventHandler</c>. Placeholder: the body is
/// intentionally empty and is where the condition would be handled.
/// </summary>
private static void UnknownElementEventHandler(object sender, XmlElementEventArgs e)
{
    // handle event condition
}

/// <summary>
/// Callback matching <c>XmlNodeEventHandler</c>. Placeholder: the body is
/// intentionally empty and is where the condition would be handled.
/// </summary>
private static void UnknownNodeEventHandler(object sender, XmlNodeEventArgs e)
{
    // handle event condition
}

/// <summary>
/// Callback matching <c>UnreferencedObjectEventHandler</c>. Placeholder: the
/// body is intentionally empty and is where the condition would be handled.
/// </summary>
private static void UnreferencedObjEventHandler(object sender, UnreferencedObjectEventArgs e)
{
    // handle event condition
}
As you can see in the code above, the XmlSerializer.Deserialize can also take an instance of XmlDeserializationEvents. If this is setup and passed in these events are created and from there I can handle the error condition.
-
Since posting the original solution I have found that you can also set these events directly on the XmlSerializer:
// Subscribe the handlers to the serializer's own events.
XmlSerializer xmlSerializer = new XmlSerializer(objectType);
xmlSerializer.UnknownAttribute += xmlSerializer_UnknownAttribute;
xmlSerializer.UnknownElement += xmlSerializer_UnknownElement;
xmlSerializer.UnknownNode += xmlSerializer_UnknownNode;
xmlSerializer.UnreferencedObject += xmlSerializer_UnreferencedObject;
From a functionality point of view I don't suppose it makes any difference at all, however IMHO this method is neater.