I have an XML file with a specified schema location such as this:
xsi:schemaLocation="someurl ..\localSchemaPath.xsd"
I want to validate in C#. Visual Studio, when I open the file, validates it against the schema and lists errors perfectly. Somehow, though, I can't seem to validate it automatically in C# without specifying the schema to validate against like so:
// Load the schema explicitly, attach it to the document, then validate.
XmlDocument asset = new XmlDocument();
XmlSchema schema;
// Dispose the schema reader as soon as the schema has been parsed so the
// handle on the .xsd file is not kept open.
using (XmlTextReader schemaReader = new XmlTextReader("relativeSchemaPath"))
{
    schema = XmlSchema.Read(schemaReader, SchemaValidationHandler);
}
asset.Schemas.Add(schema);
asset.Load(filename);
asset.Validate(DocumentValidationHandler);
Shouldn't I be able to validate automatically against the schema specified in the XML file? What am I missing?
You need to create an XmlReaderSettings instance and pass that to your XmlReader when you create it. Then you can subscribe to the ValidationEventHandler in the settings to receive validation errors. Your code will end up looking like this:
using System;
using System.IO;
using System.Xml;
using System.Xml.Schema;
/// <summary>
/// Reads <c>inlineSchema.xml</c> with schema validation enabled, honouring inline
/// schemas and schema-location hints found in the document, and prints every
/// validation warning or error to the console.
/// </summary>
public class ValidXSD
{
    public static void Main()
    {
        // Set the validation settings.
        XmlReaderSettings settings = new XmlReaderSettings();
        settings.ValidationType = ValidationType.Schema;
        settings.ValidationFlags |= XmlSchemaValidationFlags.ProcessInlineSchema;
        settings.ValidationFlags |= XmlSchemaValidationFlags.ProcessSchemaLocation;
        settings.ValidationFlags |= XmlSchemaValidationFlags.ReportValidationWarnings;
        settings.ValidationEventHandler += ValidationCallBack;

        // Create the XmlReader object; the using block releases the file even
        // if reading throws.
        using (XmlReader reader = XmlReader.Create("inlineSchema.xml", settings))
        {
            // Parse the file. Validation happens as a side effect of reading.
            while (reader.Read())
            {
            }
        }
    }

    /// <summary>Displays any warnings or errors raised while reading.</summary>
    /// <param name="sender">The object that raised the validation event.</param>
    /// <param name="args">Carries the severity and message of the event.</param>
    private static void ValidationCallBack(object sender, ValidationEventArgs args)
    {
        if (args.Severity == XmlSeverityType.Warning)
        {
            Console.WriteLine("\tWarning: Matching schema not found. No validation occurred." + args.Message);
        }
        else
        {
            Console.WriteLine("\tValidation error: " + args.Message);
        }
    }
}
A simpler way, if you are using .NET 3.5, is to use XDocument and XmlSchemaSet validation.
// Accumulates one line per validation problem; stays empty for a valid document.
string msg = "";

XmlSchemaSet schemas = new XmlSchemaSet();
schemas.Add(schemaNamespace, schemaFileName);

XDocument doc = XDocument.Load(filename);
ValidationEventHandler collectMessage = (o, e) => msg += e.Message + Environment.NewLine;
doc.Validate(schemas, collectMessage);

if (msg == "")
{
    Console.WriteLine("Document is valid");
}
else
{
    Console.WriteLine("Document invalid: " + msg);
}
See the MSDN documentation for more assistance.
Personally, I favor validating without a callback:
/// <summary>
/// Checks whether the XML file at <paramref name="xmlPath"/> conforms to the
/// schema file at <paramref name="xsdPath"/>.
/// </summary>
/// <param name="xmlPath">Path of the XML document to load.</param>
/// <param name="xsdPath">Path of the XSD schema to validate against.</param>
/// <returns>
/// <c>false</c> when validation raises an <see cref="XmlSchemaValidationException"/>;
/// otherwise <c>true</c>.
/// </returns>
public bool ValidateSchema(string xmlPath, string xsdPath)
{
    XmlDocument document = new XmlDocument();
    document.Load(xmlPath);
    document.Schemas.Add(null, xsdPath);

    bool isValid = true;
    try
    {
        // No callback is supplied, so a schema violation surfaces as an exception.
        document.Validate(null);
    }
    catch (XmlSchemaValidationException)
    {
        isValid = false;
    }

    return isValid;
}
(see Timiz0r's post in Synchronous XML Schema Validation? .NET 3.5)
The following example validates an XML file and generates the appropriate error or warning.
using System;
using System.IO;
using System.Xml;
using System.Xml.Schema;
/// <summary>
/// Validates <c>store.xml</c> against <c>books.xsd</c> and prints every
/// validation warning or error to the console.
/// </summary>
public class Sample
{
    public static void Main()
    {
        //Load the XmlSchemaSet.
        XmlSchemaSet schemaSet = new XmlSchemaSet();
        schemaSet.Add("urn:bookstore-schema", "books.xsd");

        //Validate the file using the schema stored in the schema set.
        //NOTE(review): the original sample says elements in "urn:cd-schema"
        //produce a warning because no schema matches that namespace;
        //ReportValidationWarnings is not enabled here, so confirm.
        Validate("store.xml", schemaSet);
        Console.ReadLine();
    }

    /// <summary>
    /// Reads <paramref name="filename"/> with schema validation enabled, using
    /// every schema contained in <paramref name="schemaSet"/>.
    /// </summary>
    /// <param name="filename">Path or URI of the XML document to validate.</param>
    /// <param name="schemaSet">Schemas to validate against; may be empty.</param>
    private static void Validate(String filename, XmlSchemaSet schemaSet)
    {
        Console.WriteLine();
        Console.WriteLine("\r\nValidating XML file {0}...", filename);

        XmlReaderSettings settings = new XmlReaderSettings();
        // Add the whole set rather than picking a single schema out of it, so
        // multi-schema sets are honoured and an empty set does not pass null.
        settings.Schemas.Add(schemaSet);
        settings.ValidationEventHandler += ValidationCallBack;
        settings.ValidationType = ValidationType.Schema;

        //Create the schema validating reader; the using block closes it even
        //when reading throws.
        using (XmlReader vreader = XmlReader.Create(filename, settings))
        {
            while (vreader.Read())
            {
            }
        }
    }

    /// <summary>Displays any warnings or errors raised while reading.</summary>
    /// <param name="sender">The object that raised the validation event.</param>
    /// <param name="args">Carries the severity and message of the event.</param>
    private static void ValidationCallBack(object sender, ValidationEventArgs args)
    {
        if (args.Severity == XmlSeverityType.Warning)
        {
            Console.WriteLine("\tWarning: Matching schema not found. No validation occurred." + args.Message);
        }
        else
        {
            Console.WriteLine("\tValidation error: " + args.Message);
        }
    }
}
The preceding example uses the following input files.
store.xml
<?xml version='1.0'?>
<bookstore xmlns="urn:bookstore-schema" xmlns:cd="urn:cd-schema">
<book genre="novel">
<title>The Confidence Man</title>
<price>11.99</price>
</book>
<cd:cd>
<title>Americana</title>
<cd:artist>Offspring</cd:artist>
<price>16.95</price>
</cd:cd>
</bookstore>
books.xsd
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns="urn:bookstore-schema"
elementFormDefault="qualified"
targetNamespace="urn:bookstore-schema">
<xsd:element name="bookstore" type="bookstoreType"/>
<xsd:complexType name="bookstoreType">
<xsd:sequence maxOccurs="unbounded">
<xsd:element name="book" type="bookType"/>
</xsd:sequence>
</xsd:complexType>
<xsd:complexType name="bookType">
<xsd:sequence>
<xsd:element name="title" type="xsd:string"/>
<xsd:element name="author" type="authorName"/>
<xsd:element name="price" type="xsd:decimal"/>
</xsd:sequence>
<xsd:attribute name="genre" type="xsd:string"/>
</xsd:complexType>
<xsd:complexType name="authorName">
<xsd:sequence>
<xsd:element name="first-name" type="xsd:string"/>
<xsd:element name="last-name" type="xsd:string"/>
</xsd:sequence>
</xsd:complexType>
</xsd:schema>
I had to do this kind of automatic validation in VB, and this is how I did it (converted to C#):
// Enable schema validation and let the reader pick up schemas referenced by
// xsi:schemaLocation / xsi:noNamespaceSchemaLocation in the document.
XmlReaderSettings settings = new XmlReaderSettings();
settings.ValidationType = ValidationType.Schema;
// The flags type is fully qualified: unlike VB's Imports, a C# using directive
// does not allow a partially qualified "Schema.XmlSchemaValidationFlags".
settings.ValidationFlags |= System.Xml.Schema.XmlSchemaValidationFlags.ProcessSchemaLocation;
// Wrap the existing reader so that reading through XMLvalidator performs validation.
XmlReader XMLvalidator = XmlReader.Create(reader, settings);
Then I subscribed to the settings.ValidationEventHandler event while reading the file.
Related
I'm having a problem with the .NET XML parser. When parsing a DITA document, the parser should add a default attribute value in a namespace of its own, but when the namespace is not declared in the document, the parser throws an exception:
Default attribute 'http://dita.oasis-open.org/architecture/2005/:DITAArchVersion'
for element 'topic' could not be applied as the attribute namespace
is not mapped to a prefix in the instance document.
Here's a minimal sample topic that shows the behaviour...:
<?xml version="1.0" encoding="UTF-8"?>
<topic xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="so1"
xsi:noNamespaceSchemaLocation="urn:oasis:names:tc:dita:xsd:topic.xsd">
<title>StackOverflow!</title>
</topic>
Documents like this work fine in other environments, which leads me to think that we must be missing something in the parser configuration. This is how the document is currently parsed:
// Configure the reader; schema validation is switched on only when requested.
XmlReaderSettings settings = new XmlReaderSettings();
if (validate)
{
    settings.ValidationType = ValidationType.Schema;
    settings.ValidationFlags |= XmlSchemaValidationFlags.ProcessInlineSchema
                              | XmlSchemaValidationFlags.ProcessSchemaLocation;
    // Any validation event aborts parsing with an exception naming the document.
    settings.ValidationEventHandler += (sender, args) =>
    {
        throw new Exception("Validation error for document " +
            pathinfo + ": " + args.Message);
    };
}
settings.DtdProcessing = DtdProcessing.Prohibit;
settings.XmlResolver = new CatalogXmlResolver(/*...*/);

// Parse the in-memory bytes into a DOM.
XmlReader reader = XmlReader.Create(new MemoryStream(data), settings);
XmlDocument doc = new XmlDocument();
// Whitespace is preserved only for non-validating loads.
doc.PreserveWhitespace = !validate;
doc.XmlResolver = getCatalog();
doc.Load(reader);
How can I tell the parser to also add the corresponding namespace as it adds the default attribute?
Edit: Here's the stack trace:
Void <parseDocument>b__0(System.Object, System.Xml.Schema.ValidationEventArgs) System.Exception: Validation error for document /temp/Untitled1.xml: Default attribute 'http://dita.oasis-open.org/architecture/2005/:DITAArchVersion' for element 'topic' could not be applied as the attribute namespace is not mapped to a prefix in the instance document.
at (...)
at System.Xml.Schema.XmlSchemaValidator.SendValidationEvent(XmlSchemaValidationException e, XmlSeverityType severity)
at System.Xml.Schema.XmlSchemaValidator.SendValidationEvent(String code, String[] args)
at System.Xml.Schema.XmlSchemaValidator.GetUnspecifiedDefaultAttributes(ArrayList defaultAttributes, Boolean createNodeData)
at System.Xml.XsdValidatingReader.ValidateAttributes()
at System.Xml.XsdValidatingReader.ProcessElementEvent()
at System.Xml.XsdValidatingReader.ProcessReaderEvent()
at System.Xml.XsdValidatingReader.Read()
at System.Xml.XmlLoader.LoadNode(Boolean skipOverWhitespace)
at System.Xml.XmlLoader.LoadDocSequence(XmlDocument parentDoc)
at System.Xml.XmlLoader.Load(XmlDocument doc, XmlReader reader, Boolean preserveWhitespace)
at System.Xml.XmlDocument.Load(XmlReader reader)
...so it really is a validation exception; now, if I validate using "MSXML.NET" (whatever that is) in oXygen, it says that the document is valid, as with the other parsers.
How can I get ValidationEvents if there are attributes or elements in the XML that are not defined in the XSD?
xsd:
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema id="VoiceXmlTSPro"
targetNamespace="http://tempuri.org/VoiceXmlTSPro.xsd"
elementFormDefault="qualified"
xmlns="http://tempuri.org/VoiceXmlTSPro.xsd"
xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<!-- Elements-->
<xsd:element name="vxml">
<xsd:complexType>
<xsd:attribute ref="base"/>
<xsd:attribute ref="lang"/>
</xsd:complexType>
</xsd:element>
<!-- End Elements-->
<!-- Attributes-->
<xsd:attribute name="base" type="xsd:anyURI">
</xsd:attribute>
<xsd:attribute name="lang" type="xsd:string">
</xsd:attribute>
<!-- End Attributes-->
</xsd:schema>
xml:
<?xml version="1.0" encoding="utf-8" ?>
<vxml application="notsupported" lang="en-US" base="http://www.zebra.com">
<unknow></unknow>
</vxml>
I want a warning for the application attribute and for the unknow element.
But this code is not raising any event.
// Validates the XML markup in `source` against the schema created from
// Resource.VoiceXmlTSPro.
// NOTE(review): XmlSchemaFactory, ValidationResults, Resource and
// ValidationHandler are defined outside this snippet; their behavior is not
// visible here.
public override void Validate(string source)
{
// Reset ValidationResults before this run.
ValidationResults.Clear();
XmlSchemaFactory xmlSchemaFactory = new XmlSchemaFactory();
XmlReaderSettings vxmlTestSettings = new XmlReaderSettings();
vxmlTestSettings.ValidationType = ValidationType.Schema;
// Plain assignment: the flags become exactly these two values rather than
// being added to whatever the settings object held before.
vxmlTestSettings.ValidationFlags = XmlSchemaValidationFlags.ProcessIdentityConstraints| XmlSchemaValidationFlags.ReportValidationWarnings ;
try
{
XmlSchema xsdSchema = xmlSchemaFactory.Create(Resource.VoiceXmlTSPro);
if (xmlSchemaFactory.HasErrors())
{
// If the schema itself is invalid, skip reading the document entirely.
return;
}
vxmlTestSettings.Schemas.Add(xsdSchema);
// NOTE(review): this handler is only passed to document.Validate below; no
// ValidationEventHandler is attached to vxmlTestSettings itself.
ValidationEventHandler eventHandler = new ValidationEventHandler(ValidationHandler);
// The markup is converted to ASCII bytes before parsing.
// NOTE(review): confirm that `source` never contains non-ASCII characters.
using (MemoryStream stream = new MemoryStream(Encoding.ASCII.GetBytes(source)))
using (XmlReader xmlReader = XmlReader.Create(stream, vxmlTestSettings))
{
XmlDocument document = new XmlDocument();
document.Load(xmlReader);
document.Validate(eventHandler);
}
}
catch (Exception ex)
{
...
}
}
If all you need to do is validate the string of XML markup against a schema then parsing the string with an XmlReader over a StringReader with proper XmlReaderSettings should do, there is no need to use an XmlDocument and its Validate method.
Here is an example, for the first sample it only outputs warnings as the input document has no namespace declaration, for the second document there are errors about undeclared element and attributes:
// Validates two sample documents against ../../schema1.xml: the first has no
// namespace declaration, the second declares xmlns="http://example.com/ns1".
static void Main(string[] args)
{
    // Verbatim string literals (@"..."): "" is an escaped quote, and the line
    // breaks inside each literal are part of the markup being validated.
    string[] xmls =
    {
        @"<vxml application=""notsupported"" lang=""en-US"" base=""http://www.zebra.com"">
<unknow></unknow>
</vxml>",
        @"<vxml application=""notsupported"" lang=""en-US"" base=""http://www.zebra.com"" xmlns=""http://example.com/ns1"">
<unknow></unknow>
</vxml>"
    };

    foreach (string xml in xmls)
    {
        Console.WriteLine("Validating");
        Validate(xml, "../../schema1.xml");
        Console.WriteLine();
    }
}
// Parses xmlMarkup with schema validation against the schema at schemaUri and
// writes each validation event to the console as "<severity>: <message>".
static void Validate(string xmlMarkup, string schemaUri)
{
    XmlReaderSettings readerSettings = new XmlReaderSettings();
    readerSettings.Schemas.Add(null, schemaUri);
    readerSettings.ValidationType = ValidationType.Schema;
    readerSettings.ValidationFlags = readerSettings.ValidationFlags | XmlSchemaValidationFlags.ReportValidationWarnings;
    readerSettings.ValidationEventHandler += delegate(object source, ValidationEventArgs evt)
    {
        Console.WriteLine("{0}: {1}", evt.Severity, evt.Message);
    };

    using (StringReader markupReader = new StringReader(xmlMarkup))
    using (XmlReader validatingReader = XmlReader.Create(markupReader, readerSettings))
    {
        // Reading to the end is what drives validation.
        bool more = validatingReader.Read();
        while (more)
        {
            more = validatingReader.Read();
        }
    }
}
The schema is
<?xml version="1.0" encoding="utf-8" ?>
<xsd:schema id="VoiceXmlTSPro"
targetNamespace="http://example.com/ns1"
elementFormDefault="qualified"
xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<!-- Elements-->
<xsd:element name="vxml">
<xsd:complexType>
<xsd:attribute name="base" type="xsd:anyURI"/>
<xsd:attribute name="lang" type="xsd:string"/>
</xsd:complexType>
</xsd:element>
<!-- End Elements-->
</xsd:schema>
the output is
Validating
Warning: Could not find schema information for the element 'vxml'.
Warning: Could not find schema information for the attribute 'application'.
Warning: Could not find schema information for the attribute 'lang'.
Warning: Could not find schema information for the attribute 'base'.
Warning: Could not find schema information for the element 'unknow'.
Validating
Error: The 'application' attribute is not declared.
Error: The element cannot contain white space. Content model is empty.
Error: The element 'http://example.com/ns1:vxml' cannot contain child element 'http://example.com/ns1:unknow' because the parent element's content model is empty.
I am getting XML from a web service as a string, and to pass it to an XmlReader I am converting the string to a stream object, but I cannot figure out what I am missing. The XML and schema shown here are samples.
/// <summary>
/// Validates the markup of <c>books.xml</c>, held as a string, against
/// <c>books.xsd</c> and prints the message of any exception that occurs.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        try
        {
            // Obtain the markup as a string (stands in for the web-service payload).
            string leadxml = File.ReadAllText("books.xml");

            XmlReaderSettings xmlSettings = new XmlReaderSettings();
            xmlSettings.Schemas.Add(string.Empty, "books.xsd");
            xmlSettings.ValidationType = ValidationType.Schema;

            // Read the string through a TextReader instead of re-encoding it as
            // ASCII bytes: a byte encoding that disagrees with the encoding named
            // in the XML declaration makes the reader fail.
            using (StringReader leadxmlReader = new StringReader(leadxml))
            using (XmlReader reader = XmlReader.Create(leadxmlReader, xmlSettings))
            {
                // Parse the file.
                while (reader.Read())
                {
                }
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }
    }
}
xml is:
<?xml version="1.0" encoding="utf-8" ?>
<bookstore>
<book genre="autobiography" publicationdate="1981-03-22" ISBN="1-861003-11-0"/>
</bookstore>
xsd is:
<?xml version="1.0" encoding="utf-8"?>
<xs:schema attributeFormDefault="unqualified" elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="bookstore">
<xs:complexType>
<xs:sequence>
<xs:element maxOccurs="unbounded" name="book">
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>
First of all, if all you're doing is getting the XML into a string, this entire block of code is unnecessary:
// Loads books.xml into an XmlDocument and writes it back out through an
// XmlTextWriter over a StringWriter, only to obtain the markup as a string.
XmlDocument doc = new XmlDocument();
doc.Load("books.xml");
StringWriter sw = new StringWriter();
XmlTextWriter tx = new XmlTextWriter(sw);
doc.WriteTo(tx);
string leadxml = sw.ToString();
Instead just do:
// Reads the whole file into a string in a single call.
string leadxml = File.ReadAllText("books.xml");
Secondly, if you have a string already, it is completely unnecessary to do all this conversion on the string. The XmlReader.Create method has a signature which accepts a TextReader. StringReader derives from TextReader. Therefore, this:
// Encodes the string as ASCII bytes and wraps them in a MemoryStream so the
// XmlReader can be created over a stream.
byte[] byteArray = Encoding.ASCII.GetBytes(leadxml);
MemoryStream stream = new MemoryStream(byteArray);
XmlReader reader = XmlReader.Create(stream, xmlSettings);
Can be replaced with this:
// Feed the string to the XmlReader directly through a TextReader; no byte
// conversion is involved. (The variable is named leadxml, all lower-case.)
StringReader leadxmlStringReader = new StringReader(leadxml);
XmlReader reader = XmlReader.Create(leadxmlStringReader, xmlSettings);
For any further assistance with this question, you will need to provide Exception details and/or specific error messages.
StringWriter assumes a UTF-16 encoding by default, which means you'll get an XML declaration with UTF-16 in it. You're then using Encoding.ASCII to get a binary representation though, which is incompatible with the UTF-16 declaration.
To be honest, it's not really clear why you're doing this in the first place - there's almost certainly a better way of achieving your goal, but we can't really tell what your goal is, so we can't advise what that better way might be.
EDIT: For example, this might be what you're really after:
// Validate books.xml straight from the file against books.xsd.
XmlReaderSettings xmlSettings = new XmlReaderSettings
{
    ValidationType = ValidationType.Schema
};
// XmlSchemaSet does not implement IEnumerable, so it cannot be populated with a
// collection initializer; register the schema with an explicit Add call.
xmlSettings.Schemas.Add("", "books.xsd");
using (var reader = XmlReader.Create("books.xml", xmlSettings))
{
    // Reading to the end is what drives validation.
    while (reader.Read()) {}
}
I am having trouble validating serialized data.
Ok, so I started with an XSD file which I got from some third party. Generated C# classes using xsd tool. Then I added
// Serialized as an attribute named "noNamespaceSchemaLocation" in the XML Schema
// instance namespace; its value is the URL of the XSD.
[XmlAttribute("noNamespaceSchemaLocation", Namespace = System.Xml.Schema.XmlSchema.InstanceNamespace)]
public string SchemaLocation = "http://localhost/schemas/AP_Transactions_10052011.xsd";
to the top level object. The URL in question is obviously accessible from my machine where I am running the code. Then I am serializing it using XmlSerializer, which correctly produces
<?xml version="1.0" encoding="utf-8"?>
<BU_AP_Vendor_Invoices xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xsi:noNamespaceSchemaLocation="http://local.com/schemas/AP_Transactions_10052011.xsd">
...
</BU_AP_Vendor_Invoices>
So far so good.
Now I am trying to validate the file like so:
/// <summary>
/// Reads the XML supplied by <paramref name="xmlData"/> with schema validation
/// enabled, loading the schema named by the document's own
/// xsi:schemaLocation / xsi:noNamespaceSchemaLocation attribute, and writes the
/// message of every validation warning or error to the console.
/// </summary>
/// <param name="xmlData">Reader positioned at the start of the XML markup.</param>
public static void Validate(TextReader xmlData)
{
    XmlReaderSettings settings = new XmlReaderSettings();
    settings.ValidationType = ValidationType.Schema;
    // ProcessSchemaLocation is what makes the reader act on the schema location
    // written in the document; without it every element is reported as having
    // no schema information.
    settings.ValidationFlags = XmlSchemaValidationFlags.ProcessIdentityConstraints
        | XmlSchemaValidationFlags.ProcessSchemaLocation
        | XmlSchemaValidationFlags.ReportValidationWarnings;
    // An explicit resolver is supplied so the referenced schema can be fetched.
    // NOTE(review): this lets the document trigger file/HTTP reads; only use it
    // on XML from a trusted source.
    settings.XmlResolver = new XmlUrlResolver();
    settings.ValidationEventHandler += delegate(object sender, ValidationEventArgs args)
    {
        Console.WriteLine(args.Message);
    };

    using (XmlReader xmlReader = XmlReader.Create(xmlData, settings))
    {
        // Reading to the end is what drives validation.
        while (xmlReader.Read())
        {
        }
    }
}
This results in "Could not find schema information for the element 'element name'" warnings for every element in the XML file. I assume that means the XSD is simply not being loaded.
I was looking at the XmlReaderSettings.Schemas, but how would the reader know what to add there? I assumed that if I don't add schemas explicitly then magic will simply happen, but that doesn't seem to work.
Question is how to do this properly?
Please take a look at this post; the gist is to use XmlSchemaValidationFlags.ProcessSchemaLocation.
I have some WCF methods that are used to transmit information from a server application to a website frontend for use in binding. I'm sending the result as an XElement that is a root of an XML tree containing the data I want to bind against.
I'd like to create some tests that examine the data and ensure it comes across as expected.
My current thinking is this: Every method that returns an XElement tree has a corresponding schema (.XSD) file. This file is included within the assembly that contains my WCF classes as an embedded resource.
The tests call these methods and compare the results against the embedded schemas.
Is this a good idea? If not, what other ways can I use to provide a "guarantee" of what kind of XML a method will return?
If it is, how do you validate an XElement against a schema? And how can I get that schema from the assembly it's embedded in?
I'd say validating XML with an XSD schema is a good idea.
How to validate a XElement with the loaded schema:
As you can see in this example, you need to validate the XDocument first to populate the "post-schema-validation infoset" (there might be a way to do this without calling the Validate method on the XDocument, but I have yet to find one):
// Schema and document are held in verbatim string literals (@"..."), so the
// line breaks inside them are part of the markup.
String xsd =
@"<xsd:schema xmlns:xsd='http://www.w3.org/2001/XMLSchema'>
<xsd:element name='root'>
<xsd:complexType>
<xsd:sequence>
<xsd:element name='child1' minOccurs='1' maxOccurs='1'>
<xsd:complexType>
<xsd:sequence>
<xsd:element name='grandchild1' minOccurs='1' maxOccurs='1'/>
<xsd:element name='grandchild2' minOccurs='1' maxOccurs='2'/>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
</xsd:schema>";
String xml = @"<?xml version='1.0'?>
<root>
<child1>
<grandchild1>alpha</grandchild1>
<grandchild2>beta</grandchild2>
</child1>
</root>";

XmlSchemaSet schemas = new XmlSchemaSet();
schemas.Add("", XmlReader.Create(new StringReader(xsd)));
XDocument doc = XDocument.Load(XmlReader.Create(new StringReader(xml)));

// First pass: validate the whole document. The final "true" asks for schema
// information to be attached to the tree, which GetSchemaInfo() reads below.
Boolean errors = false;
doc.Validate(schemas, (sender, e) =>
{
    Console.WriteLine(e.Message);
    errors = true;
}, true);

// Second pass: validate a single element against the schema element recorded
// for it during the first pass.
errors = false;
XElement child = doc.Element("root").Element("child1");
child.Validate(child.GetSchemaInfo().SchemaElement, schemas, (sender, e) =>
{
    Console.WriteLine(e.Message);
    errors = true;
});
How to read the embedded schema from an assembly and add it to the XmlSchemaSet:
Assembly assembly = Assembly.GetExecutingAssembly();
XmlSchemaSet schemas = new XmlSchemaSet();
// you can use reflector to get the full namespace of your embedded resource here
// The stream and reader are only needed while the schema is being added, so
// both are disposed as soon as Add returns.
using (Stream stream = assembly.GetManifestResourceStream("AssemblyRootNamespace.Resources.XMLSchema.xsd"))
using (XmlReader schemaReader = XmlReader.Create(stream))
{
    schemas.Add(null, schemaReader);
}
If you're doing some light-weight work and XSDs are overkill, consider also possibly strongly typing your XML data. For example, I have a number of classes in a project that derive from XElement. One is ExceptionXElement, another is HttpHeaderXElement, etc. In them, I inherit from XElement and add Parse and TryParse methods that take strings containing XML data to create an instance from. If TryParse() returns false, the string does not conform to the XML data I expect (the root element has the wrong name, missing children elements, etc.).
For example:
public class MyXElement : XElement
{
public MyXElement(XElement element)
: base(element)
{ }
public static bool TryParse(string xml, out MyXElement myElement)
{
XElement xmlAsXElement;
try
{
xmlAsXElement = XElement.Parse(xml);
}
catch (XmlException)
{
myElement = null;
return false;
}
// Use LINQ to check if xmlAsElement has correct nodes...
}