Merge XML files using XmlReader and XmlWriter

Merge XML files using XmlReader and XmlWriter - c#

I am trying to merge multiple XML files into one using XmlReader and XmlWriter, but my final file only contains the data from the last file.
I am using XmlReader and XmlWriter because the XML files to merge are large in size.
What am I doing wrong in the code below?
/// <summary>
/// Merges every *.xml file in a folder into a single "_all.xml" document using the
/// streaming XmlReader/XmlWriter APIs, so large inputs never have to be held in memory.
/// The root element, the CYP000 element and the C000* header elements are taken from
/// the first file only; every other child of CYP000 is copied from every file.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        string folder = @"C:\Temp\";
        string output = folder + "_all.xml";
        Encoding readEncoding = System.Text.Encoding.Default;

        XmlWriterSettings writerSettings = new XmlWriterSettings();
        writerSettings.Encoding = Encoding.UTF8;
        writerSettings.ConformanceLevel = ConformanceLevel.Fragment;

        // XmlWriter/XmlReader do not close the TextWriter/TextReader they wrap by default,
        // so the underlying file objects get their own using blocks.
        using (StreamWriter outputFile = new StreamWriter(output, false))
        using (XmlWriter writer = XmlWriter.Create(outputFile, writerSettings))
        {
            bool firstFile = true;
            int openElements = 0; // start tags written so far that still need an end tag

            foreach (FileInfo file in new DirectoryInfo(folder).GetFiles("*.xml").Where(f => f.Name != "_all.xml"))
            {
                using (StreamReader inputFile = new StreamReader(file.FullName, readEncoding))
                using (XmlReader reader = XmlReader.Create(inputFile))
                {
                    bool positioned = reader.Read();
                    while (positioned)
                    {
                        if (reader.NodeType == XmlNodeType.Element)
                        {
                            if (reader.Depth == 0 && reader.Name == "CYPHS:CYPHS")
                            {
                                if (firstFile)
                                {
                                    writer.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI);
                                    writer.WriteAttributes(reader, true);
                                    openElements++;
                                }
                            }
                            else if (reader.Depth == 1 && reader.Name == "CYP000")
                            {
                                if (firstFile)
                                {
                                    writer.WriteStartElement(reader.Name);
                                    openElements++;
                                }
                            }
                            else if (reader.Depth == 2 && (firstFile || !reader.Name.StartsWith("C000", StringComparison.Ordinal)))
                            {
                                // Fix: the data elements of the FIRST file used to be skipped because the
                                // copy branch required !firstFile; only later files were ever copied.
                                writer.WriteNode(reader, false);

                                // WriteNode leaves the reader on the node AFTER the copied element.
                                // Calling Read() again here would skip that node (and therefore a whole
                                // element when the input has no whitespace between tags).
                                positioned = !reader.EOF;
                                continue;
                            }
                        }

                        positioned = reader.Read();
                    }
                }

                firstFile = false;
            }

            // Close exactly what was opened; with no input files nothing is written at all.
            while (openElements-- > 0)
            {
                writer.WriteEndElement();
            }
        }

        Console.WriteLine("Done!");
        Console.ReadLine();
    }
}
File 1
<CYPHS:CYPHS xsi:schemaLocation="http://www.datadictionary.nhs.uk/messages/CYPHS-v1-5 CYPHSCYPHS_XMLSchema-v1-5.xsd"
xmlns:CYPHS="http://www.datadictionary.nhs.uk/messages/CYPHS-v1-5"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<CYP000>
<C000010>File 1</C000010>
<CYP001>
<C001901>File 1</C001901>
<CYP101>
<C101902>File 1</C101902>
<CYP102>
<C102902>File 1</C102902>
</CYP102>
</CYP101>
<CYP002>
<C002901>File 1</C002901>
</CYP002>
</CYP001>
</CYP000>
</CYPHS:CYPHS>
File 2
<CYPHS:CYPHS xsi:schemaLocation="http://www.datadictionary.nhs.uk/messages/CYPHS-v1-5 CYPHSCYPHS_XMLSchema-v1-5.xsd"
xmlns:CYPHS="http://www.datadictionary.nhs.uk/messages/CYPHS-v1-5"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<CYP000>
<C000010>File 2</C000010>
<CYP001>
<C001901>File 2</C001901>
<CYP101>
<C101902>File 2</C101902>
<CYP102>
<C102902>File 2</C102902>
</CYP102>
</CYP101>
<CYP002>
<C002901>File 2</C002901>
</CYP002>
</CYP001>
</CYP000>
</CYPHS:CYPHS>
Should be merged into file as so:
<CYPHS:CYPHS xsi:schemaLocation="http://www.datadictionary.nhs.uk/messages/CYPHS-v1-5 CYPHSCYPHS_XMLSchema-v1-5.xsd"
xmlns:CYPHS="http://www.datadictionary.nhs.uk/messages/CYPHS-v1-5"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<CYP000>
<C000010>File 1</C000010>
<CYP001>
<C001901>File 1</C001901>
<CYP101>
<C101902>File 1</C101902>
<CYP102>
<C102902>File 1</C102902>
</CYP102>
</CYP101>
<CYP002>
<C002901>File 1</C002901>
</CYP002>
</CYP001>
<CYP001>
<C001901>File 2</C001901>
<CYP101>
<C101902>File 2</C101902>
<CYP102>
<C102902>File 2</C102902>
</CYP102>
</CYP101>
<CYP002>
<C002901>File 2</C002901>
</CYP002>
</CYP001>
</CYP000>
</CYPHS:CYPHS>

Like This
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
namespace ConsoleApplication53
{
    /// <summary>
    /// Demonstrates merging two in-memory CYPHS documents with LINQ to XML: the data
    /// elements below CYP000 in the second document are appended to CYP000 in the first.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            string file1 =
                "<CYPHS:CYPHS xsi:schemaLocation=\"http://www.datadictionary.nhs.uk/messages/CYPHS-v1-5 CYPHSCYPHS_XMLSchema-v1-5.xsd\"" +
                " xmlns:CYPHS=\"http://www.datadictionary.nhs.uk/messages/CYPHS-v1-5\"" +
                " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">" +
                "<CYP000>" +
                "<C000010>File 1</C000010>" +
                "<CYP001>" +
                "<C001901>File 1</C001901>" +
                "<CYP101>" +
                "<C101902>File 1</C101902>" +
                "<CYP102>" +
                "<C102902>File 1</C102902>" +
                "</CYP102>" +
                "</CYP101>" +
                "<CYP002>" +
                "<C002901>File 1</C002901>" +
                "</CYP002>" +
                "</CYP001>" +
                "</CYP000>" +
                "</CYPHS:CYPHS>";
            XDocument doc1 = XDocument.Parse(file1);
            XElement doc1_CYP000 = doc1.Descendants("CYP000").FirstOrDefault();

            string file2 =
                "<CYPHS:CYPHS xsi:schemaLocation=\"http://www.datadictionary.nhs.uk/messages/CYPHS-v1-5 CYPHSCYPHS_XMLSchema-v1-5.xsd\"" +
                " xmlns:CYPHS=\"http://www.datadictionary.nhs.uk/messages/CYPHS-v1-5\"" +
                " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">" +
                "<CYP000>" +
                "<C000010>File 2</C000010>" +
                "<CYP001>" +
                "<C001901>File 2</C001901>" +
                "<CYP101>" +
                "<C101902>File 2</C101902>" +
                "<CYP102>" +
                "<C102902>File 2</C102902>" +
                "</CYP102>" +
                "</CYP101>" +
                "<CYP002>" +
                "<C002901>File 2</C002901>" +
                "</CYP002>" +
                "</CYP001>" +
                "</CYP000>" +
                "</CYPHS:CYPHS>";
            XDocument doc2 = XDocument.Parse(file2);
            XElement doc2_CYP000 = doc2.Descendants("CYP000").FirstOrDefault();

            if (doc1_CYP000 == null || doc2_CYP000 == null)
            {
                throw new InvalidOperationException("Both documents must contain a CYP000 element.");
            }

            // Append only the DIRECT children of the second CYP000, and leave out its C000* header.
            // Descendants() would add every nested element a second time as a flattened sibling
            // (CYP001 with its subtree, then C001901, CYP101, ... again) and duplicate the header.
            doc1_CYP000.Add(
                doc2_CYP000.Elements()
                           .Where(e => !e.Name.LocalName.StartsWith("C000", StringComparison.Ordinal)));
        }
    }
}

I'm not entirely sure where you went wrong, but it seems most straightforward to check the Depth, LocalName and NamespaceURI properties of XmlReader when combining your XML files. I strongly recommend against hardcoding the namespace prefixes since the prefix can be replaced with any other prefix without changing the semantics of the XML file.
One thing to note: XmlWriter.WriteNode(XmlReader, bool) advances the reader to the beginning of the next node, so if you subsequently call Read() and there is no whitespace in the file you'll skip over the next element. With this in mind, when working directly with XmlReader, it's better to test both with and without spacing.
Thus:
/// <summary>
/// Streams several XML inputs that share the CYPHS / CYP000 layout into one merged document,
/// replaying the whitespace found in the inputs so the output keeps its formatting.
/// </summary>
public class XmlConcatenate
{
/// <summary>
/// Merges every *.xml file in C:\Temp\ except "_all.xml" into C:\Temp\_all.xml.
/// </summary>
public static void ConcatenateAllFiles()
{
string folder = "C:\\Temp\\";
string output = folder + "_all.xml";
Encoding readEncoding = System.Text.Encoding.Default; // WHY NOT Encoding.UTF8 !?
// Deferred LINQ query: each StreamReader is only created when Concatenate enumerates it.
var files = new DirectoryInfo(folder).GetFiles("*.xml").Where(f => f.Name != "_all.xml").Select(f => f.FullName).Select(n => (TextReader)new StreamReader(n, readEncoding));
using (var textWriter = new StreamWriter(output, false))
{
Concatenate(files, textWriter);
}
}
/// <summary>
/// Writes one merged document to <paramref name="output"/> from all <paramref name="inputs"/>.
/// The root element and the CYP000 element are written once; a depth-2 element whose name
/// starts with "C000" is copied only the first time one is seen; every other depth-2 element
/// is copied from every input.
/// </summary>
/// <param name="inputs">Readers over the source documents; each one is disposed after it has been read.</param>
/// <param name="output">Destination for the merged XML.</param>
public static void Concatenate(IEnumerable<TextReader> inputs, TextWriter output)
{
var writerSettings = new XmlWriterSettings() { Encoding = Encoding.UTF8, ConformanceLevel = ConformanceLevel.Fragment };
// Whitespace nodes seen since the last element; replayed in front of the next element that is written.
var whiteSpace = new StringBuilder();
// Length of the whitespace (without line breaks) seen in front of CYP000; used to indent the closing tags.
int indent = 0;
using (var writer = XmlWriter.Create(output, writerSettings))
{
// Number of start tags written so far that still need a matching end tag.
var writeDepth = 0;
// True until the first "C000*" depth-2 element has been copied.
var first = true;
foreach (var input in inputs)
{
using (input)
using (var reader = XmlReader.Create(input))
{
// Set after WriteNode, which already leaves the reader on the following node,
// so the next loop iteration must not call Read() again.
bool alreadyRead = false;
while (!reader.EOF && (alreadyRead || reader.Read()))
{
alreadyRead = false;
switch (reader.NodeType)
{
case XmlNodeType.Element:
{
// Root element: matched by depth, local name and namespace rather than by prefix.
if (reader.Depth == 0 && reader.LocalName == "CYPHS" && reader.NamespaceURI == "http://www.datadictionary.nhs.uk/messages/CYPHS-v1-5")
{
// Only the first root start tag (with its attributes) is written.
if (writeDepth == 0)
{
writer.WriteWhitespace(whiteSpace.ToString());
writer.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI);
writer.WriteAttributes(reader, true);
writeDepth++;
}
}
else if (reader.Depth == 1 && reader.LocalName == "CYP000" && reader.NamespaceURI == "")
{
// Only the first CYP000 start tag is written.
if (writeDepth == 1)
{
indent = whiteSpace.ToString().Replace("\n", "").Replace("\r", "").Length;
writer.WriteWhitespace(whiteSpace.ToString());
writer.WriteStartElement(reader.LocalName, reader.NamespaceURI);
writeDepth++;
}
}
else if (reader.Depth == 2)
{
if (reader.LocalName.StartsWith("C000") && reader.NamespaceURI == "")
{
// Header element: copied once only.
if (first)
{
first = false;
writer.WriteWhitespace(whiteSpace.ToString());
writer.WriteNode(reader, false);
alreadyRead = true;
}
}
else
{
// Data element: copied, with its whole subtree, from every input.
writer.WriteWhitespace(whiteSpace.ToString());
writer.WriteNode(reader, false);
alreadyRead = true;
}
}
whiteSpace.Length = 0; // Clear accumulated whitespace.
}
break;
case XmlNodeType.Whitespace:
{
whiteSpace.Append(reader.Value);
}
break;
default:
break;
}
}
}
}
// Close every element that was opened, innermost first, each on its own indented line.
while (writeDepth-- > 0)
{
if (indent > 0)
writer.WriteWhitespace("\n" + new string(' ', indent * writeDepth));
writer.WriteEndElement();
}
}
}
}
Bit of a nuisance getting the spacing to merge, if you don't care about preserving the spacing you can simplify the code substantially.
Working fiddle.
You might not want to use System.Text.Encoding.Default for reading your XML files. From the docs:
Because all Default encodings lose data, you might use UTF8 instead. UTF-8 is often identical in the U+00 to U+7F range, but can encode other characters without loss.

A different solution could be to use a custom XmlReader-implementation to concat the files while reading them.
Then use this custom reader along with an XmlWriter to create the merged file.
The custom XmlReader keeps internal XmlReaders for each file.
The intro/end is only read from the first file.
Only the relevant (to-be-appended) elements are read from the other files.
create an XmlReader for the first file
read up to the point where elements should be appended
for each subsequent file
create a new XmlReader
skip ahead to the first relevant element
read the relevant elements
dispose the reader
read the rest of the first file (resume the reader from step 1)
dispose the reader
Example implementation
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Xml;
/// <summary>
/// Concatenates several XML documents of the same layout by presenting them to an
/// <see cref="XmlWriter"/> as one virtual document through a custom <see cref="XmlReader"/>.
/// </summary>
public static class XmlConcatenator
{
    // first: pause reading at the end of this element, will resume after subsequent streams are read
    // subsequent: stop reading at the end of this element
    private const string StopAtEndOf = "CYP000";

    // first: (ignores this)
    // subsequent: skip ahead to the first instance of this element
    private const string ResumeAtFirst = "CYP001";

    private static readonly XmlReaderSettings XmlReaderSettings = new XmlReaderSettings() { DtdProcessing = DtdProcessing.Ignore };
    private static readonly XmlWriterSettings XmlWriterSettings = new XmlWriterSettings() { Encoding = Encoding.UTF8, Indent = true };

    /// <summary>
    /// Writes the merged document to <paramref name="outStream"/>.
    /// </summary>
    /// <param name="outStream">Destination stream; it is left open.</param>
    /// <param name="fileStreams">At least two source streams; the first one supplies the document frame.</param>
    /// <exception cref="InvalidOperationException">Fewer than two streams were supplied.</exception>
    public static void Concat(Stream outStream, Stream[] fileStreams)
    {
        using var reader = XmlConcatReader.Create(fileStreams);
        using var writer = XmlWriter.Create(outStream, XmlWriterSettings);
        writer.WriteNode(reader, true);
    }

    /// <summary>
    /// Reader that delegates to one inner reader at a time: the first stream up to the end of
    /// <see cref="StopAtEndOf"/>, then the relevant part of each further stream, then the rest
    /// of the first stream.
    /// </summary>
    private class XmlConcatReader : XmlReader
    {
        private readonly XmlReader _firstReader;
        private readonly IEnumerator<Stream> _streams;
        private XmlReader _currentReader;

        private XmlConcatReader(Stream first, IEnumerable<Stream> streams)
        {
            _firstReader = XmlReader.Create(first, XmlReaderSettings);
            _streams = streams.GetEnumerator();
            _currentReader = _firstReader;
        }

        public static XmlReader Create(Stream[] inputStreams)
        {
            if (!(inputStreams?.Length > 1))
            {
                throw new InvalidOperationException($"{nameof(inputStreams)} must contain at least two streams");
            }

            return new XmlConcatReader(inputStreams[0], inputStreams.Skip(1));
        }

        public override bool Read()
        {
            var hasNode = _currentReader.Read();
            if (_currentReader.NodeType != XmlNodeType.EndElement || _currentReader.LocalName != StopAtEndOf)
            {
                return hasNode;
            }

            // The current reader reached </CYP000>. A subsequent reader is finished at this point;
            // the first reader is only paused and is disposed at the end. See: Dispose(bool)
            if (!ReferenceEquals(_currentReader, _firstReader))
            {
                _currentReader.Dispose();
            }

            // Move on to the next stream that actually contains a resume element. A stream without
            // one is skipped instead of leaving this reader at EOF, which used to end the merge early.
            while (_streams.MoveNext())
            {
                var candidate = XmlReader.Create(_streams.Current, XmlReaderSettings);
                while (candidate.Read())
                {
                    if (candidate.NodeType == XmlNodeType.Element && candidate.LocalName == ResumeAtFirst)
                    {
                        _currentReader = candidate;
                        return true;
                    }
                }

                candidate.Dispose();
            }

            // No streams left: resume the first reader, which is still positioned on its </CYP000>.
            _currentReader = _firstReader;
            return true;
        }

        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                if (!ReferenceEquals(_currentReader, _firstReader))
                {
                    _currentReader?.Dispose();
                }

                _firstReader?.Dispose();
                _streams?.Dispose();
            }

            base.Dispose(disposing);
        }

        // Everything below simply forwards to whichever inner reader is active.
        public override XmlNodeType NodeType => _currentReader.NodeType;
        public override string LocalName => _currentReader.LocalName;
        public override string NamespaceURI => _currentReader.NamespaceURI;
        public override string Prefix => _currentReader.Prefix;
        public override string Value => _currentReader.Value;
        public override int Depth => _currentReader.Depth;
        public override string BaseURI => _currentReader.BaseURI;
        public override bool IsEmptyElement => _currentReader.IsEmptyElement;
        public override int AttributeCount => _currentReader.AttributeCount;
        public override bool EOF => _currentReader.EOF;
        public override ReadState ReadState => _currentReader.ReadState;
        public override XmlNameTable NameTable => _currentReader.NameTable;
        public override string GetAttribute(string name) => _currentReader.GetAttribute(name);
        public override string GetAttribute(string name, string namespaceURI) => _currentReader.GetAttribute(name, namespaceURI);
        public override string GetAttribute(int i) => _currentReader.GetAttribute(i);
        public override string LookupNamespace(string prefix) => _currentReader.LookupNamespace(prefix);
        public override bool MoveToAttribute(string name) => _currentReader.MoveToAttribute(name);
        public override bool MoveToAttribute(string name, string ns) => _currentReader.MoveToAttribute(name, ns);
        public override bool MoveToElement() => _currentReader.MoveToElement();
        public override bool MoveToFirstAttribute() => _currentReader.MoveToFirstAttribute();
        public override bool MoveToNextAttribute() => _currentReader.MoveToNextAttribute();
        public override bool ReadAttributeValue() => _currentReader.ReadAttributeValue();
        public override void ResolveEntity() => _currentReader.ResolveEntity();
    }
}
Example of use
using System.IO;
using System.Linq;
/// <summary>
/// Example entry point: merges in1.xml and in2.xml into output.xml with XmlConcatenator.
/// </summary>
internal static class Program
{
    private static void Main()
    {
        var input = new[] { "in1.xml", "in2.xml" };
        var output = "output.xml";

        var inputStreams = new Stream[input.Length];
        try
        {
            // The inputs are only read, so open them read-only instead of read/write.
            for (var i = 0; i < input.Length; i++)
            {
                inputStreams[i] = File.OpenRead(input[i]);
            }

            using var outputStream = File.Create(output);
            XmlConcatenator.Concat(outputStream, inputStreams);
        }
        finally
        {
            // Runs even when opening a later file or the merge itself throws,
            // so no input file is left open. Slots never opened are still null.
            foreach (var stream in inputStreams)
            {
                stream?.Dispose();
            }
        }
    }
}

Related

C# remove not UTF-8 supported values from XML for XslCompiledTransform.Transform

Every time I want to run XslCompiledTransform.Transform, I get an exception due to invalid characters.
One of such characters is e.g. "xFFFE".
How can I remove all invalid characters in C#?
XmlConvert.IsXmlChar doesn't work because here I check every single char and "xFFFE" as single char is not an invalid char.
I always run into an exception in XslCompiledTransform.Transform, but only if "xFFFE" is in the XML document.
Here is the code:
// Sample input document for Clean.
string document = "<?xml version=\"1.0\" encoding=\"utf-8\"?><FirstTag><Second><Third>;</Third></Second></FirstTag>";
/// <summary>
/// Runs the "clean.xsl" stylesheet over <paramref name="document"/> and returns the
/// transformed XML prefixed with an XML declaration.
/// </summary>
/// <param name="document">XML text to transform.</param>
/// <returns>
/// The XML declaration followed by the transformation output; only the XML declaration
/// when any exception is thrown along the way.
/// </returns>
// NOTE(review): Uri, ResourceResolver and RemoveXmlNotSupportedSigns are not defined in this
// snippet - presumably members of the surrounding class; confirm against the full source.
// NOTE(review): Load and Transform are called on the XslCompiledTransform type itself here;
// they are instance methods, so an XslCompiledTransform object presumably exists in the
// real code - confirm.
public static string Clean(string document)
{
XmlWriterSettings writerSettings = new XmlWriterSettings();
XsltArgumentList argsList;
document = RemoveXmlNotSupportedSigns(document);
string result = "<?xml version=\"1.0\" encoding=\"utf-8\"?>";
try
{
using (StringReader sr = new StringReader(document))
{
using (StringWriter sw = new StringWriter())
{
using (XmlReader xmlR = XmlReader.Create(sr))
{
using (XmlWriter xmlW = XmlWriter.Create(sw, writerSettings))
{
Uri uri = new Uri(string.Format(CultureInfo.InvariantCulture, "{0}clean.xsl", Uri), UriKind.Relative);
argsList = new XsltArgumentList();
using (Stream xslSheet = Application.GetResourceStream(uri).Stream)
{
//Init resolver with the url of the resource path without filename
ResourceResolver resolver = new ResourceResolver(Uri);
using (XmlReader xmlReader = XmlReader.Create(xslSheet))
{
XsltSettings settings = new XsltSettings();
settings.EnableDocumentFunction = true;
// Transform
XslCompiledTransform.Load(xmlReader, settings, resolver);
XslCompiledTransform.Transform(xmlR, argsList, xmlW, resolver);
}
}
}
}
// Read the writer's text only after the XmlWriter has been disposed (and therefore flushed).
result = result + sw.ToString();
}
}
return result;
}
// Any failure is swallowed here: the caller receives just the XML declaration and no error detail.
catch (Exception Ex)
{
return result;
}
}

If you look at https://www.w3.org/TR/xml/#charsets you will find the allowed characters with the range [#xE000-#xFFFD] clearly not including #xFFFE. So this character is not part of well-formed XML 1.0 document, in your code sample it is not XslCompiledTransform or XSLT rejecting it, it is simply the underlying parser, XmlReader.
If you want to process such mal-formed input with XmlReader you can use the XmlReaderSettings with CheckCharacters = false and eliminate such characters, I think, by checking each with e.g. XmlConvert.IsXmlChar.
With the help of XmlWrappingReader from the MvpXml library (https://github.com/keimpema/Mvp.Xml.NetStandard) you could implement a filtering XmlReader:
/// <summary>
/// Wraps another <see cref="XmlReader"/> and removes characters that are not allowed in XML 1.0
/// from text, CDATA and attribute values. The wrapped reader should be created with
/// <c>CheckCharacters = false</c> so it does not reject the input itself.
/// </summary>
public class MyWrappingReader : XmlWrappingReader
{
    public MyWrappingReader(XmlReader baseReader) : base(baseReader) { }

    /// <summary>Value of the current node, cleaned when the node carries character data.</summary>
    public override string Value => IsCharacterData(base.NodeType) ? CleanString(base.Value) : base.Value;

    public override string ReadString()
    {
        // The node type has to be sampled before ReadString() moves the reader.
        bool clean = IsCharacterData(base.NodeType);
        string text = base.ReadString();
        return clean ? CleanString(text) : text;
    }

    public override string GetAttribute(int i)
    {
        return CleanString(base.GetAttribute(i));
    }

    public override string GetAttribute(string localName, string namespaceUri)
    {
        return CleanString(base.GetAttribute(localName, namespaceUri));
    }

    public override string GetAttribute(string name)
    {
        return CleanString(base.GetAttribute(name));
    }

    private static bool IsCharacterData(XmlNodeType nodeType)
    {
        return nodeType == XmlNodeType.Text || nodeType == XmlNodeType.CDATA || nodeType == XmlNodeType.Attribute;
    }

    /// <summary>
    /// Returns <paramref name="input"/> without characters that are invalid in XML 1.0.
    /// Null (e.g. a missing attribute) and empty strings are returned unchanged.
    /// </summary>
    private static string CleanString(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return input;
        }

        var cleaned = new System.Text.StringBuilder(input.Length);
        for (int i = 0; i < input.Length; i++)
        {
            char current = input[i];
            if (XmlConvert.IsXmlChar(current))
            {
                cleaned.Append(current);
            }
            else if (i + 1 < input.Length && char.IsSurrogatePair(current, input[i + 1]))
            {
                // A well-formed surrogate pair encodes U+10000..U+10FFFF, which is valid XML.
                // Testing each half on its own would strip such characters.
                cleaned.Append(current).Append(input[i + 1]);
                i++;
            }
        }

        return cleaned.ToString();
    }
}
Then use that reader to filter your input and XslCompiledTransform should work on the cleaned XML e.g. the following runs fine:
// Identity-transform demo: the input is read through MyWrappingReader (with character checking
// switched off on the inner reader) and copied unchanged to the console by XslCompiledTransform.
string document = "<?xml version=\"1.0\" encoding=\"utf-8\"?><FirstTag><Second att1='value'><Third>a</Third></Second></FirstTag>";

// Verbatim string; "@* | node()" selects all attributes and all child nodes (XSLT identity template).
string xsltIndentity = @"<xsl:stylesheet xmlns:xsl='http://www.w3.org/1999/XSL/Transform' version='1.0'><xsl:template match='@* | node()'><xsl:copy><xsl:apply-templates select='@* | node()'/></xsl:copy></xsl:template></xsl:stylesheet>";

using (StringReader sr = new StringReader(document))
using (XmlReader xr = new MyWrappingReader(XmlReader.Create(sr, new XmlReaderSettings() { CheckCharacters = false })))
using (StringReader xsltSrReader = new StringReader(xsltIndentity))
using (XmlReader xsltReader = XmlReader.Create(xsltSrReader))
{
    XslCompiledTransform processor = new XslCompiledTransform();
    processor.Load(xsltReader);
    processor.Transform(xr, null, Console.Out);
    Console.WriteLine();
}

How to read last line from output process class?

I'm using the Process class to start a process that calculates some data and writes output to its console, and I need to read the last line of that console output. How should this be done? It has something to do with process.BeginOutputReadLine(), but I do not know how to use it to read only the LAST line.

// Drain the redirected standard output, remembering only the most recent line.
string lastLine = null;
var stdout = process.StandardOutput;
for (string line = stdout.ReadLine(); line != null; line = stdout.ReadLine())
{
    lastLine = line;
}
// lastLine now holds the final line of output, or null if the process wrote nothing.

Here is the code that should do what you need:
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApplication
{
    /// <summary>
    /// Helpers for running a command through CMD.EXE and capturing what it writes.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Starts <paramref name="aCmd"/> via "CMD.EXE /Q /C" without a window.
        /// </summary>
        /// <param name="aCmd">Command line to run. It is passed to the shell as is, so it must not contain untrusted input.</param>
        /// <param name="aOutputWriter">Receives each standard-output line; null leaves stdout unredirected.</param>
        /// <param name="aErrorWriter">Receives each standard-error line; null leaves stderr unredirected.</param>
        /// <returns>The running process (the caller disposes it), or null if no process was started.</returns>
        public static Process ShellStart(string aCmd, TextWriter aOutputWriter = null, TextWriter aErrorWriter = null)
        {
            var vProcess = new Process();
            try
            {
                var vStartInfo = vProcess.StartInfo;
                vStartInfo.FileName = Path.Combine(Environment.SystemDirectory, "CMD.EXE");
                var vCmd = "/Q /C ";
                vStartInfo.Arguments = vCmd + "\"" + aCmd + "\"";
                vStartInfo.UseShellExecute = false;
                vStartInfo.CreateNoWindow = true;

                if (aOutputWriter != null)
                {
                    vProcess.OutputDataReceived += (p, a) =>
                    {
                        if (a.Data != null)
                        {
                            aOutputWriter.WriteLine(a.Data);
                        }
                    };
                    vStartInfo.RedirectStandardOutput = true;
                    vStartInfo.RedirectStandardInput = true;
                }

                if (aErrorWriter != null)
                {
                    vProcess.ErrorDataReceived += (p, a) =>
                    {
                        if (a.Data != null)
                        {
                            aErrorWriter.WriteLine(a.Data);
                        }
                    };
                    vStartInfo.RedirectStandardError = true;
                    vStartInfo.RedirectStandardInput = true;
                }

                if (!vProcess.Start())
                {
                    // Nothing was started: release the Process object instead of leaking it.
                    vProcess.Dispose();
                    return null;
                }

                if (aOutputWriter != null) vProcess.BeginOutputReadLine();
                if (aErrorWriter != null) vProcess.BeginErrorReadLine();

                // Close stdin right away so a command waiting for input sees end-of-file.
                if (vStartInfo.RedirectStandardInput) vProcess.StandardInput.Close();
                return vProcess;
            }
            catch
            {
                vProcess.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Runs <paramref name="aCmd"/> to completion and returns its exit code (-1 if it could not be started).
        /// </summary>
        public static int ShellExec(string aCmd, TextWriter aOutputWriter = null, TextWriter aErrorWriter = null)
        {
            using (var vProcess = ShellStart(aCmd, aOutputWriter, aErrorWriter))
            {
                if (vProcess == null)
                {
                    return -1;
                }

                // WaitForExit() without a timeout also waits until the redirected streams reached
                // end-of-file, so every OutputDataReceived/ErrorDataReceived call has finished.
                vProcess.WaitForExit();

                // Flush here rather than in an Exited handler: Exited is only raised when
                // EnableRaisingEvents is true, so such a handler would never run.
                if (aOutputWriter != null) aOutputWriter.Flush();
                if (aErrorWriter != null) aErrorWriter.Flush();

                return vProcess.ExitCode;
            }
        }

        /// <summary>
        /// Lazily splits <paramref name="s"/> into lines; yields nothing for null or empty input.
        /// </summary>
        public static IEnumerable<String> SplitLines(string s)
        {
            if (String.IsNullOrEmpty(s))
            {
                yield break;
            }

            using (var vReader = new StringReader(s))
            {
                string vLine;
                while ((vLine = vReader.ReadLine()) != null)
                {
                    yield return vLine;
                }
            }
        }

        /// <summary>
        /// Runs <paramref name="aCmd"/> and returns the last line it wrote to standard output
        /// (ignoring leading and trailing whitespace), or null when there was no output.
        /// </summary>
        public static string ShellExecGetLastLine(string aCmd)
        {
            var vOutput = new StringBuilder();
            using (TextWriter vWriter = new StringWriter(vOutput))
            {
                ShellExec(aCmd, vWriter, null);
                return SplitLines(vOutput.ToString().Trim()).LastOrDefault();
            }
        }

        static void Main(string[] args)
        {
            Console.WriteLine(ShellExecGetLastLine("attrib"));
        }
    }
}
You can use ShellExecGetLastLine(command) where command is the path to your executable (along with the arguments, if required) to get the last line of the output.
In the example above, it calls attrib that outputs attributes of all files in the current directory, and returns the last line of output

proper way to get schemaLocation to determine which xsd to load and which type to return?

I'm learning how to "properly" validate XML whose exact version is not known in advance, but which is one of several known, versioned schemas.
I got it working, BUT I don't like how I'm doing it. My question is: am I on the correct path here?
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Web;
using System.Xml;
using System.Xml.Schema;
namespace BLL.XML
{
    /// <summary>
    /// Validates an XML document against the xsd named in its xsi:schemaLocation attribute
    /// and reports which class the document maps to.
    /// </summary>
    public class ValidateXML
    {
        // Supported xsd file names and the assembly-qualified name of the class each one maps to.
        private static readonly Dictionary<string, string> SchemaType = new Dictionary<string, string>
        {
            { "sample.0.6.2.xsd", "BO.contentExchangeType, BO, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null" },
            { "sample.0.6.1.xsd", "BO.contentExchangeType11231321312, BO, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null" },
        };

        //valid xml
        private bool valid;
        //list of errors found while validating
        private List<string> errors;
        //list of warnings found while validating
        private List<string> warnings;

        private ValidateXML()
        {
            errors = new List<string>();
            warnings = new List<string>();
            valid = true; //by default, the xml is valid
        }

        /// <summary>
        /// Validates <paramref name="xml"/> against the schema it declares.
        /// </summary>
        /// <param name="xml">The XML document text.</param>
        /// <param name="XmlType">The type registered for the declared xsd; null when validation could not be carried out.</param>
        /// <returns>
        /// Key: true when the document is valid. Value: the errors found. Problems such as an
        /// unknown or missing xsd are reported as an error entry rather than thrown.
        /// </returns>
        public static KeyValuePair<bool, List<string>> Validate(string xml, out Type XmlType)
        {
            var validate = new ValidateXML();
            try
            {
                string xsd = ReadSchemaFileName(xml);

                //xsd found in the predefined list?
                string typeName;
                if (!SchemaType.TryGetValue(xsd, out typeName))
                {
                    throw new InvalidOperationException("Cannot find matching xsd value");
                }

                // Only names from the predefined list reach this point, so the value taken from
                // the document cannot be used to address an arbitrary file.
                var xsdFile = HttpContext.Current.Server.MapPath("~/bin/xsd/" + xsd);
                if (!File.Exists(xsdFile))
                {
                    throw new FileNotFoundException("Cannot find the file " + xsd);
                }

                //prepare the return type
                XmlType = Type.GetType(typeName);
                //validate the xml based on the proper xsd
                return validate.VerifyXmlFile(xml, xsdFile);
            }
            catch (Exception ex)
            {
                validate.errors.Add("Exception.Message: " + ex.Message);
                XmlType = null;
                return new KeyValuePair<bool, List<string>>(false, validate.errors);
            }
        }

        /// <summary>
        /// Returns the xsd file name from the root element's schemaLocation attribute, or an empty
        /// string when it is absent. The attribute looks like
        /// xsi:schemaLocation="http://sample.com/sample/contentExchange sample.0.6.2.xsd".
        /// </summary>
        private static string ReadSchemaFileName(string xml)
        {
            using (TextReader sr = new StringReader(xml))
            using (XmlReader xmlReader = XmlReader.Create(sr))
            {
                if (xmlReader.MoveToContent() != XmlNodeType.Element)
                {
                    return "";
                }

                // Look the attribute up by namespace: the "xsi" prefix is only a convention and
                // a document may bind the schema-instance namespace to any other prefix.
                var schemaLocation = xmlReader.GetAttribute("schemaLocation", XmlSchema.InstanceNamespace);
                if (string.IsNullOrWhiteSpace(schemaLocation))
                {
                    return "";
                }

                // The value is a whitespace-separated list; dropping empty entries tolerates
                // line breaks and repeated spaces between the namespace and the file name.
                var part = schemaLocation.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
                return part.Length >= 2 ? part[1] : "";
            }
        }

        /// <summary>
        /// Reads <paramref name="xml"/> with schema validation against the xsd at <paramref name="PathXsd"/>.
        /// </summary>
        private KeyValuePair<bool, List<string>> VerifyXmlFile(string xml, string PathXsd)
        {
            try
            {
                using (TextReader sr = new StringReader(xml))
                using (XmlReader xsd = XmlReader.Create(PathXsd))
                {
                    // configure the xmlreader validation to use inline schema.
                    XmlReaderSettings config = new XmlReaderSettings();
                    config.ValidationType = ValidationType.Schema;
                    config.ValidationFlags |= XmlSchemaValidationFlags.ReportValidationWarnings;
                    config.ValidationFlags |= XmlSchemaValidationFlags.ProcessInlineSchema;
                    config.ValidationFlags |= XmlSchemaValidationFlags.ProcessSchemaLocation;
                    config.ValidationFlags |= XmlSchemaValidationFlags.ProcessIdentityConstraints;
                    config.ValidationEventHandler += new ValidationEventHandler(ValidationCallBack);
                    config.Schemas.Add(null, xsd);

                    // Get the XmlReader object with the configured settings.
                    using (XmlReader reader = XmlReader.Create(sr, config))
                    {
                        // Parsing the file will cause the validation to occur.
                        while (reader.Read())
                        {
                        }

                        return new KeyValuePair<bool, List<string>>(valid, errors);
                    }
                }
            }
            catch (Exception ex)
            {
                errors.Add("Exception.Message: " + ex.Message);
                return new KeyValuePair<bool, List<string>>(false, errors);
            }
        }

        // Collects validation events; only errors make the document invalid.
        private void ValidationCallBack(object sender, ValidationEventArgs vea)
        {
            if (vea.Severity == XmlSeverityType.Error)
            {
                valid = false;
                errors.Add("ValidationCallBack: " + vea.Message);
            }
            else if (vea.Severity == XmlSeverityType.Warning)
            {
                warnings.Add("ValidationCallBack: " + vea.Message);
            }
        }
    }
}
to be used like
// Validate the incoming text; XmlType receives the class registered for the xsd it declares.
var ValidXml = BLL.XML.ValidateXML.Validate(text, out XmlType);
// Key is true only when the document passed validation.
if (ValidXml.Key)
{
// Dispatch to the handler that matches the schema version the document was validated against.
if (XmlType == typeof(contentExchangeType))
{
ProcessContentExchangeType062(text);
}
else if (XmlType == typeof(contentExchangeType11231321312))
{
ProcessContentExchangeType061(text);
}
}

Overwriting xml file #2

I'm trying to edit an xml and then save it with the same name.
I have the following code:
/// <summary>
/// Loads the XML file at <paramref name="xmlpath"/>, applies the requested edit and saves the
/// document back to the same path.
/// </summary>
/// <param name="xmlpath">Path of the XML file to edit in place.</param>
/// <param name="option">Edit to perform; "delete" removes the "identity" tags.</param>
/// <param name="returnCode">Incoming status code; replaced by the DeleteTag result for "delete".</param>
/// <returns>The resulting status code.</returns>
public int ModifyFile(string xmlpath, string option, int returnCode)
{
    var document = new XmlDocument();
    document.Load(xmlpath);

    bool deleteRequested = option.Equals("delete");
    if (deleteRequested)
    {
        XmlNode rootElement = document.DocumentElement;
        returnCode = DeleteTag(rootElement, "identity", returnCode);
    }

    // The file is rewritten whether or not anything was removed.
    document.Save(xmlpath);
    return returnCode;
}
/// <summary>
/// Recursively removes every descendant of <paramref name="root"/> named <paramref name="deleteName"/>.
/// </summary>
/// <param name="root">Node whose subtree is searched.</param>
/// <param name="deleteName">Qualified name of the nodes to remove.</param>
/// <param name="returnCode">Status code to return when nothing is removed.</param>
/// <returns>1 if at least one node was removed; otherwise <paramref name="returnCode"/> unchanged.</returns>
public int DeleteTag(XmlNode root, string deleteName, int returnCode)
{
    XmlNode node = root.FirstChild;
    while (node != null)
    {
        // Remember the successor BEFORE a possible removal: a removed node no longer has a
        // NextSibling, which is why enumerating root.ChildNodes while removing from it stops
        // early and leaves later siblings untouched.
        XmlNode next = node.NextSibling;

        if (node.Name == deleteName)
        {
            root.RemoveChild(node);
            returnCode = 1;
        }
        else
        {
            returnCode = DeleteTag(node, deleteName, returnCode);
        }

        node = next;
    }

    return returnCode;
}
I'm getting "The process cannot access the file 'c:\temp\testfile.xml' because it is being used by another process" when it executes xmlDoc.Save(path).
How would I be able to save testfile.xml with the changes made? I need to keep the path and name the same.
/// <summary>
/// Tells whether the XML file at <paramref name="path"/> contains an "identity" element.
/// </summary>
/// <param name="path">Path of the XML file to scan.</param>
/// <returns>True if an "identity" element is found; false if not, or if the file does not exist.</returns>
public static bool hasIdentityTag(string path)
{
    if (!File.Exists(path))
    {
        Console.WriteLine("The file {0} could not be located", path);
        return false;
    }

    // The using block closes the reader on every path. Previously it was only closed when the
    // tag was found, so a file without the tag stayed open and a later save to the same path
    // failed with "being used by another process".
    using (XmlTextReader rdrXml = new XmlTextReader(path))
    {
        while (rdrXml.Read())
        {
            if (rdrXml.NodeType == XmlNodeType.Element && rdrXml.Name.Equals("identity"))
            {
                return true;
            }
        }
    }

    return false;
}

One option would be to save the new XML to a temporary file, close the XmlDocument and dispose of the object, then move the temporary file back to the right place.

You could try this re-write using LinqToXml:
// Load the document, strip the requested elements and write it back only when something changed.
XElement root = XElement.Load(xmlpath);
bool modified = false;
try
{
    if (option == "delete")
    {
        // Snapshot the matches first so removing them does not disturb the live query.
        XElement[] identities = root.Descendants("identity").ToArray();
        foreach (XElement identity in identities)
        {
            identity.Remove();
            modified = true;
            returnCode = 1;
        }
    }
}
finally
{
    // Runs even if the removal loop throws, so changes already made are still saved.
    if (modified)
    {
        root.Save(xmlpath);
    }
}
return returnCode;

How about loading the XmlDocument from a Stream instead of by file name? If you still encounter errors, this would indicate that something outside of your method/process is blocking the save.
Try rewriting the ModifyFile method like this:
/// <summary>
/// Loads the XML file through a stream that is closed before saving, applies the requested
/// edit and saves the document back to the same path.
/// </summary>
/// <param name="xmlpath">Path of the XML file to edit in place.</param>
/// <param name="option">Edit to perform; "delete" removes the "identity" tags.</param>
/// <param name="returnCode">Incoming status code; replaced by the DeleteTag result for "delete".</param>
/// <returns>The resulting status code.</returns>
public int ModifyFile(string xmlpath, string option, int returnCode)
{
    XmlDocument xmlDoc = new XmlDocument();

    // Load from a stream, not the file name; the using block closes the stream even when
    // Load throws, so this method never holds the file open past this point.
    using (FileStream fs = File.OpenRead(xmlpath))
    {
        xmlDoc.Load(fs);
    }

    XmlNode parentNode = xmlDoc.DocumentElement;
    if (option.Equals("delete"))
    {
        returnCode = DeleteTag(parentNode, "identity", returnCode);
    }

    xmlDoc.Save(xmlpath);
    return returnCode;
}

Save email message as eml using C# in Lotus Notes

I need to export (save to) hard drive my Lotus Notes emails.
I figured out the way how to save attachments to HDD, but I can't figure out the way of how to save the whole email.
The code below shows how I export attachments. Can you suggest how can I modify it to save emails?
PS- I am new to programming.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Domino;
using System.Collections;
namespace ExportLotusAttachments
{
    class Class1
    {
        /// <summary>
        /// Walks every folder of the current user's mail database (except "($All)") and
        /// extracts each attachment found into the export directory.
        /// </summary>
        public void ScanForEmails()
        {
            String textBox1 = "c:\\1"; // export directory

            NotesSession session = new NotesSession();
            session.Initialize("");
            NotesDbDirectory dir = session.GetDbDirectory("");
            NotesDatabase NDb = dir.OpenMailDatabase(); //Database connection

            //ArrayList that will hold names of the folders
            ArrayList LotusViews2 = new ArrayList();
            foreach (NotesView V in NDb.Views)
            {
                if (V.IsFolder && !(V.Name.Equals("($All)")))
                {
                    LotusViews2.Add(V.Name);
                }
            }

            foreach (String obj in LotusViews2)
            {
                NotesView nInboxDocs = NDb.GetView(obj);
                NotesDocument NDoc = nInboxDocs.GetFirstDocument();
                while (NDoc != null)
                {
                    if (NDoc.HasEmbedded && NDoc.HasItem("$File"))
                    {
                        object[] AllDocItems = (object[])NDoc.Items;
                        foreach (object CurItem in AllDocItems)
                        {
                            NotesItem nItem = (NotesItem)CurItem;
                            if (IT_TYPE.ATTACHMENT != nItem.type)
                            {
                                continue;
                            }

                            String pAttachment = ((object[])nItem.Values)[0].ToString();
                            try
                            {
                                // No-op when the directory already exists.
                                System.IO.Directory.CreateDirectory(textBox1);

                                // Path.Combine inserts the separator; plain concatenation wrote
                                // "c:\1<name>" next to the export directory instead of into it.
                                NDoc.GetAttachment(pAttachment).ExtractFile(System.IO.Path.Combine(textBox1, pAttachment));
                            }
                            catch (Exception ex)
                            {
                                // Best effort: report the failure and continue with the next attachment.
                                Console.Error.WriteLine("Could not extract attachment '" + pAttachment + "': " + ex.Message);
                            }
                        }
                    }

                    NDoc = nInboxDocs.GetNextDocument(NDoc);
                }
            }
        }
    }
}

This post by Bob Babalan explains how to export Lotus documents using Java. The same principle should work in C# or VB. The document is converted into MIME and written to the disk.
Or in version 8.5.3 (I think it started with 8.5.1) you can just drag and drop it from the mail file to the file system.

I know it is a bit late, but this is what I did (based on Bob Babalan's post).
Bob's solution helped me a lot to understand NotesMIMEEntities, but his solution only traverses the MIME tree down to the second "layer". This one traverses multiple layers.
/// <summary>
/// Writes the top-level MIME entity of a message to <paramref name="writer"/>: its headers
/// (preceded by a MIME-Version header if none is present), its own content if any, every
/// child entity via GetMimeChild, and finally the closing boundary.
/// </summary>
/// <param name="writer">Destination for the MIME text; flushed before returning.</param>
/// <param name="mimeEntity">Root MIME entity of the message.</param>
public static void GetMIME(StreamWriter writer, NotesMIMEEntity mimeEntity)
{
try
{
string contentType = null;
string headers = null;
string content = null;
string preamble = null;
MIME_ENCODING encoding;
contentType = mimeEntity.ContentType;
headers = mimeEntity.Headers;
encoding = mimeEntity.Encoding;
// message envelope. If no MIME-Version header, add one
if (!headers.Contains("MIME-Version:"))
writer.WriteLine("MIME-Version: 1.0");
writer.WriteLine(headers);
// for multipart, usually no main-msg content...
content = mimeEntity.ContentAsText;
if (content != null && content.Trim().Length > 0)
writer.WriteLine(content);
writer.Flush();
// A multipart entity carries its parts as child entities; write each one in order.
if (contentType.StartsWith("multipart"))
{
preamble = mimeEntity.Preamble;
NotesMIMEEntity mimeChild = mimeEntity.GetFirstChildEntity();
while (mimeChild != null)
{
GetMimeChild(writer, mimeChild);
mimeChild = mimeChild.GetNextSibling();
}
}
writer.WriteLine(mimeEntity.BoundaryEnd);
writer.Flush();
}
// Failures are logged, not rethrown, so the output may be incomplete when this happens.
catch (Exception ex)
{
Logging.Log(ex.ToString());
}
}
/// <summary>
/// Writes one child MIME entity (boundary, headers, content) and, recursively, all of its
/// own children, so MIME trees of any depth are exported.
/// </summary>
/// <param name="writer">Destination for the MIME text; flushed before returning.</param>
/// <param name="mimeEntity">Child entity to write.</param>
// Static so that the static GetMIME method can call it.
private static void GetMimeChild(StreamWriter writer, NotesMIMEEntity mimeEntity)
{
    string contentType = mimeEntity.ContentType;
    string headers = mimeEntity.Headers;

    // Binary parts cannot be written as text; re-encode them as base64 and
    // re-read the headers, which change along with the encoding.
    if (mimeEntity.Encoding == MIME_ENCODING.ENC_IDENTITY_BINARY)
    {
        mimeEntity.EncodeContent(MIME_ENCODING.ENC_BASE64);
        headers = mimeEntity.Headers;
    }

    // The boundary has to end its line before the headers start. The previous code tested a
    // variable that was never assigned (always null), which threw on the first child entity.
    string boundaryStart = mimeEntity.BoundaryStart ?? string.Empty;
    writer.Write(boundaryStart);
    if (!boundaryStart.EndsWith("\n"))
        writer.WriteLine("");

    writer.WriteLine(headers);
    writer.WriteLine();
    writer.Write(mimeEntity.ContentAsText);

    if (contentType != null && contentType.StartsWith("multipart"))
    {
        NotesMIMEEntity mimeChild = mimeEntity.GetFirstChildEntity();
        while (mimeChild != null)
        {
            GetMimeChild(writer, mimeChild);
            mimeChild = mimeChild.GetNextSibling();
        }
    }

    writer.Write(mimeEntity.BoundaryEnd);
    writer.Flush();
}
I would call this methods like this, to save the EML-File to a given path.
// Create (or overwrite) the .eml file at `path` and stream the document's MIME content into it.
using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.ReadWrite, FileShare.None))
using (StreamWriter writer = new StreamWriter(fs))
{
    // The type is NotesMIMEEntity, matching the GetMIME parameter type.
    NotesMIMEEntity mimeEntity = notesDocument.GetMIMEEntity();

    // A null entity means there is nothing to export; the file is then left empty.
    if (mimeEntity != null)
        GetMIME(writer, mimeEntity);
}

We Keep Coding

C# (C-Sharp) is a programming language developed by Microsoft that runs on the .NET Framework.

Merge XML files using XmlReader and XmlWriter - c#

Related

C# remove not UTF-8 supported values from XML for XslCompiledTransform.Transform

How to read last line from output process class?

proper way to get schemaLocation to determine which xsd to load and which type to return?

Overwriting xml file #2

Save email message as eml using C# in Lotus Notes

Categories

Resources