What is the fastest way to load XML into an XDocument?

What is the fastest way to load XML into an XDocument? - c#

When you create a new XDocument using XDocument.Load, does it open the XML file and keep a local copy, or does it continuously read the document from the hard drive? If it does continuously read, is there a faster way to parse XML?
// Load the XML file at the given path into an XDocument.
XDocument x = XDocument.Load("file.xml");

There are a couple of measurements to consider:
Linear traversal speed (e.g. reading/loading)
On-demand query speed
To answer the immediate question: XDocument uses an XmlReader to load the document into memory by reading each element and creating corresponding XElement instances (see code below). As such, it should be quite fast (fast enough for most purposes), but it may consume a large amount of memory when parsing a large document.
A raw XmlReader is an excellent choice for traversal if your needs are limited to that which can be done without retaining the document in memory. It will outperform other methods since no significant structure is created nor resolved with relation to other nodes (e.g. linking parent and child nodes). However, on-demand query ability is almost non-existent; you can react to values found in each node, but you can't query the document as a whole. If you need to look at the document a second time, you have to traverse the whole thing again.
By comparison, an XDocument will take longer to traverse because it instantiates new objects and performs basic structural tasks. It will also consume memory proportionate to the size of the source. In exchange for these trade-offs, you gain excellent query abilities.
It may be possible to combine the approaches, as mentioned by Jon Skeet and shown here: Streaming Into LINQ to XML Using C# Custom Iterators and XmlReader.
Source for XDocument Load()
/// <summary>
/// Builds an <see cref="XDocument"/> from a stream by wrapping the stream in an
/// <see cref="XmlReader"/> whose settings are derived from <paramref name="options"/>.
/// </summary>
/// <param name="stream">Stream containing the XML to read.</param>
/// <param name="options">Load options forwarded to the reader-based overload.</param>
/// <returns>The document produced by the reader-based <c>Load</c> overload.</returns>
public static XDocument Load(Stream stream, LoadOptions options)
{
    XmlReaderSettings settings = XNode.GetXmlReaderSettings(options);
    using (XmlReader reader = XmlReader.Create(stream, settings))
    {
        // The reader is disposed on the way out, after the tree has been built.
        return XDocument.Load(reader, options);
    }
}
// which calls...
/// <summary>
/// Builds an <see cref="XDocument"/> from an <see cref="XmlReader"/>, optionally
/// recording the base URI and line information on the document.
/// </summary>
/// <param name="reader">Reader positioned at (or before) the start of the document.</param>
/// <param name="options">Flags selecting base-URI and line-info tracking.</param>
/// <returns>The populated document.</returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The reader is not at end of file after the content was read, or the document has no root.
/// </exception>
public static XDocument Load(XmlReader reader, LoadOptions options)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    // A reader that has not been started yet is advanced onto its first node.
    if (reader.ReadState == ReadState.Initial)
    {
        reader.Read();
    }

    bool wantsBaseUri = (options & LoadOptions.SetBaseUri) != LoadOptions.None;
    bool wantsLineInfo = (options & LoadOptions.SetLineInfo) != LoadOptions.None;

    XDocument document = new XDocument();

    if (wantsBaseUri)
    {
        string documentUri = reader.BaseURI;
        if (!string.IsNullOrEmpty(documentUri))
        {
            document.SetBaseUri(documentUri);
        }
    }

    if (wantsLineInfo)
    {
        IXmlLineInfo lineInfo = reader as IXmlLineInfo;
        if (lineInfo != null && lineInfo.HasLineInfo())
        {
            document.SetLineInfo(lineInfo.LineNumber, lineInfo.LinePosition);
        }
    }

    if (reader.NodeType == XmlNodeType.XmlDeclaration)
    {
        document.Declaration = new XDeclaration(reader);
    }

    document.ReadContentFrom(reader, options);

    if (!reader.EOF)
    {
        throw new InvalidOperationException(Res.GetString("InvalidOperation_ExpectedEndOfFile"));
    }

    if (document.Root == null)
    {
        throw new InvalidOperationException(Res.GetString("InvalidOperation_MissingRoot"));
    }

    return document;
}
// which calls...
// Reads nodes from the reader into this container, creating one LINQ to XML object per
// node. When SetBaseUri and/or SetLineInfo is requested in "o", each created node is
// additionally annotated with its base URI and/or line position.
// Throws InvalidOperationException when the reader is not in the Interactive state, when an
// entity reference cannot be resolved, or when an unexpected node type is encountered.
// NOTE(review): the goto/label layout looks like decompiler output - confirm against the real source.
internal void ReadContentFrom(XmlReader r, LoadOptions o)
{
// Neither annotation was requested: delegate to the overload that takes only the reader.
if ((o & (LoadOptions.SetBaseUri | LoadOptions.SetLineInfo)) == LoadOptions.None)
{
this.ReadContentFrom(r);
return;
}
if (r.ReadState != ReadState.Interactive)
{
throw new InvalidOperationException(Res.GetString("InvalidOperation_ExpectedInteractive"));
}
// xContainer: the container that newly read nodes are appended to.
XContainer xContainer = this;
// xNode: a non-element node created in the switch below and attached at IL_30A.
XNode xNode = null;
// One name cache is used for element names, the other for attribute names.
NamespaceCache namespaceCache = default(NamespaceCache);
NamespaceCache namespaceCache2 = default(NamespaceCache);
// text: the base URI currently in effect; stays null when SetBaseUri was not requested.
string text = ((o & LoadOptions.SetBaseUri) != LoadOptions.None) ? r.BaseURI : null;
// xmlLineInfo: stays null when SetLineInfo was not requested or the reader lacks IXmlLineInfo.
IXmlLineInfo xmlLineInfo = ((o & LoadOptions.SetLineInfo) != LoadOptions.None) ? (r as IXmlLineInfo) : null;
while (true)
{
string baseURI = r.BaseURI;
switch (r.NodeType)
{
case XmlNodeType.Element:
{
XElement xElement = new XElement(namespaceCache.Get(r.NamespaceURI).GetName(r.LocalName));
// Only record a base URI on the element when it differs from the one in effect.
if (text != null && text != baseURI)
{
xElement.SetBaseUri(baseURI);
}
if (xmlLineInfo != null && xmlLineInfo.HasLineInfo())
{
xElement.SetLineInfo(xmlLineInfo.LineNumber, xmlLineInfo.LinePosition);
}
if (r.MoveToFirstAttribute())
{
do
{
// Attributes without a prefix are created with the empty namespace.
XAttribute xAttribute = new XAttribute(namespaceCache2.Get((r.Prefix.Length == 0) ? string.Empty : r.NamespaceURI).GetName(r.LocalName), r.Value);
if (xmlLineInfo != null && xmlLineInfo.HasLineInfo())
{
xAttribute.SetLineInfo(xmlLineInfo.LineNumber, xmlLineInfo.LinePosition);
}
xElement.AppendAttributeSkipNotify(xAttribute);
}
while (r.MoveToNextAttribute());
// Return the reader from the attribute list to the owning element.
r.MoveToElement();
}
xContainer.AddNodeSkipNotify(xElement);
// An empty element has no content to descend into.
if (r.IsEmptyElement)
{
goto IL_30A;
}
// Descend: subsequent nodes become children of this element.
xContainer = xElement;
if (text != null)
{
text = baseURI;
goto IL_30A;
}
goto IL_30A;
}
case XmlNodeType.Text:
case XmlNodeType.Whitespace:
case XmlNodeType.SignificantWhitespace:
// A dedicated XText node is created only when it will carry an annotation;
// otherwise the text is appended to the container as a plain string.
if ((text != null && text != baseURI) || (xmlLineInfo != null && xmlLineInfo.HasLineInfo()))
{
xNode = new XText(r.Value);
goto IL_30A;
}
xContainer.AddStringSkipNotify(r.Value);
goto IL_30A;
case XmlNodeType.CDATA:
xNode = new XCData(r.Value);
goto IL_30A;
case XmlNodeType.EntityReference:
if (!r.CanResolveEntity)
{
goto Block_25;
}
r.ResolveEntity();
goto IL_30A;
case XmlNodeType.ProcessingInstruction:
xNode = new XProcessingInstruction(r.Name, r.Value);
goto IL_30A;
case XmlNodeType.Comment:
xNode = new XComment(r.Value);
goto IL_30A;
case XmlNodeType.DocumentType:
xNode = new XDocumentType(r.LocalName, r.GetAttribute("PUBLIC"), r.GetAttribute("SYSTEM"), r.Value, r.DtdInfo);
goto IL_30A;
case XmlNodeType.EndElement:
{
// A container that received no content gets an empty string as its content
// (presumably to distinguish <a></a> from <a/> - confirm).
if (xContainer.content == null)
{
xContainer.content = string.Empty;
}
XElement xElement2 = xContainer as XElement;
if (xElement2 != null && xmlLineInfo != null && xmlLineInfo.HasLineInfo())
{
xElement2.SetEndElementLineInfo(xmlLineInfo.LineNumber, xmlLineInfo.LinePosition);
}
// The end tag belongs to the container this method was invoked on: reading is complete.
if (xContainer == this)
{
return;
}
// Leaving an element that carried its own base URI: restore the parent's.
if (text != null && xContainer.HasBaseUri)
{
text = xContainer.parent.BaseUri;
}
// Ascend: subsequent nodes are siblings of the element just closed.
xContainer = xContainer.parent;
goto IL_30A;
}
case XmlNodeType.EndEntity:
goto IL_30A;
}
// Any node type not handled above leaves the loop and reaches IL_2E1 (unexpected node type).
break;
IL_30A:
// Common tail: annotate and attach the pending node (if any), then advance the reader.
if (xNode != null)
{
if (text != null && text != baseURI)
{
xNode.SetBaseUri(baseURI);
}
if (xmlLineInfo != null && xmlLineInfo.HasLineInfo())
{
xNode.SetLineInfo(xmlLineInfo.LineNumber, xmlLineInfo.LinePosition);
}
xContainer.AddNodeSkipNotify(xNode);
xNode = null;
}
// No more nodes to read.
if (!r.Read())
{
return;
}
}
goto IL_2E1;
Block_25:
throw new InvalidOperationException(Res.GetString("InvalidOperation_UnresolvedEntityReference"));
IL_2E1:
throw new InvalidOperationException(Res.GetString("InvalidOperation_UnexpectedNodeType", new object[]
{
r.NodeType
}));
}

It will parse the incoming stream (whether it is from a file or a string doesn't matter) when you call Load() and then keep a local instance of the document in memory. Since the source can be anything (could be a NetworkStream, a DataReader, a string entered by the user) it couldn't go back and try to read the data again since it wouldn't know the state of it (streams being closed etc).
If you really want speed, on the other hand, XDocument isn't the fastest (although it is easier to work with), since it needs to first parse the document and then retain it in memory. If you are working with really large documents, an approach based on System.Xml.XmlReader is usually much faster, since it can read the document as a stream and doesn't need to retain anything except the current element. This benchmark shows some interesting figures about this.

I do not think it continuously reads; the nice thing about the XDocument.Load method is that it uses XmlReader to read the XML into an XML tree. Since you have now created a tree, which is most likely stored in memory as a tree, it no longer reads the document constantly. It manipulates the tree, and since it is a tree, all your reading and modification are done a lot faster. Although XDocument does not implement IDisposable, it is cleaned up automatically by the garbage collector.

Related

How to get data out of xml XDocument in C#?

I have an XML file that I get via a web service. File looks like this:
<Z_STU_CRS_TRNS_DOC xmlns="http://testurl">
<Z_STATUS_CODE>0</Z_STATUS_CODE>
<Z_STATUS_MSG>Success</Z_STATUS_MSG>
<Z_STUDENT_ID_SUB_DOC xmlns="http://testurl">
<Z_STU_ID>000999999</Z_STU_ID>
</Z_STUDENT_ID_SUB_DOC>
<Z_CRSE_SUB_DOC xmlns="http://testurl">
<Z_COURSE xmlns="http://testurl">
<Z_CRSE_DATA>9999|199901|TEST|9999|1|S|Scuba Diving| |XX</Z_CRSE_DATA>
</Z_COURSE>
<Z_COURSE xmlns="testurl">
<Z_CRSE_DATA>9999|200001|TEST|999|3|A|English 101| |XX</Z_CRSE_DATA>
</Z_COURSE>
</Z_CRSE_SUB_DOC>
</Z_STU_CRS_TRNS_DOC>
I'm able to consume the service and check for errors but I'm having a tough time actually getting the data out of the XDocument xml file.
// Completion handler for the string download. Shows an error label and returns when the
// request itself failed, or when the response's Z_STATUS_CODE element holds the value 1.
// NOTE: the snippet stops before the method's closing brace.
protected void webClient_DownloadStringCompleted(object sender,
DownloadStringCompletedEventArgs e)
{
// Transport-level failure: report it and stop.
if (e.Error != null)
{
errorLabel.Text =
"The transaction failed. If you feel that you have reached " +
"this in error, please contact the help desk at xxx-xxx-xxxx.";
errorLabel.Visible = true;
return;
}
XDocument xml = XDocument.Parse(e.Result);
// Element names are looked up in this namespace.
XNamespace ns = "http://testurl";
//Look for error code from WS
var field = xml.Descendants(ns + "Z_STATUS_CODE").FirstOrDefault();
if (field != null)
{
// A status code of 1 is reported to the user as an invalid student id.
if (Convert.ToInt32((string)field.Value) == 1)
{
errorLabel.Text =
"The transaction failed, due to an invalid student id. If you " +
"feel that you have reached this in error, please contact " +
"the help desk at xxx-xxx-xxxx.";
errorLabel.Visible = true;
return;
}
}
I tried many different ways but nothing seems to help. Can someone help?

I figured it out! Posting in case someone else has similar issue.
// Collect the text of every Z_CRSE_DATA element in the document.
List<studentRecord> studentCourses = new List<studentRecord>();
// "using" releases the reader even if reading throws part-way through.
using (XmlReader reader = xml.CreateReader())
{
    // Get elements
    while (reader.Read())
    {
        if (reader.NodeType != XmlNodeType.Element || reader.Name != "Z_CRSE_DATA")
        {
            continue;
        }

        // Step onto the node that follows the start tag; only a text node carries data.
        reader.Read();
        if (reader.NodeType == XmlNodeType.Text)
        {
            studentRecord stuRec = new studentRecord();
            stuRec.rawData = reader.Value;
            studentCourses.Add(stuRec);
        }
    }
}

You could read the node list, and read the values of the nodes according to their name.
It takes a bit more work, as you have to work through all the nodes itself, but like this you could do it
/// <summary>
/// Console sample that parses a fixed XML response and reports the values of its
/// Z_STATUS_CODE and Z_STATUS_MSG elements.
/// </summary>
class Program
{
    /// <summary>
    /// Walks the document with an XmlReader, picks up the status code and status
    /// message, and prints the outcome to the console.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // Verbatim string literal: the prefix is @ and every embedded " is written as "".
        string xmldata = @"<Z_STU_CRS_TRNS_DOC xmlns=""http://testurl"">
<Z_STATUS_CODE>0</Z_STATUS_CODE>
<Z_STATUS_MSG>Success</Z_STATUS_MSG>
<Z_STUDENT_ID_SUB_DOC xmlns=""http://testurl"">
<Z_STU_ID>000999999</Z_STU_ID>
</Z_STUDENT_ID_SUB_DOC>
<Z_CRSE_SUB_DOC xmlns=""http://testurl"">
<Z_COURSE xmlns=""http://testurl"">
<Z_CRSE_DATA>9999|199901|TEST|9999|1|S|Scuba Diving| |XX</Z_CRSE_DATA>
</Z_COURSE>
<Z_COURSE xmlns=""testurl"">
<Z_CRSE_DATA>9999|200001|TEST|999|3|A|English 101| |XX</Z_CRSE_DATA>
</Z_COURSE>
</Z_CRSE_SUB_DOC>
</Z_STU_CRS_TRNS_DOC>";
        string errorTag = "Z_STATUS_CODE",
            statusTag = "Z_STATUS_MSG";
        XDocument xml = XDocument.Parse(xmldata);
        // -1 means "no status code element was found".
        int errorCode = -1;
        string statusMessage = string.Empty;
        using (XmlReader reader = xml.CreateReader())
        {
            while (reader.Read())
            {
                if (reader.NodeType != XmlNodeType.Element)
                {
                    continue;
                }
                if (!string.Equals(reader.Name, errorTag) &&
                    !string.Equals(reader.Name, statusTag))
                {
                    continue;
                }
                // Remember which element we are inside, then look for its text node.
                string currentName = reader.Name;
                while (reader.Read())
                {
                    // Reached the end tag without seeing text: the element is empty.
                    if (reader.NodeType == XmlNodeType.EndElement)
                    {
                        break;
                    }
                    if (reader.NodeType != XmlNodeType.Text)
                    {
                        continue;
                    }
                    if (string.Equals(currentName, errorTag))
                    {
                        errorCode = int.Parse(reader.Value);
                    }
                    if (string.Equals(currentName, statusTag))
                    {
                        statusMessage = reader.Value;
                    }
                    break;
                }
            }
        }
        if (errorCode == -1)
        {
            // no tag found
            Console.WriteLine("No tag found named: {0}", errorTag);
        }
        else if (errorCode == 0)
        {
            Console.WriteLine("Operation was a success!");
        }
        else
        {
            Console.WriteLine("Operation failed with error code {0}", errorCode);
        }
        if (!string.IsNullOrWhiteSpace(statusMessage))
        {
            Console.WriteLine("Status message: {0}", statusMessage);
        }
        // Keep the console window open until Enter is pressed.
        Console.ReadLine();
    }
}

String remains null even after putting data

I am new to windows metro apps and totally stuck here. textbox1.text displaying the accurate data inside the function but Aya remains null outside the function. How can i solve this problem ? I think recursion is creating problem but how to solve it ?
// Downloads the page, loads it into an HtmlDocument and starts the recursive walk.
// Because the method is "async void", it returns to its caller at the first await
// and the caller has no way to wait for the parsing to finish.
public async void Aya_Parse()
{
// Initialize http client.
HttpClient httpClient = new HttpClient();
Stream stream = await httpClient.GetStreamAsync("some link");
// Load html document from stream provided by http client.
HtmlDocument htmlDocument = new HtmlDocument();
htmlDocument.OptionFixNestedTags = true;
htmlDocument.Load(stream);
// Walk the whole tree starting at the document node.
Aya_ParseHtmlNode(htmlDocument.DocumentNode);
}
int aia = 0;
string Aya = null;
// Recursively visits every child of htmlNode and appends text to the Aya field.
// The aia field acts as a flag: it is set to 1 when a "td" element is seen and
// cleared after the next text node has been appended.
private void Aya_ParseHtmlNode(HtmlNode htmlNode)
{
foreach (HtmlNode childNode in htmlNode.ChildNodes)
{
// Text nodes are only collected while the flag is set.
if (childNode.NodeType == HtmlNodeType.Text && aia == 1)
{
Aya += " " + childNode.InnerText.ToString(); aia = 0;
}
else if (childNode.NodeType == HtmlNodeType.Element)
{
Aya += " "; // removing this causes null exception at textbox1.text
switch (childNode.Name.ToLower())
{
case "span":
// Appends the text of the node that follows the span, then descends into the span.
Aya += childNode.NextSibling.InnerText.ToString();
Aya_ParseHtmlNode(childNode);
break;
case "td":
aia = 1;
Aya_ParseHtmlNode(childNode);break;
default:
Aya_ParseHtmlNode(childNode); break;
}
}
}
// Runs at the end of every recursion level, not only once at the end of the walk.
textBox1.Text = Aya;
}

You never assign a starting value to Aya, so even though you try to add text to it in your Aya_ParseHtmlNode(HtmlNode htmlNode) method, you can't add text to a null value. This can be fixed by doing a check for null on the value and setting it to a default. I'm surprised you aren't getting a NullReferenceException inside your method...
/// <summary>
/// Downloads the page, loads it into an HtmlDocument and starts the recursive parse.
/// </summary>
/// <remarks>
/// The method is "async void", so callers cannot await it; it returns to the caller
/// at the first await, before any parsing has happened.
/// </remarks>
public async void Aya_Parse()
{
    // Initialize http client. Both the client and the response stream are disposed
    // once the document has been loaded and parsed.
    using (HttpClient httpClient = new HttpClient())
    using (Stream stream = await httpClient.GetStreamAsync("some link"))
    {
        // Load html document from stream provided by http client.
        HtmlDocument htmlDocument = new HtmlDocument();
        htmlDocument.OptionFixNestedTags = true;
        htmlDocument.Load(stream);
        // greetingOutput.Text = htmlDocument.DocumentNode.InnerText.ToString();
        // Parse html node, this is a recursive function which call itself until
        // all the childs of html document has been navigated and parsed.
        Aya_ParseHtmlNode(htmlDocument.DocumentNode);
    }
}
int aia = 0;
string Aya = null;
/// <summary>
/// Recursively visits every child of <paramref name="htmlNode"/>, appending text to the
/// Aya field, and writes the accumulated text to textBox1.
/// </summary>
/// <param name="htmlNode">Node whose children are visited.</param>
private void Aya_ParseHtmlNode(HtmlNode htmlNode)
{
    // Start from an empty string so the field is never null once parsing has begun.
    if (Aya == null)
    {
        Aya = String.Empty;
    }
    foreach (HtmlNode childNode in htmlNode.ChildNodes)
    {
        // Text nodes are only collected while the flag set by a "td" element is active.
        if (childNode.NodeType == HtmlNodeType.Text && aia == 1)
        {
            Aya += " " + childNode.InnerText;
            aia = 0;
        }
        else if (childNode.NodeType == HtmlNodeType.Element)
        {
            Aya += " ";
            switch (childNode.Name.ToLowerInvariant())
            {
                case "span":
                    // A span that is the last child has no next sibling to read from.
                    if (childNode.NextSibling != null)
                    {
                        Aya += childNode.NextSibling.InnerText;
                    }
                    Aya_ParseHtmlNode(childNode);
                    break;
                case "td":
                    aia = 1;
                    Aya_ParseHtmlNode(childNode);
                    break;
                default:
                    Aya_ParseHtmlNode(childNode);
                    break;
            }
        }
    }
    textBox1.Text = Aya;
}
Using a StringBuilder might also be a better idea here: since you recurse and could generate a very large string, a StringBuilder would be easier on your memory.
/// <summary>
/// Downloads the page synchronously, parses it into a local StringBuilder and shows
/// the result in textBox1.
/// </summary>
public void Aya_Parse()
{
    // Initialize http client.
    using (HttpClient httpClient = new HttpClient())
    // NOTE: .Result blocks the calling thread until the download has completed.
    using (Stream stream = httpClient.GetStreamAsync("some link").Result)
    {
        // Load html document from stream provided by http client.
        HtmlDocument htmlDocument = new HtmlDocument();
        htmlDocument.OptionFixNestedTags = true;
        htmlDocument.Load(stream);
        // greetingOutput.Text = htmlDocument.DocumentNode.InnerText.ToString();
        // Parse html node, this is a recursive function which call itself until
        // all the childs of html document has been navigated and parsed.
        //you marked the method Async, and
        //since Aya is in the class, if multiple threads call this
        //method, you could get inconsistent results
        //I have changed it to a parameter here so this doesn't happen
        StringBuilder Aya = new StringBuilder();
        Aya_ParseHtmlNode(htmlDocument.DocumentNode, Aya);
        //I would also move your textbox update here, so you aren't calling
        //ToString() all the time, wasting all of the memory benefits
        textBox1.Text = Aya.ToString();
    }
}
int aia = 0;
/// <summary>
/// Recursively visits every child of <paramref name="htmlNode"/> and appends the
/// collected text to <paramref name="Aya"/>.
/// </summary>
/// <param name="htmlNode">Node whose children are visited.</param>
/// <param name="Aya">Builder that receives the collected text.</param>
private void Aya_ParseHtmlNode(HtmlNode htmlNode, StringBuilder Aya)
{
    foreach (HtmlNode childNode in htmlNode.ChildNodes)
    {
        // Text nodes are only collected while the flag set by a "td" element is active.
        if (childNode.NodeType == HtmlNodeType.Text && aia == 1)
        {
            Aya.Append(childNode.InnerText);
            aia = 0;
        }
        else if (childNode.NodeType == HtmlNodeType.Element)
        {
            Aya.Append(" ");
            switch (childNode.Name.ToLowerInvariant())
            {
                case "span":
                    // A span that is the last child has no next sibling to read from.
                    if (childNode.NextSibling != null)
                    {
                        Aya.Append(childNode.NextSibling.InnerText);
                    }
                    Aya_ParseHtmlNode(childNode, Aya);
                    break;
                case "td":
                    aia = 1;
                    Aya_ParseHtmlNode(childNode, Aya);
                    break;
                default:
                    Aya_ParseHtmlNode(childNode, Aya);
                    break;
            }
        }
    }
}
Edit: Your issue actually probably comes from your use of the async keyword on Aya_Parse() which means that the method calling Aya_Parse() may return immediately before it actually does any processing. So if you are checking the value of Aya after calling Aya_Parse(), it likely has not had enough time to do the computation before you actually check the value elsewhere in your code. I recommend removing the async tag, or changing Aya_Parse() to return the value of Aya when it finishes. Check here for some good info on how to use the async tag with return values.

It could be. It's behaving as if your string variable is passed into the method by value rather than holding the reference.
Keep in mind that by using Async methods you are effectively multi threading, so multiple threads would be contending for the same module level variable. The compiler is likely choosing to make your code threadsafe for you.
If you declare a separate string inside your async method and pass it in by ref, it should behave as you expect.
I would also suggest you do the same with your module level int.
OR... you could remove the async from the Aya_Parse and use the Task library (and toss in a Wait call below) to get your stream.

Regex XML parsing C#

I am trying to build a regex parser for a single XML block.
I know people will say that Regex is not a good plan for xml, but I am working with stream data and I just need to know if a complete xml block has been broadcast and is sitting in the buffer.
I am trying to handle for anything between the Opening and closing blocks of the XML and any data in parameters of the main block header.
My example code is below the broken down Regular Expression, if anyone has any input on how to make this as comprehensive as possible I would greatly appreciate it.
Here is my regular expression formatted for visual aid.
I am balancing the <TAG> group, as well as the <inTAG> group, and validating that they do not exist at the end of the expression segments.
/*
^(?<TAG>[<]
(?![?])
(?<TAGNAME>[^\s/>]*)
)
(?<ParamData>
(
(\"
(?>
\\\"|
[^"]|
\"(?<quote>)|
\"(?<-quote>)
)*
(?(quote)(?!))
\"
)|
[^/>]
)*?
)
(?:
(?<HASCONTENT>[>])|
(?<-TAG>
(?<TAGEND>/[>])
)
)
(?(HASCONTENT)
(
(?<CONTENT>
(
(?<inTAG>[<]\<TAGNAME>)(?<-inTAG>/[>])?|
(?<-inTAG>[<]/\<TAGNAME>[>])|
([^<]+|[<](?![/]?\<TAGNAME>))
)*?
(?(inTAG)(?!))
)
)
(?<TAGEND>(?<-TAG>)[<]/\<TAGNAME>[>])
)
(?(TAG)(?!))
*/
Within my class, I expect that any Null object returned means there was no xml block on the queue.
Here is the class I am using.
(I used a verbatim string literal (@"") to limit the escape requirements; all " characters were replaced with "" to format properly.)
/// <summary>
/// Buffers incoming text and hands out, one at a time, the XML block found at the
/// start of the buffer. A null result from <see cref="GetObject"/> means that no
/// complete block is currently available.
/// </summary>
public class XmlDataParser
{
    // Verbatim string literal (@"..."): backslashes reach the regex engine untouched
    // and every literal " is written as "".
    // NOTE(review): "\<TAGNAME>" matches the literal text "<TAGNAME>"; if a
    // back-reference to the TAGNAME group was intended it should read "\k<TAGNAME>" - confirm.
    private const string xmlObjectExpression = @"^(?<TAG>[<](?![?])(?<TAGNAME>[^\s/>]*))(?<ParamData>((\""(?>\\\""|[^""]|\""(?<quote>)|\""(?<-quote>))*(?(quote)(?!))\"")|[^/>])*?)(?:(?<HASCONTENT>[>])|(?<-TAG>(?<TAGEND>/[>])))(?(HASCONTENT)((?<CONTENT>((?<inTAG>[<]\<TAGNAME>)(?<-inTAG>/[>])?|(?<-inTAG>[<]/\<TAGNAME>[>])|([^<]+|[<](?![/]?\<TAGNAME>)))*?(?(inTAG)(?!))))(?<TAGEND>(?<-TAG>)[<]/\<TAGNAME>[>]))(?(TAG)(?!))";

    // The pattern never changes, so one shared Regex instance is enough.
    private static readonly Regex xmlRegex = new Regex(xmlObjectExpression);

    // Private lock object: locking on "this" would let outside code take the same lock.
    private readonly object syncRoot = new object();

    private string backingStore = "";

    /// <summary>Reports whether the buffer currently starts with a complete block.</summary>
    /// <returns>true when the expression matches the buffered text.</returns>
    public bool HasObject()
    {
        lock (syncRoot)
        {
            return xmlRegex.IsMatch(backingStore);
        }
    }

    /// <summary>Removes the leading block from the buffer and returns it.</summary>
    /// <returns>The matched text, or null when the buffer holds no complete block.</returns>
    public string GetObject()
    {
        // Match and consume under one lock so the buffer cannot change in between.
        lock (syncRoot)
        {
            Match obj = xmlRegex.Match(backingStore);
            if (!obj.Success)
            {
                return null;
            }
            backingStore = backingStore.Substring(obj.Length);
            return obj.Value;
        }
    }

    /// <summary>Decodes <paramref name="bytes"/> and appends the text to the buffer.</summary>
    /// <param name="bytes">Raw bytes received from the stream.</param>
    public void AddData(byte[] bytes)
    {
        lock (syncRoot)
        {
            // NOTE(review): each chunk is decoded on its own, so a multi-byte character
            // split across two chunks would be decoded incorrectly - confirm the encoding in use.
            backingStore += System.Text.Encoding.Default.GetString(bytes);
        }
    }
}

Just use XmlReader and feed it a TextReader. To read streams, you want to change the ConformanceLevel to Fragment.
// Fragment conformance is what the answer recommends for reading streams.
XmlReaderSettings settings = new XmlReaderSettings();
settings.ConformanceLevel = ConformanceLevel.Fragment;
using (XmlReader reader = XmlReader.Create(tr, settings))
{
    while (reader.Read())
    {
        XmlNodeType nodeType = reader.NodeType;

        // this is from my code. You'll rewrite this part :
        if (nodeType == XmlNodeType.Element)
        {
            if (t != null)
            {
                // An object is already open: the element names one of its members.
                t.SetName(reader.Name);
            }
            else if (reader.Name == "event")
            {
                t = new Event1();
                t.Name = reader.Name;
            }
            else if (reader.Name == "data")
            {
                t = new Data1();
                t.Name = reader.Name;
            }
            else
            {
                throw new Exception("");
            }
        }
        else if (nodeType == XmlNodeType.Text)
        {
            if (t != null)
            {
                t.SetValue(reader.Value);
            }
        }
        else if (nodeType == XmlNodeType.EndElement)
        {
            // The end tag that matches the open object's name completes it.
            if (t != null && t.Name == reader.Name)
            {
                t.Close();
                t.Write(output);
                t = null;
            }
        }
        // XML declarations, processing instructions, comments, whitespace and
        // every other node type are ignored.
    }
}

How to split a Word document by specific text using C# and the Open XML SDK?

I want to split a Word document in two by a specific text programatically using C# and the Open XML SDK.
What I've done for the first part is removing all paragraphs up until the paragraph containing the desired text. this worked fine.
Then on the copy of the original document I did the same only this time removing all paragraphs starting from the one containing the desired text.
For some reason the second part turned out to be an invalid document, that can't be opened using word.
Opening the corrupted document with "Open XML SDK 2.0 Productivity Tool" and validating it, doesn't detect any problems with the document.
This is the code removing the part before the desired text (works fine):
/// <summary>
/// Opens the document for editing and removes every text item that comes before
/// the first one containing "split here".
/// </summary>
/// <param name="docName">Path of the Word document to modify.</param>
public static void DeleteFirstPart(string docName)
{
    using (WordprocessingDocument package = WordprocessingDocument.Open(docName, true))
    {
        DocumentFormat.OpenXml.Wordprocessing.Document doc = package.MainDocumentPart.Document;

        // The text elements are copied into a list before any of them is removed.
        List<Text> allText = doc.Body.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>().ToList();

        // TakeWhile stops at the first element that contains the marker.
        foreach (Text leading in allText.TakeWhile(t => !t.Text.Contains("split here")))
        {
            RemoveItem1(leading);
        }
    }
}
I Tried two different remove item methods, both with same result:
/// <summary>
/// Removes the grandparent of <paramref name="item"/> from the tree and, when that
/// leaves the great-grandparent without children, removes the great-grandparent too.
/// Does nothing unless all three ancestor levels exist.
/// </summary>
/// <param name="item">Text element whose enclosing nodes are removed.</param>
private static void RemoveItem1(Text item)
{
    // Need to go up at least two levels to get to the run.
    var parent = item.Parent;
    if (parent == null)
    {
        return;
    }

    var topNode = parent.Parent;
    if (topNode == null)
    {
        return;
    }

    var topParentNode = topNode.Parent;
    if (topParentNode == null)
    {
        return;
    }

    topNode.Remove();

    // No more children? Remove the parent node, as well.
    if (!topParentNode.HasChildren)
    {
        topParentNode.Remove();
    }
}
/// <summary>
/// Removes the highest removable node among the text element's ancestors: the
/// grandparent when it has a parent of its own, otherwise the parent; a text element
/// whose parent has no parent is removed itself. A detached element is left alone.
/// </summary>
/// <param name="textfield">Text element to remove together with its wrappers.</param>
private static void RemoveItem2(Text textfield)
{
    var parent = textfield.Parent;
    if (parent == null)
    {
        return;
    }

    var grandParent = parent.Parent;
    if (grandParent == null)
    {
        textfield.Remove();
        return;
    }

    if (grandParent.Parent != null)
    {
        grandParent.Remove();
    }
    else
    {
        parent.Remove();
    }
}
This is the code removing the part starting from the desired text (corrupts the document):
// Opens the document for editing and removes every text item from the first one
// containing "split here" through the end of the body.
public static void DeleteSecondPart(string docName)
{
using (WordprocessingDocument document = WordprocessingDocument.Open(docName, true))
{
DocumentFormat.OpenXml.Wordprocessing.Document doc = document.MainDocumentPart.Document;
// The text elements are copied into a list before any of them is removed.
List<Text> textparts = document.MainDocumentPart.Document.Body.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>().ToList();
// Becomes true at the marker and stays true for all following items.
bool remove = false;
foreach (Text textfield in textparts)
{
if (textfield.Text.Contains("split here"))
{
remove = true;
}
if(remove)
{
RemoveItem1(textfield);
//Using this commented code line, instead of the one above, removes only the text field itself, it works fine, the document is valid, but it leaves empty paragraphs that could be pages long.
//textfield.Remove();
}
}
}
}

A rewrite of the RemoveItem method did the trick:
/// <summary>
/// Climbs from the text element to its ancestor that sits directly under the Body
/// (or to the topmost ancestor when there is no Body above it) and removes that
/// ancestor if it still has a parent.
/// </summary>
/// <param name="textfield">Text element whose top-level container is removed.</param>
private static void RemoveItem3(Text textfield)
{
    OpenXmlElement current = textfield;

    while (true)
    {
        OpenXmlElement parent = current.Parent;
        // Stop climbing at the root or just below the Body.
        if (parent == null || parent is DocumentFormat.OpenXml.Wordprocessing.Body)
        {
            break;
        }
        current = parent;
    }

    if (current.Parent != null)
    {
        current.Remove();
    }
}

Drag and drop virtual files using IStream

I want to enable drag and drop from our windows forms based application to Windows Explorer. The big problem: The files are stored in a database, so I need to use delayed data rendering. There is an article on codeproject.com, but the author is using a H_GLOBAL object which leads to memory problems with files bigger than aprox. 20 MB. I haven't found a working solution for using an IStream Object instead. I think this must be possible to implement, because this isn't an unusual case. (A FTP program needs such a feature too, for example)
Edit: Is it possible to get an event when the user drops the file? So I could for example copy it to temp and the explorer gets it from there? Maybe there is an alternative approach for my problem...

AFAIK, there is no working article about this for .NET, so you will have to write it yourself. This is somewhat complicated, because the .NET DataObject class is limited. I have a working example of the opposite task (accepting delayed-rendering files from Explorer), but that is easier, because I did not need my own IDataObject implementation.
So your task will be:
Find working IDataObject implementation in .net. I recommend you look here (Shell Style Drag and Drop in .NET (WPF and WinForms))
You also need an IStream wrapper for managed stream (it is relatively easy to implement)
Implement delayed rendering using information from MSDN (Shell Clipboard Formats)
This is the starting point, and in general enough information to implement such feature. With bit of patience and several unsuccessful attempts you will do it :)
Update: The following code lacks many necessary methods and functions, but the main logic is here.
// ...
// Builds the list of virtual items described by the file-descriptor data of a data object.
// Returns null when dataObject is null or when no top-level item was produced.
private static IEnumerable<IVirtualItem> GetDataObjectContent(System.Windows.Forms.IDataObject dataObject)
{
if (dataObject == null)
return null;
List<IVirtualItem> Result = new List<IVirtualItem>();
// Check which descriptor format (wide or ANSI) the data object offers.
bool WideDescriptor = dataObject.GetDataPresent(ShlObj.CFSTR_FILEDESCRIPTORW);
bool AnsiDescriptor = dataObject.GetDataPresent(ShlObj.CFSTR_FILEDESCRIPTORA);
if (WideDescriptor || AnsiDescriptor)
{
// NOTE(review): unqualified IDataObject presumably refers to the COM interface, not the WinForms one - confirm.
IDataObject NativeDataObject = dataObject as IDataObject;
if (NativeDataObject != null)
{
object Data = null;
// The wide descriptor is preferred when both are present.
if (WideDescriptor)
Data = dataObject.GetData(ShlObj.CFSTR_FILEDESCRIPTORW);
else
if (AnsiDescriptor)
Data = dataObject.GetData(ShlObj.CFSTR_FILEDESCRIPTORA);
Stream DataStream = Data as Stream;
if (DataStream != null)
{
// Folders seen so far, keyed by path (case-insensitive), so later items can find their parent.
Dictionary<string, VirtualClipboardFolder> FolderMap =
new Dictionary<string, VirtualClipboardFolder>(StringComparer.OrdinalIgnoreCase);
// NOTE(review): the reader is not disposed here - confirm whether the stream must stay open.
BinaryReader Reader = new BinaryReader(DataStream);
// The stream starts with a 32-bit item count followed by that many descriptors.
int Count = Reader.ReadInt32();
for (int I = 0; I < Count; I++)
{
VirtualClipboardItem ClipboardItem;
if (WideDescriptor)
{
FILEDESCRIPTORW Descriptor = ByteArrayHelper.ReadStructureFromStream<FILEDESCRIPTORW>(DataStream);
// A descriptor with valid attributes and the Directory bit becomes a folder; anything else a file.
if (((Descriptor.dwFlags & FD.FD_ATTRIBUTES) > 0) && ((Descriptor.dwFileAttributes & FileAttributes.Directory) > 0))
ClipboardItem = new VirtualClipboardFolder(Descriptor);
else
ClipboardItem = new VirtualClipboardFile(Descriptor, NativeDataObject, I);
}
else
{
FILEDESCRIPTORA Descriptor = ByteArrayHelper.ReadStructureFromStream<FILEDESCRIPTORA>(DataStream);
if (((Descriptor.dwFlags & FD.FD_ATTRIBUTES) > 0) && ((Descriptor.dwFileAttributes & FileAttributes.Directory) > 0))
ClipboardItem = new VirtualClipboardFolder(Descriptor);
else
ClipboardItem = new VirtualClipboardFile(Descriptor, NativeDataObject, I);
}
string ParentFolder = Path.GetDirectoryName(ClipboardItem.FullName);
// Items without a parent folder are top-level results; the rest are attached to their folder.
if (string.IsNullOrEmpty(ParentFolder))
Result.Add(ClipboardItem);
else
{
// The indexer throws if the parent folder has not been registered yet
// (assumes folders are listed before their contents - TODO confirm).
VirtualClipboardFolder Parent = FolderMap[ParentFolder];
ClipboardItem.Parent = Parent;
Parent.Content.Add(ClipboardItem);
}
// Register folders so that later descriptors can look them up as parents.
VirtualClipboardFolder ClipboardFolder = ClipboardItem as VirtualClipboardFolder;
if (ClipboardFolder != null)
FolderMap.Add(PathHelper.ExcludeTrailingDirectorySeparator(ClipboardItem.FullName), ClipboardFolder);
}
}
}
}
return Result.Count > 0 ? Result : null;
}
// ...
public VirtualClipboardFile : VirtualClipboardItem, IVirtualFile
{
// ...
// Opens a read-only stream over this item's content obtained from the data object.
// Throws ArgumentException unless mode is Open and access is Read.
// Returns null when the file-contents clipboard format cannot be obtained.
// The share and options parameters are not used in the code shown here.
public Stream Open(FileMode mode, FileAccess access, FileShare share, FileOptions options, long startOffset)
{
if ((mode != FileMode.Open) || (access != FileAccess.Read))
throw new ArgumentException("Only open file mode and read file access supported.");
System.Windows.Forms.DataFormats.Format Format = System.Windows.Forms.DataFormats.GetFormat(ShlObj.CFSTR_FILECONTENTS);
if (Format == null)
return null;
// Ask for the content of item number FIndex, delivered either as an IStream or as an HGLOBAL.
FORMATETC FormatEtc = new FORMATETC();
FormatEtc.cfFormat = (short)Format.Id;
FormatEtc.dwAspect = DVASPECT.DVASPECT_CONTENT;
FormatEtc.lindex = FIndex;
FormatEtc.tymed = TYMED.TYMED_ISTREAM | TYMED.TYMED_HGLOBAL;
STGMEDIUM Medium;
FDataObject.GetData(ref FormatEtc, out Medium);
try
{
switch (Medium.tymed)
{
case TYMED.TYMED_ISTREAM:
// Wrap the COM stream in a managed stream.
IStream MediumStream = (IStream)Marshal.GetTypedObjectForIUnknown(Medium.unionmember, typeof(IStream));
ComStreamWrapper StreamWrapper = new ComStreamWrapper(MediumStream, FileAccess.Read, ComRelease.None);
// Seek from beginning
// Note: the "else" below belongs to the inner "if (StreamWrapper.CanSeek)".
if (startOffset > 0)
if (StreamWrapper.CanSeek)
StreamWrapper.Seek(startOffset, SeekOrigin.Begin);
else
{
// Not seekable: read and discard bytes until the offset is reached or the stream ends.
byte[] Null = new byte[256];
int Readed = 1;
while ((startOffset > 0) && (Readed > 0))
{
Readed = StreamWrapper.Read(Null, 0, (int)Math.Min(Null.Length, startOffset));
startOffset -= Readed;
}
}
// The medium and the COM stream are released when the wrapper raises Closed.
StreamWrapper.Closed += delegate(object sender, EventArgs e)
{
ActiveX.ReleaseStgMedium(ref Medium);
Marshal.FinalReleaseComObject(MediumStream);
};
return StreamWrapper;
case TYMED.TYMED_HGLOBAL:
// Copy the whole global memory block into a managed array, then release the medium.
byte[] FileContent;
IntPtr MediumLock = Windows.GlobalLock(Medium.unionmember);
try
{
long Size = FSize.HasValue ? FSize.Value : Windows.GlobalSize(MediumLock).ToInt64();
FileContent = new byte[Size];
// NOTE(review): the length is narrowed to int here - confirm sizes above int.MaxValue cannot occur.
Marshal.Copy(MediumLock, FileContent, 0, (int)Size);
}
finally
{
Windows.GlobalUnlock(Medium.unionmember);
}
ActiveX.ReleaseStgMedium(ref Medium);
Stream ContentStream = new MemoryStream(FileContent, false);
// NOTE(review): if this Seek throws, the catch block below releases the medium a second time - confirm that is safe.
ContentStream.Seek(startOffset, SeekOrigin.Begin);
return ContentStream;
default:
throw new ApplicationException(string.Format("Unsupported STGMEDIUM.tymed ({0})", Medium.tymed));
}
}
catch
{
// Release the medium on any failure, then rethrow the original exception.
ActiveX.ReleaseStgMedium(ref Medium);
throw;
}
}
// ...

Googlers may find this useful: download a file using windows IStream

We Keep Coding

C# (C-Sharp) is a programming language developed by Microsoft that runs on the .NET Framework.

What is the fastest way to load XML into an XDocument? - c#

When you create a new XDocument using XDocument.Load, does it open the XML file and keep a local copy, or does it continuously read the document from the hard drive? If it does continuously read, is there a faster way to parse XML? XDocument x = XDocument.Load("file.xml");

Related

How to get data out of xml XDocument in C#?

String remains null even after putting data

Regex XML parsing C#

How to split a Word document by specific text using C# and the Open XML SDK?

Drag and drop virtual files using IStream

Categories

Resources