c# Merging two XMLs giving error - c#

I am trying to merge two XMLs with same structure but different data into one.
I am getting this error: A node of type Document cannot be added to content.
Below is my code
// Build one <Product> element per product test of the current test group.
var productElements =
    testGroupProvider.GetTestGroup().ProductTests.Select(
        productTest => new XElement(xNamespace + "Product",
            new XElement(xNamespace + "ExternalId", productTest.ProductNameKey),
            new XElement(xNamespace + "Name", testGroupProvider.GetProductName(productTest)),
            new XElement(xNamespace + "ImageUrl", ChoiceBaseHostName + GetProductImageUrl(productTest, TargetDatabase))));

// Assemble the new feed as a complete document: <Feed> with <Categories> and <Products>.
var root = new XDocument(
    new XElement(xNamespace + "Feed",
        new XAttribute("xmlns", xNamespace),
        new XAttribute("name", BVFeedsName),
        new XAttribute("incremental", "true"),
        new XAttribute("extractDate", DateTime.Now.ToString("o")),
        new XElement(xNamespace + "Categories",
            new XElement(xNamespace + "Category",
                new XElement(xNamespace + "ExternalId", testGroupProvider.GetProductGroup().Id),
                new XElement(xNamespace + "Name", testGroupProvider.GetProductGroup().Name),
                testGroupProvider.GetTestGroup().Name)),
        new XElement(xNamespace + "Products", productElements)));

var filePath = @"D:\testXML\test.xml";
XElement xml = XElement.Load(filePath);

// This is the call the question is about: root is an XDocument, and a document
// node cannot be added as content of an element, so this line throws
// "A node of type Document cannot be added to content."
xml.Add(root);
xml.Save(filePath);
Can anyone tell me what i am doing wrong.
This is the XML structure in test.xml
<?xml version="1.0" encoding="utf-8"?>
<Feed xmlns="http://www.bazaarvoice.com/xs/PRR/ProductFeed/5.6" name="Choice" incremental="true" extractDate="2016-07-12T15:24:44.5732750+10:00">
<Categories>
<Category>
<ExternalId>{09B3B4FB-F5CF-4522-BE96-4C4B535580C3}</ExternalId>
<Name>Cereal and muesli</Name>
</Category>
</Categories>
<Products>
<Product>
<ExternalId>coles-almond-hazelnut-macadamia-cluster-fusions</ExternalId>
<Name>Coles Almond, Hazelnut &amp; Macadamia Cluster Fusions</Name>
<ImageUrl></ImageUrl>
</Product>
</Products>
</Feed>
The second XML has the same structure with different products
<?xml version="1.0" encoding="utf-8"?>
<Feed xmlns="http://www.bazaarvoice.com/xs/PRR/ProductFeed/5.6" name="Choice" incremental="true" extractDate="2016-07-12T15:24:44.5732750+10:00">
<Categories>
<Category>
<ExternalId>{12}</ExternalId>
<Name>cat1</Name>
</Category>
</Categories>
<Products>
<Product>
<ExternalId>Id</ExternalId>
<Name>Ccoles</Name>
<ImageUrl></ImageUrl>
</Product>
</Products>
</Feed>
I want to combine them like below
<?xml version="1.0" encoding="utf-8"?>
<Feed xmlns="http://www.bazaarvoice.com/xs/PRR/ProductFeed/5.6" name="Choice" incremental="true" extractDate="2016-07-12T15:24:44.5732750+10:00">
<Categories>
<Category>
<ExternalId>{09B3B4FB-F5CF-4522-BE96-4C4B535580C3}</ExternalId>
<Name>Cereal and muesli</Name>
</Category>
<Category>
<ExternalId>{12}</ExternalId>
<Name>cat1</Name>
</Category>
</Categories>
<Products>
<Product>
<ExternalId>coles-almond-hazelnut-macadamia-cluster-fusions</ExternalId>
<Name>Coles Almond, Hazelnut &amp; Macadamia Cluster Fusions</Name>
<ImageUrl></ImageUrl>
</Product>
<Product>
<ExternalId>Id</ExternalId>
<Name>Ccoles</Name>
<ImageUrl></ImageUrl>
</Product>
</Products>
</Feed>

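An XML document must have exactly one root element. In your code, root is a whole XDocument, and a document cannot be nested inside an element of another tree — that is what the error message is telling you.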
Working with the documents you attached, you can replace the xml.Add(root); with the following (i.e. it will add each node under one root to the other xml root)
// Copy the content of every section of the new feed (the direct children of
// its root element) into the section with the same name in the loaded file.
foreach (var child in root.Root.Elements())
{
    // child.Name is already a namespace-qualified XName, so it can be used
    // for the lookup directly without converting it to a string first.
    var target = xml.Element(child.Name);
    if (target == null)
    {
        // The loaded file has no such section yet: add the section as a whole.
        xml.Add(child);
    }
    else
    {
        target.Add(child.Nodes());
    }
}
Edit - A further generalization
You can generalize the above code using a Merge extension of 2 XElements so that it reads as follows
// root is an XDocument, so its sections are the children of root.Root.
// root.Elements() would return only the <Feed> element itself, for which
// xml.Element(...) finds no match inside the loaded <Feed>.
foreach (var child in root.Root.Elements())
{
    // Merge only the entries whose ExternalId is not already present in the target section.
    xml.Element(child.Name).Merge(child, xNamespace + "ExternalId");
}
Having defined the extension
/// <summary>
/// Adds to <paramref name="root1"/> every child element of <paramref name="root2"/>
/// that has no counterpart among the children of <paramref name="root1"/>.
/// Two children count as the same entry when MyComparer says so for the
/// key element named <paramref name="element_id"/>.
/// </summary>
public static void Merge(this XElement root1, XElement root2, XName element_id)
{
    var sameKey = new MyComparer(element_id);
    var missing = root2.Elements()
        .Except(root1.Elements(), sameKey)
        .ToList();
    root1.Add(missing);
}
with a xml comparer
/// <summary>
/// Compares two elements by the text of one designated child element
/// (for example their ExternalId).
/// </summary>
public class MyComparer : IEqualityComparer<XElement>
{
    // Name of the child element whose text acts as the comparison key.
    private readonly XName _element_id;

    /// <param name="element_id">Name of the child element that holds the key.</param>
    public MyComparer(XName element_id)
    {
        _element_id = element_id;
    }

    /// <summary>
    /// Returns true when both elements carry the same key text. An element
    /// without the key child is only equal to itself, so entries that lack
    /// a key are never treated as duplicates of one another.
    /// </summary>
    public bool Equals(XElement x, XElement y)
    {
        string keyX = KeyOf(x);
        string keyY = KeyOf(y);
        if (keyX == null || keyY == null)
        {
            return ReferenceEquals(x, y);
        }
        return string.Equals(keyX, keyY);
    }

    /// <summary>
    /// Hash code of the key text; 0 when the element has no key child.
    /// </summary>
    public int GetHashCode(XElement el)
    {
        string key = KeyOf(el);
        return key == null ? 0 : key.GetHashCode();
    }

    // The explicit string conversion yields null for a missing child element,
    // where reading .Value would throw a NullReferenceException.
    private string KeyOf(XElement el)
    {
        return el == null ? null : (string)el.Element(_element_id);
    }
}

Select the right target nodes (the containers to add to) and the right source nodes (the elements to be added), instead of adding the whole document.
var filePath = @"D:\testXML\test.xml";
XElement xml = XElement.Load(filePath);

// The feed declares a default namespace, so element names must be qualified
// with it; an unqualified Descendants("Categories") finds nothing and First() throws.
// NOTE(review): assumes root was built with the same namespace as the file — confirm.
XNamespace ns = xml.GetDefaultNamespace();

// Append the new <Category> entries to the existing <Categories> container.
var xmlCategories = xml.Descendants(ns + "Categories").First();
var rootCategories = root.Descendants(ns + "Category");
xmlCategories.Add(rootCategories);

// Append the new <Product> entries to the existing <Products> container.
var xmlProducts = xml.Descendants(ns + "Products").First();
var rootProducts = root.Descendants(ns + "Product");
xmlProducts.Add(rootProducts);

xml.Save(filePath);
Be crystal clear what you are doing.

Try this
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
namespace ConsoleApplication2
{
    /// <summary>
    /// Merges the categories and products of a second feed file into the
    /// in-memory copy of the first one.
    /// </summary>
    class Program
    {
        const string FILENAME1 = @"c:\temp\test1.xml";
        const string FILENAME2 = @"c:\temp\test2.xml";

        static void Main(string[] args)
        {
            XDocument doc1 = XDocument.Load(FILENAME1);
            XDocument doc2 = XDocument.Load(FILENAME2);

            // Matching on LocalName ignores the feed's default namespace.
            // First(...) throws InvalidOperationException if a container is missing,
            // instead of failing later with a NullReferenceException.
            XElement category1 = doc1.Descendants().First(x => x.Name.LocalName == "Categories");
            XElement category2 = doc2.Descendants().First(x => x.Name.LocalName == "Categories");
            // Elements() copies only the direct <Category> children. Descendants()
            // would also copy their nested <ExternalId>/<Name> a second time,
            // directly under <Categories>.
            category1.Add(category2.Elements());

            XElement product1 = doc1.Descendants().First(x => x.Name.LocalName == "Products");
            XElement product2 = doc2.Descendants().First(x => x.Name.LocalName == "Products");
            product1.Add(product2.Elements());

            // The merged result lives in doc1 only; nothing is written back to disk here.
        }
    }
}

Try this, sorry about the VB
' "second" holds the second XML document (same structure as test, different products).
' Work on a copy so that "test" itself stays unchanged.
Dim combined As XElement = New XElement(test) 'create copy of test.xml
' Append the children of second's <Categories> to the last <Categories> element of the copy.
combined.<Categories>.LastOrDefault.Add(second.<Categories>.Elements)
' Same for the children of <Products>.
combined.<Products>.LastOrDefault.Add(second.<Products>.Elements)
or
' Alternative: if "test" itself may be modified, merge into it directly instead of into a copy.
test.<Categories>.LastOrDefault.Add(second.<Categories>.Elements)
test.<Products>.LastOrDefault.Add(second.<Products>.Elements)
The result is
<Feed name="Choice" incremental="true" extractDate="2016-07-12T15:24:44.5732750+10:00" xmlns="http://www.bazaarvoice.com/xs/PRR/ProductFeed/5.6">
<Categories>
<Category>
<ExternalId>{09B3B4FB-F5CF-4522-BE96-4C4B535580C3}</ExternalId>
<Name>Cereal and muesli</Name>
</Category>
<Category>
<ExternalId>{12}</ExternalId>
<Name>cat1</Name>
</Category>
</Categories>
<Products>
<Product>
<ExternalId>coles-almond-hazelnut-macadamia-cluster-fusions</ExternalId>
<Name>Coles Almond, Hazelnut &amp; Macadamia Cluster Fusions</Name>
<ImageUrl></ImageUrl>
</Product>
<Product>
<ExternalId>Id</ExternalId>
<Name>Ccoles</Name>
<ImageUrl></ImageUrl>
</Product>
</Products>
</Feed>
The test data I used is
Dim test As XElement =
<Feed xmlns="http://www.bazaarvoice.com/xs/PRR/ProductFeed/5.6" name="Choice" incremental="true" extractDate="2016-07-12T15:24:44.5732750+10:00">
<Categories>
<Category>
<ExternalId>{09B3B4FB-F5CF-4522-BE96-4C4B535580C3}</ExternalId>
<Name>Cereal and muesli</Name>
</Category>
</Categories>
<Products>
<Product>
<ExternalId>coles-almond-hazelnut-macadamia-cluster-fusions</ExternalId>
<Name>Coles Almond, Hazelnut & Macadamia Cluster Fusions</Name>
<ImageUrl></ImageUrl>
</Product>
</Products>
</Feed>
Dim second As XElement =
<Feed xmlns="http://www.bazaarvoice.com/xs/PRR/ProductFeed/5.6" name="Choice" incremental="true" extractDate="2016-07-12T15:24:44.5732750+10:00">
<Categories>
<Category>
<ExternalId>{12}</ExternalId>
<Name>cat1</Name>
</Category>
</Categories>
<Products>
<Product>
<ExternalId>Id</ExternalId>
<Name>Ccoles</Name>
<ImageUrl></ImageUrl>
</Product>
</Products>
</Feed>
The XElements can be loaded like this
test = XElement.Load("PATH")
second = XElement.Load("second PATH")
and saved like this
test.Save("PATH")
second.Save("second PATH")
combined.Save("combined PATH")

Related

convert dataset into nested xml???Help me

I have 1 dataset, named Invoices, of 2 datatable, the data is taken from sql server 2008. Ignoring the data from the database, I focus on the dataset exported to xml. Datatable 1, named Invoice, includes OrderID, CustomerID, CustomerName, CustomerPhone.
Datatable 2, named Products, includes ProductID, OrderID, ProductName , Price, Quantity, Amount.
My Question is, I want to get this output,
<Invoices>
<OrderID>1</OrderID>
<Invoice>
<CustomerID>1</CustomerID>
<CustomerName>A</CustomerName>
<CustomerPhone>123</CustomerPhone>
<Products>
<Product>
<ProductID>1</ProductID>
<ProductName>C</ProductName>
<Price>10</Price>
<Quantity>2</Quantity>
<Amount>20</Amount>
</Product>
</Products>
<TotalAmount>20</TotalAmount> --TotalAmount = sum(amount of products)
</Invoice>
<OrderID>2</OrderID>
<Invoice>
<CustomerID>3</CustomerID>
<CustomerName>D</CustomerName>
<CustomerPhone>1789</CustomerPhone>
<Products>
<Product>
<ProductID>5</ProductID>
<ProductName>V</ProductName>
<Price>30</Price>
<Quantity>3</Quantity>
<Amount>90</Amount>
</Product>
<Product>
<ProductID>9</ProductID>
<ProductName>Z</ProductName>
<Price>5</Price>
<Quantity>4</Quantity>
<Amount>20</Amount>
</Product>
</Products>
<TotalAmount>110</TotalAmount> --TotalAmount = sum(amount of products)
</Invoice>
</Invoices>
But I get this output:
<Invoices>
<OrderID>1</OrderID>
<Invoice>
<CustomerID>1</CustomerID>
<CustomerName>A</CustomerName>
<CustomerPhone>123</CustomerPhone>
<TotalAmount>20</TotalAmount> --TotalAmount = sum(amount of products)
<Products>
<Product>
<ProductID>1</ProductID>
<ProductName>C</ProductName>
<Price>10</Price>
<Quantity>2</Quantity>
<Amount>20</Amount>
</Product>
</Products>
</Invoice>
<OrderID>2</OrderID>
<Invoice>
<CustomerID>3</CustomerID>
<CustomerName>D</CustomerName>
<CustomerPhone>1789</CustomerPhone>
<TotalAmount>110</TotalAmount> --TotalAmount = sum(amount of products)
<Products>
<Product>
<ProductID>5</ProductID>
<ProductName>V</ProductName>
<Price>30</Price>
<Quantity>3</Quantity>
<Amount>90</Amount>
</Product>
<Product>
<ProductID>9</ProductID>
<ProductName>Z</ProductName>
<Price>5</Price>
<Quantity>4</Quantity>
<Amount>20</Amount>
</Product>
</Products>
</Invoice>
</Invoices>
The code to export xml is:
/// <summary>
/// Builds the "Invoices" data set from the tables returned by LoadInvoice()
/// and LoadProduct(), and links them with a nested relation named "PRODUCT".
/// </summary>
/// <returns>The data set holding the "Invoice" and "Product" tables.</returns>
public DataSet LoadInvoices()
{
    DataSet invoices = new DataSet("Invoices");

    DataTable invoiceTable = LoadInvoice();
    invoiceTable.TableName = "Invoice";
    DataTable productTable = LoadProduct();
    productTable.TableName = "Product";

    invoices.Tables.Add(invoiceTable);
    invoices.Tables.Add(productTable);

    // Child side: third column of Product; parent side: first column of Invoice.
    DataColumn childColumn = productTable.Columns[2];
    DataColumn parentColumn = invoiceTable.Columns[0];

    // Nested = true is set before the relation is attached to the data set.
    DataRelation productsOfInvoice = new DataRelation("PRODUCT", parentColumn, childColumn)
    {
        Nested = true
    };
    invoices.Relations.Add(productsOfInvoice);

    return invoices;
}
/// <summary>
/// Returns the XML representation of the data held in <paramref name="ds"/>.
/// </summary>
/// <param name="ds">The data set to serialize.</param>
/// <returns>The string produced by DataSet.GetXml().</returns>
public string xmlConvert(DataSet ds)
{
    return ds.GetXml();
}
So how do I have to adjust the code so that the XML is exported in the structure I want?
Your XML is invalid because it contains a bare ampersand; it should be written as &amp;. The issue can probably be fixed in the dataset query, but there is not enough information about the methods that were used to create the XML. You can always edit the XML afterwards, using LINQ to XML to filter the output:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
namespace ConsoleApplication1
{
    /// <summary>
    /// Loads an XML file and removes every "catname" element whose text is not "Hotel".
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.xml";

        static void Main(string[] args)
        {
            XDocument doc = XDocument.Load(FILENAME);
            // Materialize the matches first so the tree is not modified while it is being queried.
            List<XElement> removeItems = doc.Descendants("catname").Where(x => (string)x != "Hotel").ToList();
            // Called without arguments, this is the LINQ to XML Remove() extension
            // that detaches every node in the list from its parent.
            removeItems.Remove();
            // The filtered document only exists in memory; it is not saved here.
        }
    }
}

How to merge nodes (when same value found in specific node) in single node

I want to merge nodes when the same value is found in the subject node, and also check whether the code-id values are in sequence or not.
Please check below points:
1)If code-id tag value is sequence wise and subject tag value is same then merge the nodes.
2)If code-id tag value is sequence wise but subject tag value is diff then avoid merging.
3)If code-id tag value is not sequence wise but subject tag value is same then avoid merging.
4)If code-id tag value is not sequence wise but subject tag value is diff then avoid merging.
Can you please tell me how to get desired output?
Thanks in advance.
My code:
// Parse the input and locate its <root> element.
XDocument xDoc = XDocument.Parse(xmlstr);
XElement xElementSelectRoot = xDoc.XPathSelectElement("//root");
// Fresh <root> element that collects the output.
XElement xElementCreateRoot = new XElement(new XElement("root"));
// "//category" selects every <category> in the document, at any depth; the
// list is ordered by each element's concatenated text (r.Value).
IEnumerable<XElement> lstOFCatogory = xElementSelectRoot.XPathSelectElements("//category").OrderBy(r => r.Value).ToList();
foreach (var varXNodeCategory in lstOFCatogory)
{
// Categories that sit inside a <Product> are re-assembled together with their product.
if (varXNodeCategory.Parent.Name == "Product")
{
// Work on a detached XmlDocument copy of the parent <Product>.
XmlDocument xmlDoc = new XmlDocument();
xmlDoc.LoadXml(varXNodeCategory.Parent.ToString());
XmlNodeList xmlNodeCategoryList = xmlDoc.SelectNodes("//category");
// Second detached copy, holding only the current <category>.
XmlDocument xmlDoc1=new XmlDocument();
xmlDoc1.LoadXml(varXNodeCategory.ToString());
XmlNode xmlNodeSelectCategory = xmlDoc1.SelectSingleNode("//category");
// Strip every <category> from the product copy, remembering the one whose
// first child has the same inner text as the current category's first child.
foreach (XmlNode xmlNode in xmlNodeCategoryList)
{
xmlNode.ParentNode.RemoveChild(xmlNode);
if (xmlNode.FirstChild.InnerText.Equals(xmlNodeSelectCategory.FirstChild.InnerText))
{
xmlNodeSelectCategory = xmlNode;
}
}
// Re-attach the remembered category to the last top-level node of the copy.
// NOTE(review): this relies on the loop above having replaced xmlNodeSelectCategory
// with a node owned by xmlDoc; a node still owned by xmlDoc1 cannot be appended here — confirm.
xmlDoc.LastChild.AppendChild(xmlNodeSelectCategory);
// xmlNode1 is assigned but not used anywhere in the visible code.
XmlNode xmlNode1 = xmlDoc.SelectSingleNode("//Product");
// Convert the rebuilt product back to LINQ to XML and add it to the output.
var varXElement= XElement.Parse(xmlDoc.InnerXml);
xElementCreateRoot.Add(varXElement);
}
else
{
// Categories outside a <Product> are added to the output as they are.
xElementCreateRoot.Add(varXNodeCategory);
}
}
.....................................................................................................................................................................
Input:
.....................................................................................................................................................................
<root>
<category>
<code-id type="pub">e00001</code-id>
<title>test334</title>
<ranking>240</ranking>
</category>
<Product>
<Product-subject-title>
<subject>BIOLOGY ARTICLE</subject>
</Product-subject-title>
<category>
<code-id type="pub">e00002</code-id>
<title>test34</title>
<ranking>160</ranking>
</category>
</Product>
<Product>
<Product-subject-title>
<subject>IN BRIEF</subject>
</Product-subject-title>
<category>
<code-id type="pub">e00003</code-id>
<title>test6</title>
<ranking>117</ranking>
</category>
</Product>
<Product>
<Product-subject-title>
<subject>IN BRIEF</subject>
</Product-subject-title>
<category>
<span>
<code-id type="pub">e00004</code-id>
</span>
<title>test3</title>
<ranking>52</ranking>
</category>
</Product>
<Product>
<Product-subject-title>
<subject>BIOLOGY ARTICLE</subject>
</Product-subject-title>
<category>
<code-id type="pub">e00005</code-id>
<title>test28</title>
<ranking>10</ranking>
</category>
</Product>
<Product>
<Product-subject-title>
<subject>IN BRIEF</subject>
</Product-subject-title>
<category>
<code-id type="pub">e00006</code-id>
<title>test4</title>
<ranking>116</ranking>
</category>
</Product>
<category>
<span>
<code-id type="pub">e00007</code-id>
</span>
<title>test76</title>
<ranking>14</ranking>
</category>
</root>
.....................................................................................................................................................................
My output will look like this:
.....................................................................................................................................................................
<root>
<category>
<code-id type="pub">e00001</code-id>
<title>test334</title>
<ranking>240</ranking>
</category>
<Product>
<Product-subject-title>
<subject>BIOLOGY ARTICLE</subject>
</Product-subject-title>
<category>
<code-id type="pub">e00002</code-id>
<title>test34</title>
<ranking>160</ranking>
</category>
</Product>
<Product>
<Product-subject-title>
<subject>IN BRIEF</subject>
</Product-subject-title>
<category>
<code-id type="pub">e00003</code-id>
<title>test6</title>
<ranking>117</ranking>
</category>
<category>
<span>
<code-id type="pub">e00004</code-id>
</span>
<title>test3</title>
<ranking>52</ranking>
</category>
</Product>
<Product>
<Product-subject-title>
<subject>BIOLOGY ARTICLE</subject>
</Product-subject-title>
<category>
<code-id type="pub">e00005</code-id>
<title>test28</title>
<ranking>10</ranking>
</category>
</Product>
<Product>
<Product-subject-title>
<subject>IN BRIEF</subject>
</Product-subject-title>
<category>
<code-id type="pub">e00006</code-id>
<title>test4</title>
<ranking>116</ranking>
</category>
</Product>
<category>
<span>
<code-id type="pub">e00007</code-id>
</span>
<title>test76</title>
<ranking>14</ranking>
</category>
</root>
I got solution and its working fine now.
// Second pass over the intermediate result: products whose subject title equals
// that of the previously processed product are folded into that product.
XDocument xmlDocNew = XDocument.Parse(xElementCreateRoot.ToString());
XElement xElementSelectRootNew = xmlDocNew.XPathSelectElement("//root");
XElement xElementCreateRootNew = new XElement(new XElement("root"));
// Every <category> at any depth, ordered by its concatenated text.
IEnumerable<XElement> lstOFCatogoryNew = xElementSelectRootNew.XPathSelectElements("//category").OrderBy(r => r.Value).ToList();
// One entry per processed product category; the last entry is "the previous product".
List<Class1> lstClass1 = new List<Class1>();
int intClassIndexCounter = 0;
foreach (var x in lstOFCatogoryNew)
{
    // Categories that are not inside a <Product> are added to the output unchanged.
    if (x.Parent.Name.ToString() != "Product")
    {
        xElementCreateRootNew.Add(x);
        continue;
    }

    // Text of the product's first child node (the subject title block).
    string subjectTitle = ((XElement)x.Parent.FirstNode).Value.Trim();
    // Text of the category's first child node (the code-id, or the element wrapping it).
    string codeId = ((XElement)x.FirstNode).Value.Trim();

    bool sameSubjectAsPrevious =
        lstClass1.Count > 0 &&
        lstClass1[lstClass1.Count - 1].SujectName.Trim().ToString() == subjectTitle;

    if (sameSubjectAsPrevious)
    {
        // Same subject as the previous product: append only this product's
        // category to the last <Product> already in the output.
        var xsDoc = new XDocument(new XElement(XElement.Parse(x.Parent.ToString())));
        XElement xElementSelectProduct = xsDoc.XPathSelectElement("//Product");
        xElementCreateRootNew.Elements("Product").Last().Add(xElementSelectProduct.XPathSelectElement("//category"));
    }
    else
    {
        // Different subject (or first product seen): add the whole product.
        xElementCreateRootNew.Add(x.Parent);
    }

    // Bookkeeping is the same in every product case, so it is done once here.
    lstClass1.Add(new Class1(subjectTitle,//subject title
        codeId,//code-id
        x.Parent.ToString().Trim()));//product set
    intClassIndexCounter++;
}

Accessing xml elements using LINQ to XML

I have a xml document like this and I need to access the "employees", "employee" elements so I am trying to use linq's XDocument class to get the employee elements but it always returns empty value.
Sample xml:
<organization>
<metadata>
</metadata>
<main>
<otherInfo>
</otherInfo>
<employeeInfo>
<employees>
<employee>
<id>1</id>
<name>ABC</name>
</employee>
<employee>
<id>2</id>
<name>ASE</name>
</employee>
<employee>
<id>3</id>
<name>XYZ</name>
</employee>
</employees>
</employeeInfo>
</main>
</organization>
C# code:
// Walk employeeInfo -> employees -> employee and collect every <employee> element.
XDocument xDoc = XDocument.Parse(xmlString);
var allEmployees = xDoc.Descendants("employeeInfo")
    .SelectMany(info => info.Elements("employees"))
    .SelectMany(list => list.Elements("employee"));
It kind of depends on what information you need. Your select returns an IEnumerable list.
This code will print out each employee
// Sample document as a verbatim string literal (@"..."), so it may span several lines.
string xmlString = @"<organization>
<metadata>
</metadata>
<main>
<otherInfo>
</otherInfo>
<employeeInfo>
<employees>
<employee>
<id>1</id>
<name>ABC</name>
</employee>
<employee>
<id>2</id>
<name>ASE</name>
</employee>
<employee>
<id>3</id>
<name>XYZ</name>
</employee>
</employees>
</employeeInfo>
</main>
</organization>";
XDocument xDoc = XDocument.Parse(xmlString);
// The query is lazy: it only runs when the foreach below enumerates it.
var allEmployees = from d in xDoc.Descendants("employeeInfo")
                   from ms in d.Elements("employees")
                   from m in ms.Elements("employee")
                   select m;
// Writing an XElement prints its full XML.
foreach (var emp in allEmployees) {
    Console.WriteLine(emp);
}
// Keep the console window open until a key is pressed.
Console.Read();
// Project every <employee> into an (Id, Name) pair and print one line per employee.
XDocument xDoc = XDocument.Parse(xmlString);
var allEmployees = xDoc.Descendants("employee")
    .Select(r => new
    {
        Id = r.Element("id").Value,
        Name = r.Element("name").Value
    })
    .ToList();
allEmployees.ForEach(employee => Console.WriteLine(string.Concat(employee.Id, " ", employee.Name)));
Just use Descendants("employee") (element names are case-sensitive):
// Descendants searches the whole document, so the intermediate
// employeeInfo/employees levels do not have to be spelled out.
XDocument xDoc = XDocument.Parse(xmlString);
var allEmployees = xDoc.Descendants("employee").ToList();

How can be this xml parsed in efficient way?

I am beginner in C#.
Simple Example of bigger case:
Input:
<?xml version="1.0" encoding="utf-8"?>
<products>
<product>
<id>1</id>
<name>John</name>
</product>
<product>
<id>2</id>
<name>Tom</name>
</product>
<product>
<id>3</id>
<name>Sam</name>
</product>
</products>
</xml>
Output (for id=2):
<id>2</id>
<name>Tom</name>
My attempt so far (pseudocode):
XDocument doc=XDocument.Parse(".............");
var els= doc.Descendants("product");
foreach(e in els){
node=e.Element("id");
if(2==node.Value){
return e;
}
}
Please help,
Thanks
Currently your XML file is not well-formed: remove the closing </xml> tag from the file to make it valid. And here is the query:
int id = 1;
XDocument xdoc = XDocument.Load(path_to_xml);
// The single <product> whose <id> converts to the requested number, or null when there is none.
var product = xdoc.Descendants("product")
    .SingleOrDefault(p => (int)p.Element("id") == id);
This query will return whole <product> element or null if match not found.
Also I believe product name will be enough for you to select (because you already have product id):
// Name of the single product with the requested id, or null when no product matches.
var name = (from p in xdoc.Descendants("product")
            where (int)p.Element("id") == id
            select (string)p.Element("name"))
           .SingleOrDefault();
Returns Tom for id = 2
This will return the product (as in your question) not the id
// XPath: the first <product>, at any depth, that has an <id> child whose text is '1'.
var product = doc.XPathSelectElement("//product[id and id[text() = '1']]");
You might be looking for XPath:
root.XPathSelectElements(@"//products/product/id[text()='2']")
Edit To the comment: Directly getting the name: //products/product/id[text()='2']/../name
See full example
using System.Xml.Linq;
using System.Xml.XPath;
/// <summary>
/// Demonstrates two XPath queries that find the name of the product with id 2.
/// </summary>
public class Program
{
    public static void Main(string[] args)
    {
        var doc = XDocument.Parse(XML);

        // Select the matching <id> element, then step to its sibling <name> in code.
        foreach(var n in doc.Root.XPathSelectElements(
            @"//products/product/id[text()='2']"))
        {
            System.Console.WriteLine("Not that hard: '{0}'", n.Parent.Element("name").Value);
        }

        // Direct query for name: ".." steps from <id> up to <product>, "name" back down.
        foreach(var n in doc.Root.XPathSelectElements(
            @"//products/product/id[text()='2']/../name"))
        {
            System.Console.WriteLine("Directly: '{0}'", n.Value);
        }
    }

    // Sample input. In a verbatim string (@"...") a double quote is written as "".
    private const string XML =
@"<?xml version=""1.0"" encoding=""utf-8""?>
<products>
<product>
<id>1</id>
<name>John</name>
</product>
<product>
<id>2</id>
<name>Tom</name>
</product>
<product>
<id>3</id>
<name>Sam</name>
</product>
</products>";
}
Printing:
Not that hard: 'Tom'
Directly: 'Tom'

Xml simplification/extraction of distinct values - possible LINQ

Sorry for this long post....But i have a headache from this task.
I have a mile long xml document where I need to extract a list, use distinct values, and pass for transformation to web.
I have completed the task using xslt and keys, but the effort is forcing the server to its knees.
Description:
hundreds of products in xml, all with a number of named and Id'ed cattegories, all categories with at least one subcategory with name and id.
The categories are unique with ID, all subcategories are unique WITHIN that category:
Simplified example form the huge file (left our tons of info irrelevant to the task):
<?xml version="1.0" encoding="utf-8"?>
<root>
<productlist>
<product id="1">
<name>Some Product</name>
<categorylist>
<category id="1">
<name>cat1</name>
<subcategories>
<subcat id="1">
<name>subcat1</name>
</subcat>
<subcat id="2">
<name>subcat1</name>
</subcat>
</subcategories>
</category>
<category id="2">
<name>cat1</name>
<subcategories>
<subcat id="1">
<name>subcat1</name>
</subcat>
</subcategories>
</category>
<category id="3">
<name>cat1</name>
<subcategories>
<subcat id="1">
<name>subcat1</name>
</subcat>
</subcategories>
</category>
</categorylist>
</product>
<product id="2">
<name>Some Product</name>
<categorylist>
<category id="1">
<name>cat1</name>
<subcategories>
<subcat id="2">
<name>subcat2</name>
</subcat>
<subcat id="4">
<name>subcat4</name>
</subcat>
</subcategories>
</category>
<category id="2">
<name>cat2</name>
<subcategories>
<subcat id="1">
<name>subcat1</name>
</subcat>
</subcategories>
</category>
<category id="3">
<name>cat3</name>
<subcategories>
<subcat id="1">
<name>subcat1</name>
</subcat>
</subcategories>
</category>
</categorylist>
</product>
</productlist>
</root>
DESIRED RESULT:
<?xml version="1.0" encoding="utf-8"?>
<root>
<maincat id="1">
<name>cat1</name>
<subcat id="1"><name>subcat1</name></subcat>
<subcat id="2"><name>subcat2</name></subcat>
<subcat id="3"><name>subcat3</name></subcat>
</maincat>
<maincat id="2">
<name>cat2</name>
<subcat id="1"><name>differentsubcat1</name></subcat>
<subcat id="2"><name>differentsubcat2</name></subcat>
<subcat id="3"><name>differentsubcat3</name></subcat>
</maincat>
<maincat id="2">
<name>cat2</name>
<subcat id="1"><name>differentsubcat1</name></subcat>
<subcat id="2"><name>differentsubcat2</name></subcat>
<subcat id="3"><name>differentsubcat3</name></subcat>
</maincat>
</root>
(original will from 2000 products produce 10 categories with from 5 to 15 subcategories)
Things tried:
Xslt with keys - works fine, but pooooor performance
Played around with linq:
// Every <category> below any <product>, copied as-is under a new <root>; no de-duplication happens here.
IEnumerable<XElement> mainCats = doc.Descendants("product").Descendants("category");
var cDoc = new XDocument(
    new XDeclaration("1.0", "utf-8", null),
    new XElement("root", mainCats));
cachedCategoryDoc = cDoc.ToString();
Result was a "categories only" (not distinct values of categories or subcategories)
Applied the same xlst to that, and got fairly better performance..... but still far from usable...
Can i apply some sort of magic with the linq statement to have the desired output??
A truckload of good karma goes out to the ones that can point me in det right direction..
//Steen
NOTE:
I am not stuck on using linq/XDocument if anyone has better options
Currently on .net 3.5, can switch to 4 if needed
If I understood your question correctly, here's a LINQ attempt.
The query below parses your XML data and creates a custom type which represents a category and contains the subcategories of that element.
After parsing, the data is grouped by category Id to get distinct subcategories for each category.
var doc = XElement.Load("path to the file");
// Step 1: project every <category> occurrence (one per product it appears in)
// into an anonymous object holding its id, name and subcategories.
var results = doc.Descendants("category")
.Select(cat => new
{
Id = cat.Attribute("id").Value,
// First <name> in document order below the category.
// NOTE(review): assumes the category's own <name> precedes its <subcategories>, as in the sample — confirm.
Name = cat.Descendants("name").First().Value,
Subcategories = cat.Descendants("subcat")
.Select(subcat => new
{
Id = subcat.Attribute("id").Value,
Name = subcat.Descendants("name").First().Value
})
})
// Step 2: collapse all occurrences of the same category id into one group.
.GroupBy(x=>x.Id)
.Select(g=>new
{
Id = g.Key,
// The name is taken from the first occurrence in the group.
Name = g.First().Name,
// Distinct() works here because anonymous types compare by their property
// values, so two subcategories with equal Id and Name count as one.
Subcategories = g.SelectMany(x=>x.Subcategories).Distinct()
});
From the results above you can create your document using the code below:
// Build the output in one functional-construction expression:
// <root> / <maincat id> / <name> followed by one <subcat id><name/></subcat> per subcategory.
var cdoc = new XDocument(
    new XDeclaration("1.0", "utf-8", null),
    new XElement("root",
        results.Select(category =>
            new XElement("maincat",
                new XAttribute("id", category.Id),
                new XElement("name", category.Name),
                category.Subcategories.Select(sub =>
                    new XElement("subcat",
                        new XAttribute("id", sub.Id),
                        new XElement("name", sub.Name))).ToArray()))));
Try this; I have put something together for it. The id attributes are missing, but you can add them through the XElement constructor.
var doc = XDocument.Load(reader);
// Rebuild every <category> below a <product> as a flat <maincat>: its <name>
// followed by one <subcat><name/></subcat> per subcategory. No id attributes
// are copied and nothing is de-duplicated.
IEnumerable<XElement> mainCats =
doc.Descendants("product").Descendants("category").Select(r =>
new XElement("maincat", new XElement("name", r.Element("name").Value),
r.Descendants("subcat").Select(s => new XElement("subcat", new XElement("name", s.Element("name").Value)))));
// Wrap the rebuilt categories in a new document and serialize it to a string.
var cDoc = new XDocument(new XDeclaration("1.0", "utf-8", null), new XElement("root"));
cDoc.Root.Add(mainCats);
var cachedCategoryDoc = cDoc.ToString();
Regards.
This will parse your xml into a dictionary of categories with all the distinct subcategory names. It uses XPath from this library: https://github.com/ChuckSavage/XmlLib/
XElement root = XElement.Load(file);
// XGet comes from the third-party XmlLib linked above, not from System.Xml.Linq.
// NOTE(review): presumably it returns the string values selected by the XPath,
// with string.Empty as the fallback value — verify against the library.
string[] cats = root.XGet("//category/name", string.Empty).Distinct().ToArray();
// Maps each distinct category name to its distinct subcategory names.
Dictionary<string, string[]> dict = new Dictionary<string, string[]>();
foreach (string cat in cats)
{
// Get all the categories by name and their subcat names
// NOTE(review): {0} looks like a placeholder that XGet fills with cat — confirm.
string[] subs = root
.XGet("//category[name={0}]/subcategories/subcat/name", string.Empty, cat)
.Distinct().ToArray();
dict.Add(cat, subs);
}
Or the parsing as one statement:
// Same result as a single expression: distinct category names become the keys,
// and the distinct subcategory names found for each category become the values.
// XGet comes from the third-party XmlLib, not from System.Xml.Linq.
Dictionary<string, string[]> dict = root
.XGet("//category/name", string.Empty)
.Distinct()
.ToDictionary(cat => cat, cat => root
.XGet("//category[name={0}]/subcategories/subcat/name", string.Empty, cat)
.Distinct().ToArray());
I give you the task of assembling your resulting xml from the dictionary.

Categories