I'm using HTML Agility Pack to web scrape to datatable. However the website have multiple same column name which it was not able to add on for the second table.
The error will be prompt out like this as the "2020" had been added before
My code as below :
public void WebDataScrap()
{
try
{
//Get the content of the URL from the Web
const string url = "https://www.wsj.com/market-data/quotes/MY/XKLS/0146/financials/annual/cash-flow";
var web = new HtmlWeb();
var doc = web.Load(url);
const string classValue = "cr_dataTable"; //cr_datatable
//var nodes = doc.DocumentNode.SelectNodes($"//table[#class='{classValue}']") ?? Enumerable.Empty<HtmlNode>();
var resultDataset = new DataSet();
foreach (HtmlNode table in doc.DocumentNode.SelectNodes($"//table[#class='{classValue}']") ?? Enumerable.Empty<HtmlNode>())
{
var resultTable = new DataTable(table.Id);
foreach (HtmlNode row in table.SelectNodes("//tr"))
{
var headerCells = row.SelectNodes("th");
if (headerCells != null)
{
foreach (HtmlNode cell in headerCells)
{
resultTable.Columns.Add(cell.InnerText);
}
}
var dataCells = row.SelectNodes("td");
if (dataCells != null)
{
var dataRow = resultTable.NewRow();
for (int i = 0; i < dataCells.Count; i++)
{
dataRow[i] = dataCells[i].InnerText;
}
resultTable.Rows.Add(dataRow);
}
}
}
}
catch (Exception ex)
{
MessageBox.Show(ex.ToString());
}
}
The URL i trying to web scrape : https://www.wsj.com/market-data/quotes/MY/XKLS/0146/financials/annual/cash-flow
I did try to do looping to skip if it was having the same name but it will prompt that the column unable to find when I try to debug.
Is there any solution that can help to solve this? In the end I will need to export the datatable to csv/excel file.
Thanks
I think you want to do this instead:
foreach (HtmlNode table in doc.DocumentNode.SelectNodes($"//table[#class='{classValue}']") ?? Enumerable.Empty<HtmlNode>())
{
var resultTable = new DataTable(table.Id);
// select all the headers and add them to the table
var headerCells = table.SelectNodes("thead/tr/th");
if (headerCells != null)
{
foreach (HtmlNode cell in headerCells)
{
resultTable.Columns.Add(cell.InnerText);
}
}
// select all the rows and add them to the table
foreach (HtmlNode row in table.SelectNodes("tbody/tr"))
{
var dataCells = row.SelectNodes("td");
if (dataCells != null)
{
var dataRow = resultTable.NewRow();
for (int i = 0; i < dataCells.Count; i++)
{
dataRow[i] = dataCells[i].InnerText;
}
resultTable.Rows.Add(dataRow);
}
}
}
The header section and the data section each have their own loop rather than the header section being nested in the data loop. We're also being more explicit about where we want data from: the header should come from thead/tr/th and the data should come from tbody/tr.
I am trying the get all user media from the instagram api and store into database but how can do that i don't know. i am write the code but using this code just add one media in the database. any one have the idea then please let me know how can do that. here below list the my code.
This is my C# method :
public string makePostFromInstagram()
{
var serializer1 = new System.Web.Script.Serialization.JavaScriptSerializer();
var nodes1 = serializer1.Deserialize<dynamic>(GetData(strInstagramUserId));
foreach (var date in nodes1)
{
if (date.Key == "data")
{
string theKey = date.Key;
var thisNode = date.Value;
int userCount = 0;
foreach (var post in thisNode)
{
if (thisNode[userCount]["username"] == strInstagramUserId)
{
id = thisNode[userCount]["id"].ToString();
}
userCount++;
}
}
}
var serializer = new System.Web.Script.Serialization.JavaScriptSerializer();
Dictionary<string, object> csObj = serializer.Deserialize<Dictionary<string, object>>(GetRecentPost(id, accessToken));
int length = ((ArrayList)csObj["data"]).Count;
var nodes = serializer.Deserialize<dynamic>(GetRecentPost(id, accessToken));
foreach (var date in nodes)
{
if (date.Key == "data")
{
string theKey = date.Key;
var thisNode = date.Value;
foreach (var post in thisNode)
{
UsersOnInstagram objUserInsta = new UsersOnInstagram();
string result = null;
//here i am add just one image so i want to here add multiple image insert
if (post["type"] == "image")
result = UsersOnInstagram.addInstagramPost(strPtId, HttpUtility.UrlEncode(post["caption"]["text"]), post["images"]["standard_resolution"]["url"], UnixTimeStampToDateTime(Convert.ToDouble(post["created_time"])), null, post["type"]);
else if (post["type"] == "video")
result = objUserInsta.addInstagramPost(HttpUtility.UrlEncode(post["caption"]["text"]), strPtId, post["images"]["standard_resolution"]["url"], UnixTimeStampToDateTime(Convert.ToDouble(post["created_time"])), post["videos"]["standard_resolution"]["url"], post["type"]);
}
}
}
Response.End();
}
this is my api method :
public static string GetRecentPost(string instagramaccessid, string instagramaccesstoken)
{
Double MAX_TIMESTAMP = DateTimeToUnixTimestamp(DateTime.Today.AddDays(-1));
Double MIN_TIMESTAMP = DateTimeToUnixTimestamp(DateTime.Today.AddDays(-2));
string url = "https://api.instagram.com/v1/users/" + instagramaccessid + "/media/recent?access_token=" + instagramaccesstoken + "&min_timestamp=" + MIN_TIMESTAMP + "&maz_timestamp=" + MAX_TIMESTAMP;
var webClient = new System.Net.WebClient();
string d = webClient.DownloadString(url);
return d;
}
any one know how can do that please let me know.
i want that my variable var repFolderTree hold old value with new value .
foreach (DataRow row in _dt.Rows)
{
string strFolderData = row["ReportFolder"].ToString();
var repFolderTree = crcr.GetAllReportsHierarchical(username, strFolderData);
repFolderTree.FolderName = "All Reports";
uxAllCatalogHierarchical.Text = string.Format("<div class=\"hierarchicalCatalog\">{0}</div>", HierarchicalCatalogView(repFolderTree, 0, showFolder));
}
public CrissCrossLib.Hierarchical.CrcReportFolder GetAllReportsHierarchical(string username,string path)
{
var hierItems = GetAllReportsHierarchicalNoCache(username, path);
m_cacheManager.AllReportsHierarchicalCacheByUsername.Add(username, hierItems);
return hierItems;
}
private string HierarchicalCatalogView(CrcReportFolder rootFolder, int level, string showFolder)
{
_dt = _ssrsDAC.GetReportListByUser(Convert.ToInt32(Session["LoginID"]));
StringBuilder sb = new StringBuilder();
sb.Append("<div class=\"folderBox\">");
string scrollTo = "";
if (PathMatch(showFolder, rootFolder.Path))
scrollTo = " scrollToFolder";
sb.AppendFormat("<div class=\"folderName{1}\">{0}</div>", rootFolder.FolderName, scrollTo);
string show = "none";
if (level == 0 || PathContains(showFolder, rootFolder.Path))
show = "block";
sb.AppendFormat("<div class=\"folderChildren\" style=\"display:{0}\">", show);
foreach (CrcReportFolder subFolderLoop in rootFolder.SubFolders)
sb.Append(HierarchicalCatalogView(subFolderLoop, level + 1, showFolder));
foreach (CrcReportItem itemLoop in rootFolder.Reports)
{
string str = itemLoop.DisplayName;
DataRow[] foundAuthors = _dt.Select("ReportName = '" + str + "'");
if (foundAuthors.Length != 0)
{
sb.Append("<div class=\"reportRow\">");
sb.AppendFormat("<a class=\"reportLink vanillaHover\" href=\"Report.aspx?path={0}\" >{1}</a>",
Server.UrlEncode(itemLoop.ReportPath), itemLoop.DisplayName);
if (!string.IsNullOrEmpty(itemLoop.ShortDescription))
sb.AppendFormat("<div class=\"reportInfo\">{0}</div>", itemLoop.ShortDescription);
sb.Append("<div class=\"clear\"></div></div>");
}
}
sb.Append("</div></div>");
return sb.ToString();
}
i have a control where i am listing all the value that i am getting from
var repFolderTree = crcr.GetAllReportsHierarchical(username, strFolderData);
so every time loop after that i lost the last value and contain the current value. so i want that i can get all the value after the loop and bind on this control that i am doing in this this line of code
uxAllCatalogHierarchical.Text = string.Format("<div class=\"hierarchicalCatalog\">{0}</div>", HierarchicalCatalogView(repFolderTree, 0, showFolder));
i think my code make some scene for you .
you can use List or Collection to store all the values , with Add operation to add the var value.
List<object> repFolderTree = new List<object>();
foreach (DataRow row in _dt.Rows)
{
string strFolderData = row["ReportFolder"].ToString();
var repFolderTree = crcr.GetAllReportsHierarchical(username, strFolderData);
repFolderTree .Add(repFolderTree );
repFolderTree.FolderName = "All Reports";
uxAllCatalogHierarchical.Text = string.Format("<div class=\"hierarchicalCatalog\">{0}</div>", HierarchicalCatalogView(repFolderTree, 0, showFolder));
}
i am making a Job reporter application, and till now what i did is, imported C.S.V file to grid view and displaying it by saving it in a data table, now what i want is,update and save the record back in the c.s.v file, i am not using any S.Q.L or any type of database, is it possible to do this?
please help me, i have to deliver project in two hours.
The project is C# Win forms.
Also help me in how i can serialize it to upload into ftp server.
THE CODE IS here...
private void openProjectToolStripMenuItem_Click_1(object sender, EventArgs e)
{
// int size = -1;
OpenFileDialog ofd = new OpenFileDialog()
{
Title = "Choose a File",
InitialDirectory = #"c:\dev\",
Filter = "Text Files (.txt)|*.txt|XML Files|*.xml|Word Documents (.docx)|*.docx",
RestoreDirectory = true,
Multiselect = false
};
if (ofd.ShowDialog() != System.Windows.Forms.DialogResult.OK)
{
MessageBox.Show("No file selected!");
return;
}
using (StreamReader oStreamReader = new StreamReader(ofd.FileName))
{
try
{
Application.DoEvents();
DataSet ds = new DataSet("ApplicationData");
//some updates in the Datatable
DataTable JobHeaderDataTable = new DataTable("JobHeaderDataTable");
DataTable JobDate = new DataTable("JobDate");
DataTable JobDateItems = new DataTable("JobDateItems");
ds.Tables.Add(JobHeaderDataTable);
ds.Tables.Add(JobDate);
ds.Tables.Add(JobDateItems);
int rowCount = 0;
string[] columnNames = null;
string[] oStreamDataValues = null;
while (!oStreamReader.EndOfStream)
{
string oStreamRowData = oStreamReader.ReadLine().Trim();
if (oStreamRowData.Length > 0)
{
oStreamDataValues = oStreamRowData.Split('-');
if (rowCount == 0 && oStreamDataValues[0].ToString() == "HDR")
{
rowCount = 1;
columnNames = oStreamDataValues;
for (int i = 1; i < columnNames.Length; i++)
{
DataColumn oDataColumn = new DataColumn(columnNames[i].ToUpper(), typeof(string));
oDataColumn.DefaultValue = string.Empty;
JobHeaderDataTable.Columns.Add(oDataColumn);
}
//// For Slider
//txtCompany.Text = oStreamDataValues.GetValue(1).ToString();
//txtLocation.Text = oStreamDataValues.GetValue(2).ToString();
//txtRigName.Text = oStreamDataValues.GetValue(3).ToString();
//txtState.Text = oStreamDataValues.GetValue(4).ToString();
//txtCounty.Text = oStreamDataValues.GetValue(5).ToString();
//txtWellName.Text = oStreamDataValues.GetValue(6).ToString();
//txtTownship.Text = oStreamDataValues.GetValue(7).ToString();
//txtDescription.Text = oStreamDataValues.GetValue(8).ToString();
//txtBentHstSub.Text = oStreamDataValues.GetValue(9).ToString();
//txtBilToBend.Text = oStreamDataValues.GetValue(10).ToString();
//txtPadOD.Text = oStreamDataValues.GetValue(11).ToString();
//txtNBStab.Text = oStreamDataValues.GetValue(11).ToString();
//txtJob_ID.Text = oStreamDataValues.GetValue(12).ToString();
//// For Header
//txtCompanyHeader.Text = oStreamDataValues.GetValue(1).ToString();
//txtLocationHeader.Text = oStreamDataValues.GetValue(2).ToString();
//txtRigNameHeader.Text = oStreamDataValues.GetValue(3).ToString();
//txtStateHeader.Text = oStreamDataValues.GetValue(4).ToString();
//txtCountyHeader.Text = oStreamDataValues.GetValue(5).ToString();
//txtWellNameHeader.Text = oStreamDataValues.GetValue(6).ToString();
//txtTownshipHeader.Text = oStreamDataValues.GetValue(7).ToString();
//txtDescriptionHeader.Text = oStreamDataValues.GetValue(8).ToString();
//txtBentHstSubHeader.Text = oStreamDataValues.GetValue(9).ToString();
//txtBillToBendHeader.Text = oStreamDataValues.GetValue(10).ToString();
//txtPadODHeader.Text = oStreamDataValues.GetValue(11).ToString();
//txtNBStabHeader.Text = oStreamDataValues.GetValue(11).ToString();
//txtJob_IDHeader.Text = oStreamDataValues.GetValue(12).ToString();
}
else
{
DataRow oDataRow = JobHeaderDataTable.NewRow();
for (int i = 1; i < columnNames.Length; i++)
{
oDataRow[columnNames[i]] = oStreamDataValues[i] == null ? string.Empty : oStreamDataValues[i].ToString();
}
JobHeaderDataTable.Rows.Add(oDataRow);
}
}
}
oStreamReader.Close();
oStreamReader.Dispose();
foreach (DataRow dr in JobHeaderDataTable.Rows)
{
dataGridView2.DataSource = JobHeaderDataTable;
dataGridView4.DataSource = JobDate;
dataGridView5.DataSource = JobDateItems;
}
}
catch (IOException)
{
}
}
}
Save yourself some headache and check out, ClosedXML, and create the csv using this answer is available Can I save an EXCEL worksheet as CSV via ClosedXML?.
System.IO.File.WriteAllLines(csvFileName,
worksheet.RowsUsed().Select(row =>
string.Join(";", row.Cells(1, row.LastCellUsed(false).Address.ColumnNumber)
.Select(cell => cell.GetValue<string>()))
));
I'm trying to implement a function within a Class method and I am somewhat new to C#.
Basically, I have a method that iterates through row in a database and sets values to variables. Then, if a document has been created already it updates the document, if not it creates the document. I'm trying to factor out some of the code and I don't know where to put it as it still needs a reference to my variables. I would like to factor out the repeated items in the if else statement.
private void SyncKinases()
{
DataSet ds = new DataSet();
ds = gn.ExecuteQuery("dx.kinasedatasheet.selectstagingkinases", null);
TreeProvider tp = new TreeProvider(ui);
VersionManager vm = new VersionManager(tp);
TreeNode node;
WorkflowManager wm = new WorkflowManager(tp);
if (ds.Tables[0].Rows.Count > 0)
{
foreach (DataRow dr in ds.Tables[0].Rows)
{
string className = "dx.kinasedatasheet";
string title = dr["Title"].ToString();
string technologyPlatform = dr["TechnologyPlatform"].ToString();
string ambitGeneSymbol = dr["AmbitGeneSymbol"].ToString();
string targetDescription = dr["TargetDescription"].ToString();
string entrezGeneSymbol = dr["EntrezGeneSymbol"].ToString();
int entrezGeneID = int.Parse(dr["EntrezGeneID"].ToString());
string aliases = dr["Aliases"].ToString();
string kinaseGroup = dr["KinaseGroup"].ToString();
string species = dr["Species"].ToString();
string accessionNumber = dr["AccessionNumber"].ToString();
string kinaseConstruct = dr["KinaseConstruct"].ToString();
string kinaseForm = dr["KinaseForm"].ToString();
string expressionSystem = dr["ExpressionSystem"].ToString();
double avgZValue = 0;
if (!(dr["AverageZValue"] is DBNull))
{
avgZValue = double.Parse(dr["AverageZValue"].ToString());
}
string panel = dr["Panel"].ToString();
string compoundsKds = dr["CompoundsKds"].ToString();
string mutationRelevance = dr["MutationRelevance"].ToString();
string mutationReferences = dr["MutationReferences"].ToString();
string kinaseAliasPath = "/Kinase-Data-Sheets";
if (!(dr["NodeID"] is System.DBNull))
{
node = tp.SelectSingleNode(int.Parse(dr["NodeID"].ToString()));
vm.CheckOut(node);
node.DocumentName = ambitGeneSymbol;
node.NodeName = ambitGeneSymbol;
node.SetValue("Title", title);
node.SetValue("TechnologyPlatform", technologyPlatform);
node.SetValue("AmbitGeneSymbol", ambitGeneSymbol);
node.SetValue("TargetDescription", targetDescription);
node.SetValue("EntrezGeneSymbol", entrezGeneSymbol);
node.SetValue("EntrezGeneID", entrezGeneID);
node.SetValue("Aliases", aliases);
node.SetValue("KinaseGroup", kinaseGroup);
node.SetValue("Species", species);
node.SetValue("AccessionNumber", accessionNumber);
node.SetValue("KinaseConstruct", kinaseConstruct);
node.SetValue("KinaseForm", kinaseForm);
node.SetValue("ExpressionSystem", expressionSystem);
if (!(dr["AverageZValue"] is DBNull))
{
node.SetValue("AverageZValue", avgZValue);
}
node.SetValue("Panel", panel);
node.SetValue("CompoundsKds", compoundsKds);
node.SetValue("MutationRelevance", mutationRelevance);
node.SetValue("MutationReferences", mutationReferences);
node.SetValue("DocumentPublishTo", null);
node.Update();
updatedKinaseCount++;
vm.CheckIn(node, null, null);
WorkflowInfo wi = wm.GetNodeWorkflow(node);
if (node.IsPublished)
{
wm.AutomaticallyPublish(node, wi, null);
}
}
else
{
node = TreeNode.New(className, tp);
node.DocumentName = ambitGeneSymbol;
node.NodeName = ambitGeneSymbol;
node.SetValue("Title", title);
node.SetValue("TechnologyPlatform", technologyPlatform);
node.SetValue("AmbitGeneSymbol", ambitGeneSymbol);
node.SetValue("TargetDescription", targetDescription);
node.SetValue("EntrezGeneSymbol", entrezGeneSymbol);
node.SetValue("EntrezGeneID", entrezGeneID);
node.SetValue("Aliases", aliases);
node.SetValue("KinaseGroup", kinaseGroup);
node.SetValue("Species", species);
node.SetValue("AccessionNumber", accessionNumber);
node.SetValue("KinaseConstruct", kinaseConstruct);
node.SetValue("KinaseForm", kinaseForm);
node.SetValue("ExpressionSystem", expressionSystem);
if (!(dr["AverageZValue"] is DBNull))
{
node.SetValue("AverageZValue", avgZValue);
}
node.SetValue("Panel", panel);
node.SetValue("CompoundsKds", compoundsKds);
node.SetValue("MutationRelevance", mutationRelevance);
node.SetValue("MutationReferences", mutationReferences);
node.SetValue("DocumentPublishTo", null);
node.SetValue("DocumentCulture", "en-US");
TreeNode parentNode = tp.SelectSingleNode("DiscoveRx", kinaseAliasPath, "en-US");
node.Insert(parentNode);
//vm.CheckIn(node, null, null);
newKinaseCount++;
}
}
}
ArchiveItems(archivedKinaseCount, "dx.kinasedatasheet.selectarchivekinases");
}
In addition to refactoring your routine I'd recommend creating some extension methods to save you some typing. For example, here's the an extension for parsing your doubles:
public static class Extensions
{
public static double ToDoubleIfNotDBNull(this object item)
{
if (item is DBNULL) return 0;
return double.Parse(item.ToString());
}
}
So then your code becomes:
double avgZValue = dr["AverageZValue"].ToDoubleIfNotDBNull();
You can just refactor your code so you don't need to set the node values in different cases:
private void SyncKinases()
{
DataSet ds = new DataSet();
ds = gn.ExecuteQuery("dx.kinasedatasheet.selectstagingkinases", null);
TreeProvider tp = new TreeProvider(ui);
VersionManager vm = new VersionManager(tp);
TreeNode node;
WorkflowManager wm = new WorkflowManager(tp);
if (ds.Tables[0].Rows.Count > 0)
{
foreach (DataRow dr in ds.Tables[0].Rows)
{
string className = "dx.kinasedatasheet";
string title = dr["Title"].ToString();
string technologyPlatform = dr["TechnologyPlatform"].ToString();
string ambitGeneSymbol = dr["AmbitGeneSymbol"].ToString();
string targetDescription = dr["TargetDescription"].ToString();
string entrezGeneSymbol = dr["EntrezGeneSymbol"].ToString();
int entrezGeneID = int.Parse(dr["EntrezGeneID"].ToString());
string aliases = dr["Aliases"].ToString();
string kinaseGroup = dr["KinaseGroup"].ToString();
string species = dr["Species"].ToString();
string accessionNumber = dr["AccessionNumber"].ToString();
string kinaseConstruct = dr["KinaseConstruct"].ToString();
string kinaseForm = dr["KinaseForm"].ToString();
string expressionSystem = dr["ExpressionSystem"].ToString();
double avgZValue = 0;
if (!(dr["AverageZValue"] is DBNull))
{
avgZValue = double.Parse(dr["AverageZValue"].ToString());
}
string panel = dr["Panel"].ToString();
string compoundsKds = dr["CompoundsKds"].ToString();
string mutationRelevance = dr["MutationRelevance"].ToString();
string mutationReferences = dr["MutationReferences"].ToString();
string kinaseAliasPath = "/Kinase-Data-Sheets";
bool isNewNode = false;
if (!(dr["NodeID"] is System.DBNull))
{
node = tp.SelectSingleNode(int.Parse(dr["NodeID"].ToString()));
vm.CheckOut(node);
}
else
{
node = TreeNode.New(className, tp);
node.SetValue("DocumentCulture", "en-US");
isNewNewNode = true;
}
node.DocumentName = ambitGeneSymbol;
node.NodeName = ambitGeneSymbol;
node.SetValue("Title", title);
node.SetValue("TechnologyPlatform", technologyPlatform);
node.SetValue("AmbitGeneSymbol", ambitGeneSymbol);
node.SetValue("TargetDescription", targetDescription);
node.SetValue("EntrezGeneSymbol", entrezGeneSymbol);
node.SetValue("EntrezGeneID", entrezGeneID);
node.SetValue("Aliases", aliases);
node.SetValue("KinaseGroup", kinaseGroup);
node.SetValue("Species", species);
node.SetValue("AccessionNumber", accessionNumber);
node.SetValue("KinaseConstruct", kinaseConstruct);
node.SetValue("KinaseForm", kinaseForm);
node.SetValue("ExpressionSystem", expressionSystem);
if (!(dr["AverageZValue"] is DBNull))
{
node.SetValue("AverageZValue", avgZValue);
}
node.SetValue("Panel", panel);
node.SetValue("CompoundsKds", compoundsKds);
node.SetValue("MutationRelevance", mutationRelevance);
node.SetValue("MutationReferences", mutationReferences);
node.SetValue("DocumentPublishTo", null);
if (isNewNode)
{
TreeNode parentNode = tp.SelectSingleNode("DiscoveRx", kinaseAliasPath, "en-US");
node.Insert(parentNode);
//vm.CheckIn(node, null, null);
newKinaseCount++;
}
else
{
node.Update();
updatedKinaseCount++;
vm.CheckIn(node, null, null);
WorkflowInfo wi = wm.GetNodeWorkflow(node);
if (node.IsPublished)
{
wm.AutomaticallyPublish(node, wi, null);
}
}
}
}
ArchiveItems(archivedKinaseCount, "dx.kinasedatasheet.selectarchivekinases");
}
You now also don't need all of those temporary variables from dr columns since they will only be used once. Removing those will make your method much shorter and more readable.
Just create a new method and send the values as parameters.
void SetNodeValues(Node node, DataRow row)
{
string title = dr["Title"].ToString();
....
node.SetValue("Title", title);
...
}
You might be able to do it all with a for loop (untested and not match your variables)
foreach(var col in Table.Columns)
node.SetValue(col.Name, dr[col]);
If you were using an ORM, you could send an object instead of the DataRow but that is beyond the scope of this question.