How do I use lucene.net for searching file content? - c#

I am currently using lucene.net to search the content of files for keyword search. I am able to get the results correctly but I have a scenario where I need to display the keywords found in a particular file.
There are two different files containing "karthik" and "steven", and if I search for "karthik and steven" I am able to get both the files displayed. If I search only for "karthik" and "steven" separately, only the respective files are getting displayed.
When I search for "karthik and steven" simultaneously I get both the files in the result as I am displaying the filename alone, and now I need to display the particular keyword found in that particular file as a record in the listview.
Public bool StartSearch()
{
bool bResult = false;
Searcher objSearcher = new IndexSearcher(mstrIndexLocation);
Analyzer objAnalyzer = new StandardAnalyzer();
try
{
//Perform Search
DateTime dteStart = DateTime.Now;
Query objQuery = QueryParser.Parse(mstrSearchFor, "contents", objAnalyzer);
Hits objHits = objSearcher.Search(objQuery, objFilter);
DateTime dteEnd = DateTime.Now;
mlngTotalTime = (Date.GetTime(dteEnd) - Date.GetTime(dteStart));
mlngNumHitsFound = objHits.Length();
//GeneratePreviewText(objQuery, mstrSearchFor,objHits);
//Generate results - convert to XML
mstrResultsXML = "";
if (mlngNumHitsFound > 0)
{
mstrResultsXML = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?><Results>";
//Loop through results
for (int i = 0; i < objHits.Length(); i++)
{
try
{
//Get the next result
Document objDocument = objHits.Doc(i);
//Extract the data
string strPath = objDocument.Get("path");
string strFileName = objDocument.Get("name");
if (strPath == null) { strPath = ""; }
string strLastWrite = objDocument.Get("last_write_time");
if (strLastWrite == null)
strLastWrite = "unavailable";
else
{
strLastWrite = DateField.StringToDate(strLastWrite).ToShortDateString();
}
double dblScore = objHits.Score(i) * 100;
string strScore = String.Format("{0:00.00}", dblScore);
//Add results as an XML row
mstrResultsXML += "<Row>";
//mstrResultsXML += "<Sequence>" + (i + 1).ToString() + "</Sequence>";
mstrResultsXML += "<Path>" + strPath + "</Path>";
mstrResultsXML += "<FileName>" + strFileName + "</FileName>";
//mstrResultsXML += "<Score>" + strScore + "%" + "</Score>";
mstrResultsXML += "</Row>";
}
catch
{
break;
}
}
//Finish off XML
mstrResultsXML += "</Results>";
//Build Dataview (to bind to datagrid
DataSet objDS = new DataSet();
StringReader objSR = new StringReader(mstrResultsXML);
objDS.ReadXml(objSR);
objSR = null;
mobjResultsDataView = new DataView();
mobjResultsDataView = objDS.Tables[0].DefaultView;
}
//Finish up
objSearcher.Close();
bResult = true;
}
catch (Exception e)
{
mstrError = "Exception: " + e.Message;
}
finally
{
objSearcher = null;
objAnalyzer = null;
}
return bResult;
}
Above is the code i am using for search and the xml i am binding to the listview, now i need to tag the particular keywords found in the respective document and display it in the listview as recordsss,simlar to the below listview
No FileName KeyWord(s)Found
1 Test.Doc karthik
2 Test2.Doc steven
i hope u guys undesrtood the question,

This depends on how your documents were indexed. You'll need to extract the original content, pass it through the analyzer to get the indexed tokens, and check which matches the generated query.
Just go with the Highlighter.Net package, part of contrib, which does this and more.

Related

How can I find a phrase anywhere in a String Array?

I need to see if any phrase, such as "duckbilled platypus" appears in a string array.
In the case I'm testing, the phrase does exist in the string list, as shown here:
Yet, when I look for that phrase, as shown here:
...it fails to find it. I never get past the "if (found)" gauntlet in the code below.
Here is the code that I'm using to try to traverse through the contents of one doc to see if any phrase (two words or more) are found in both documents:
private void FindAndStorePhrasesFoundInBothDocs()
{
string[] doc1StrArray;
string[] doc2StrArray;
slPhrasesFoundInBothDocs = new List<string>();
slAllDoc1Words = new List<string>();
int iCountOfWordsInDoc1 = 0;
int iSearchStartIndex = 0;
int iSearchEndIndex = 1;
string sDoc1PhraseToSearchForInDoc2;
string sFoundPhrase;
bool found;
int iLastWordIndexReached = iSearchEndIndex;
try
{
doc1StrArray = File.ReadAllLines(sDoc1Path, Encoding.UTF8);
doc2StrArray = File.ReadAllLines(sDoc2Path, Encoding.UTF8);
foreach (string line in doc1StrArray)
{
string[] subLines = line.Split();
foreach (string whirred in subLines)
{
if (String.IsNullOrEmpty(whirred)) continue;
slAllDoc1Words.Add(whirred);
}
}
iCountOfWordsInDoc1 = slAllDoc1Words.Count();
sDoc1PhraseToSearchForInDoc2 = slAllDoc1Words[iSearchStartIndex] + ' ' + slAllDoc1Words[iSearchEndIndex];
while (iLastWordIndexReached < iCountOfWordsInDoc1 - 1)
{
sFoundPhrase = string.Empty;
// Search for the phrase from doc1 in doc2;
found = doc2StrArray.Contains(sDoc1PhraseToSearchForInDoc2);
if (found)
{
sFoundPhrase = sDoc1PhraseToSearchForInDoc2;
iSearchEndIndex++;
sDoc1PhraseToSearchForInDoc2 = sDoc1PhraseToSearchForInDoc2 + ' ' + slAllDoc1Words[iSearchEndIndex];
}
else //if not found, inc vals of BOTH int args and, if sFoundPhrase not null, assign to sDoc1PhraseToSearchForInDoc2 again.
{
iSearchStartIndex = iSearchEndIndex;
iSearchEndIndex = iSearchStartIndex + 1;
if (!string.IsNullOrWhiteSpace(sFoundPhrase)) // add the previous found phrase if there was one
{
slPhrasesFoundInBothDocs.Add(sFoundPhrase);
}
sDoc1PhraseToSearchForInDoc2 = slAllDoc1Words[iSearchStartIndex] + ' ' + slAllDoc1Words[iSearchEndIndex];
} // if/else
iLastWordIndexReached = iSearchEndIndex;
} // while
} // try
catch (Exception ex)
{
MessageBox.Show("FindAndStorePhrasesFoundInBothDocs(); iSearchStartIndex = " + iSearchStartIndex.ToString() + "iSearchEndIndex = " + iSearchEndIndex.ToString() + " iLastWordIndexReached = " + iLastWordIndexReached.ToString() + " " + ex.Message);
}
}
doc2StrArray does contain the phrase sought, so why does doc2StrArray.Contains(sDoc1PhraseToSearchForInDoc2) fail?
This should do what you want:
found = Array.FindAll(doc2StrArray, s => s.Contains(sDoc1PhraseToSearchForInDoc2));
In List<T>, Contains() looking for an T, Here in your code to found be true must have all the text in particular index (NOT part of it).
Try this
var _list = doc2StrArray.ToList();
var found = _list.FirstOrDefault( w => w.Contains( sDoc1PhraseToSearchForInDoc2 ) ) != null;

Search raw text from url, using keyword from textbox

Me and my buddy Xylophone have been at this for hours and cant figure this out, any help would be appreciated. I'm basically trying to read all the text from that URL and search for a keyword.
if (comboBoxEdit1.Text == "Hello")
{
label2.Text = "Current Status: Searching...";
this.dataGridView3.ScrollBars = ScrollBars.None;
this.dataGridView3.MouseWheel += new MouseEventHandler(mousewheel);
dataGridView3.Rows.Clear();
string line;
int row = 0;
List<String> LinesFound = new List<string>();
StreamReader file = new StreamReader("https://pastebin.com/raw/fWxKdRjN");
while ((line = file.ReadLine()) != null)
{
if (line.Contains(textEdit1.Text))
{
string[] Columns = line.Split(':');
dataGridView3.Rows.Add(line);
for (int i = 0; i < Columns.Length; i++)
{
dataGridView3[i, row].Value = Columns[i];
}
row++;
label2.Text = "Current Status: " + dataGridView3.Rows.Count + " Matche(s) Found";
}
else if (dataGridView3.RowCount == 0)
{
label2.Text = "Current Status: No Matche(s) Found";
}
}
}
You are doing it all wrong, if you want to read and pars the html content of the web page, you need to fetch the page using httpClient, or better take look at this library https://html-agility-pack.net/
We can use regular expression to check if 'raw' exist in the URL.
Regex.Matches() function will return an array with all occurrence of the match.
We can then use count property to find the no of occurence.
Regular expression to match raw in a url:(raw)
Below is the working code snippet:
public static void Main()
{
string pattern = #"(raw)";
Regex rgx = new Regex(pattern);
string url = "https://pastebin.com/raw/fWxKdRjN";
if (rgx.Matches(url).Count>0){
Console.WriteLine(Current Status: " + rgx.Matches(url).Count + " Matche(s) Found");
}
else {
Console.WriteLine("Current Status: No Matche(s) Found");
}
}

I want to Optimize upload time of Excel file records saving to Database in asp.net mvc

I want to upload the excel file of record 2500. This process takes time more than 5 minutes approximate. I want to optimize it to less than a minute maximum.
Find the best way of optimization of that code. I am using Dbgeography for location storing. File is uploaded and save the data properly. Everything is working find but I need optimization.
public ActionResult Upload(HttpPostedFileBase FileUpload)
{
if (FileUpload.ContentLength > 0)
{
string fileName = Path.GetFileName(FileUpload.FileName);
string ext = fileName.Substring(fileName.LastIndexOf('.'));
Random r = new Random();
int ran = r.Next(1, 13);
string path = Path.Combine(Server.MapPath("~/App_Data/uploads"), DateTime.Now.Ticks + "_" + ran + ext);
try
{
FileUpload.SaveAs(path);
ProcessCSV(path);
ViewData["Feedback"] = "Upload Complete";
}
catch (Exception ex)
{
ViewData["Feedback"] = ex.Message;
}
}
return View("Upload", ViewData["Feedback"]);
}
Here is other method from where the file is uploaded and save it to the database..
private void ProcessCSV(string filename)
{
Regex r = new Regex(",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))");
StreamReader sr = new StreamReader(filename);
string line = null;
string[] strArray;
int iteration = 0;
while ((line = sr.ReadLine()) != null)
{
if (iteration == 0)
{
iteration++;
continue; //Because Its Heading
}
strArray = r.Split(line);
StoreLocation store = new StoreLocation();
store.Name = strArray[0];
store.StoreName = strArray[1];
store.Street = strArray[2];
store.Street2 = strArray[3];
store.St_City = strArray[4];
store.St_St = strArray[5];
store.St_Zip = strArray[6];
store.St_Phone = strArray[7];
store.Office_Phone = strArray[8];
store.Fax = strArray[9];
store.Ownership = strArray[10];
store.website = strArray[11];
store.Retailer = check(strArray[12]);
store.WarehouseDistributor = check(strArray[13]);
store.OnlineRetailer = check(strArray[14]);
string temp_Add = store.Street + "," + store.St_City;
try
{
var point = new GoogleLocationService().GetLatLongFromAddress(temp_Add);
string points = string.Format("POINT({0} {1})", point.Latitude, point.Longitude);
store.Location = DbGeography.FromText(points);//lat_long
}
catch (Exception e)
{
continue;
}
db.StoreLocations.Add(store);
db.SaveChanges();
}
The obvious place to start would be your external call to the Geo Location service, which it looks like you are doing for each iteration of the loop. I don't know that service, but is there any way you can build up a list of addresses in memory, and then hit it once with multiple addresses, then go back and amend all the records you need to update?
The call to db.SaveChanges() which updates the database, this is happening for every line.
while (...)
{
...
db.StoreLocations.Add(store);
db.SaveChanges(); // Many database updates
}
Instead, just call it once at the end:
while (...)
{
...
db.StoreLocations.Add(store);
}
db.SaveChanges(); // One combined database update
This should speed up your code nicely.

Unable to perform Select object in given drawing in autocad using C#?

I need to open the given path drawing file and then select the All lines of the drawing and then list number of the lines. I cannot do these things because while i open and then set the MdiActiveDocument method is returned. Below lines are not executed.
private static int GetNumberofLines(string drawingName)
{
int lineCount = 0;
DocumentCollection docMgr = AutoCAD.ApplicationServices.Application.DocumentManager;
Document doc = docMgr.Open(drawingName, false);
docMgr.MdiActiveDocument = doc; // in this line method is skipped
TypedValue[] filterlist = new TypedValue[1]; //cursor didn't come this line..
filterlist[0] = new TypedValue((int)DxfCode.Start, "Line");
SelectionFilter filter = new SelectionFilter(filterlist);
PromptSelectionOptions opts = new PromptSelectionOptions();
opts.MessageForAdding = "Select entities to get line counts: ";
PromptSelectionResult prmptSelRslt = doc.Editor.SelectAll(filter);
if (prmptSelRslt.Status != PromptStatus.OK)
{
return 0;
}
else
{
lineCount = prmptSelRslt.Value.Count;
}
return lineCount;
}
Please can anybody tell me how to open and list line number count.
Thanks in Advance....
There are much easier ways to get the information that you need. I wrote a blog post about this subject. You should take a look.
[CommandMethod("getLines")]
public void OpenDrawing()
{
OpenDrawingGetLines(#"C:\saved\linetest.dwg");
}
public void OpenDrawingGetLines(string path)
{
var editor = Application.DocumentManager.MdiActiveDocument.Editor;
if (!File.Exists(path))
{
editor.WriteMessage(string.Format(#"{0} doesn't exist.", path));
return;
}
// wrap in using statement to open and close.
var doc = Application.DocumentManager.Open(path, false);
string docName = string.Empty;
IEnumerable<ObjectId> lineIds;
// lock the doc
using (doc.LockDocument())
// start transaction
using (var trans = doc.TransactionManager.StartOpenCloseTransaction())
{
// get the modelspace
var modelspace = (BlockTableRecord)
trans.GetObject(SymbolUtilityServices.GetBlockModelSpaceId(doc.Database), OpenMode.ForRead);
// get the lines ObjectIds
lineIds = from id in modelspace.Cast<ObjectId>()
where id.ObjectClass.IsDerivedFrom(RXObject.GetClass(typeof(Line)))
select id;
docName = doc.Name;
trans.Commit();
}
var message = string.Format("{0} Lines in {1}", lineIds.Count(), docName);
editor.WriteMessage(message);
Application.ShowAlertDialog(message);
}
You Should Use the DocumentCollectionExtension. (In VB)
Dim strFileName As String = "C:\campus.dwg"
Dim acDocMgr As DocumentCollection = Application.DocumentManager
If (File.Exists(strFileName)) Then
DocumentCollectionExtension.Open(acDocMgr, strFileName, False)
Else
acDocMgr.MdiActiveDocument.Editor.WriteMessage("File " & strFileName & _
" does not exist.")
End If

replacing text in a text file with \r\n

Currently I am building an agenda with extra options.
for testing purposes I store the data in a simple .txt file
(after that it will be connected to the agenda of a virtual assistant.)
To change or delete text from this .txt file I have a problem.
Although the part of the content that needs to be replaced and the search string are exactly the same it doesn't replace the text in content.
code:
Change method
public override void Change(List<object> oldData, List<object> newData)
{
int index = -1;
for (int i = 0; i < agenda.Count; i++)
{
if(agenda[i].GetType() == "Task")
{
Task t = (Task)agenda[i];
if(t.remarks == oldData[0].ToString() && t.datetime == (DateTime)oldData[1] && t.reminders == oldData[2])
{
index = i;
break;
}
}
}
string search = "Task\r\nTo do: " + oldData[0].ToString() + "\r\nDateTime: " + (DateTime)oldData[1] + "\r\n";
reminders = (Dictionary<DateTime, bool>) oldData[2];
if(reminders.Count != 0)
{
search += "Reminders\r\n";
foreach (KeyValuePair<DateTime, bool> rem in reminders)
{
if (rem.Value)
search += "speak " + rem.Key + "\r\n";
else
search += rem.Key + "\r\n";
}
}
// get new data
string newRemarks = (string)newData[0];
DateTime newDateTime = (DateTime)newData[1];
Dictionary<DateTime, bool> newReminders = (Dictionary<DateTime, bool>)newData[2];
string replace = "Task\r\nTo do: " + newRemarks + "\r\nDateTime: " + newDateTime + "\r\n";
if(newReminders.Count != 0)
{
replace += "Reminders\r\n";
foreach (KeyValuePair<DateTime, bool> rem in newReminders)
{
if (rem.Value)
replace += "speak " + rem.Key + "\r\n";
else
replace += rem.Key + "\r\n";
}
}
Replace(search, replace);
if (index != -1)
{
remarks = newRemarks;
datetime = newDateTime;
reminders = newReminders;
agenda[index] = this;
}
}
replace method
private void Replace(string search, string replace)
{
StreamReader reader = new StreamReader(path);
string content = reader.ReadToEnd();
reader.Close();
content = Regex.Replace(content, search, replace);
content.Trim();
StreamWriter writer = new StreamWriter(path);
writer.Write(content);
writer.Close();
}
When running in debug I get the correct info:
content "-- agenda --\r\n\r\nTask\r\nTo do: test\r\nDateTime: 16-4-2012 15:00:00\r\nReminders:\r\nspeak 16-4-2012 13:00:00\r\n16-4-2012 13:30:00\r\n\r\nTask\r\nTo do: testing\r\nDateTime: 16-4-2012 9:00:00\r\nReminders:\r\nspeak 16-4-2012 8:00:00\r\n\r\nTask\r\nTo do: aaargh\r\nDateTime: 18-4-2012 12:00:00\r\nReminders:\r\n18-4-2012 11:00:00\r\n" string
search "Task\r\nTo do: aaargh\r\nDateTime: 18-4-2012 12:00:00\r\nReminders\r\n18-4-2012 11:00:00\r\n" string
replace "Task\r\nTo do: aaargh\r\nDateTime: 18-4-2012 13:00:00\r\nReminders\r\n18-4-2012 11:00:00\r\n" string
But it doesn't change the text. How do I make sure that the Regex.Replace finds the right piece of content?
PS. I did check several topics on this, but none of the solutions mentioned there work for me.
You missed a : right after Reminders. Just check it again :)
You could try using a StringBuilder to build up you want to write out to the file.
Just knocked up a quick example in a console app but this appears to work for me and I think it might be what you are looking for.
StringBuilder sb = new StringBuilder();
sb.Append("Tasks\r\n");
sb.Append("\r\n");
sb.Append("\tTask 1 details");
Console.WriteLine(sb.ToString());
StreamWriter writer = new StreamWriter("Tasks.txt");
writer.Write(sb.ToString());
writer.Close();

Categories