Elastic search count query based on field with value containing filesystem path - c#

I asked this question earlier here.
However, I realized my mistake very soon after I tried the solution with more data.
So I am back to square one, and I am hoping to ask this question again and get more insights.
My task is still the same; more precisely, I need to get counts of documents based on multiple field values, including a path field that contains values resembling file system paths.
My example data looks like this:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 15.9074545,
"hits": [
{
"_index": "stage-data-20210728115212095",
"_type": "_doc",
"_id": "fil.31c425766287497ec5a508d995d1ce36",
"_score": 15.9074545,
"_source": {
"header_action": "uploaded",
"partition": 7,
"offset": 11382619,
"volumeId": "vol.e144f0bc59914725528f08d995ebd8c3",
"lambdaLagMs": 0,
"id": "fil.31c425766287497ec5a508d995d1ce36",
"name": "sampleFile.txt",
"parentFolderId": "fol.6357e749063445b0c5a408d995d1ce36",
"volumeName": "test-vol-b2ee569932dd470788ebc70e6f15bf36",
"type": "text/plain",
"path": "/test_Folder-ed9cc1294ba841f98fa986be7ac38813/Folder1/sampleFile.txt",
"timeCreated": "2021-10-23T06:10:45.287Z",
"timeModified": "2021-10-23T06:10:45.287Z",
"sizeInBytes": 26,
"isUploaded": true,
"archiveStatus": "None",
"storageTier": "Standard",
"eTag": "ed6a6e795564952d4d9707e7dc91c6a6",
"format": "TXT",
"status": "Available",
"recordDateTime": "2021-10-23 06:10:47.268",
"recordTurnAroundTimeMs": 2629.375,
"dataType": "File"
}
},
{
"_index": "stage-data-20210728115212095",
"_type": "_doc",
"_id": "fil.6075863c66464a2cc5a608d995d1ce36",
"_score": 15.500043,
"_source": {
"header_action": "uploaded",
"partition": 15,
"offset": 11393012,
"volumeId": "vol.e144f0bc59914725528f08d995ebd8c3",
"lambdaLagMs": 0,
"id": "fil.6075863c66464a2cc5a608d995d1ce36",
"name": "testFile.txt",
"parentFolderId": "fol.230c9c8861fa40640cc808d995d1b210",
"volumeName": "test-vol-b2ee569932dd470788ebc70e6f15bf36",
"type": "text/plain",
"path": "/test_Folder-ed9cc1294ba841f98fa986be7ac38813/testFile.txt",
"timeCreated": "2021-10-23T06:10:45.286Z",
"timeModified": "2021-10-23T06:10:45.286Z",
"sizeInBytes": 23,
"isUploaded": true,
"archiveStatus": "None",
"storageTier": "Standard",
"eTag": "2b9f6fc56449eb68b4fa5c5da127c5be",
"format": "TXT",
"status": "Available",
"recordDateTime": "2021-10-23 06:10:47.284",
"recordTurnAroundTimeMs": 2628.936,
"dataType": "File"
}
},
{
"_index": "stage-data-20210728115212095",
"_type": "_doc",
"_id": "fil.27a781dc81554811576308d995d1ce3c",
"_score": 15.500043,
"_source": {
"header_action": "uploaded",
"partition": 6,
"offset": 11377991,
"volumeId": "vol.e144f0bc59914725528f08d995ebd8c3",
"lambdaLagMs": 0,
"id": "fil.27a781dc81554811576308d995d1ce3c",
"name": "smallfile.txt",
"parentFolderId": "fol.6ac9ecb11dae4ebd576208d995d1ce3c",
"volumeName": "test-vol-b2ee569932dd470788ebc70e6f15bf36",
"type": "text/plain",
"path": "/test_Folder-ed9cc1294ba841f98fa986be7ac38813/Folder1/Folder2/smallfile.txt",
"timeCreated": "2021-10-23T06:10:45.294Z",
"timeModified": "2021-10-23T06:10:45.294Z",
"sizeInBytes": 1249,
"isUploaded": true,
"archiveStatus": "None",
"storageTier": "Standard",
"eTag": "c6e9338f9e54e39b52dd853908a1aecd",
"status": "Available",
"recordDateTime": "2021-10-23 06:10:47.276",
"recordTurnAroundTimeMs": 2629.8689999999997,
"dataType": "File"
}
}
]
}
}
I am trying to get the count of documents using the NEST C# library. Here is my sample code:
var elasticSettings = new ConnectionSettings(new Uri("https://myelasticurl/"))
.DefaultIndex("stage-data");
var client = new ElasticClient(elasticSettings);
var folderPrefix = "/test_Folder-ed9cc1294ba841f98fa986be7ac38813/Folder1/Folder2/";
Func<CountDescriptor<dynamic>, ICountRequest> countQueryFilter = q => q.Query(q =>
q.Match(m => m.Field("volumeId").Query("vol.e144f0bc59914725528f08d995ebd8c3"))
&& q.Match(m => m.Field("dataType").Query("File")) &&
q.Wildcard(m => m.Field("path").Value($"{folderPrefix}*")));
var countResponse= client.CountAsync(countQueryFilter);
Console.WriteLine(countResponse.Result.Count);
Here is the mapping of the path field
{
"stage-data-20210728115212095": {
"mappings": {
"path": {
"full_name": "path",
"mapping": {
"path": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
},
"rawlower": {
"type": "keyword",
"normalizer": "lowercase"
},
"tree": {
"type": "text",
"analyzer": "path_analyzer"
},
"tree_level": {
"type": "token_count",
"store": true,
"analyzer": "path_level_analyzer",
"enable_position_increments": false
}
},
"analyzer": "ngram_analyzer"
}
}
}
}
}
}
If I only search on volumeId and dataType, I get the results just fine. Even for the path field, the query works for datasets where the files are directly under a root folder, for example /folder1/mytxt.txt.
Only when the files are nested multiple levels deep, as in the example above, and I search for a path like /test_Folder-ed9cc1294ba841f98fa986be7ac38813/Folder1/Folder2/, do I get a result count of 0.
At this point I am not sure whether I need to tweak the mapping settings of this field to make it more search-friendly (something like what is suggested here), or whether I am just using the wrong method to search it.
Please note that I did try the following query types for the path search:
WildCard
Term
Regexp
Match
With each of them I got the same result: 0 records returned.
Please suggest what I am missing, thanks for your help in advance.
I am using NEST 7.13.0 on .NET core 3.1.
Regards,
Vikas

One of my colleagues helped me with this, and the solution works nicely.
Here is the sample code:
var elasticSettings = new ConnectionSettings(new Uri("https://myelasticurl/"))
.DefaultIndex("stage-data");
var client = new ElasticClient(elasticSettings);
var folderPrefix = "/test_Folder-ed9cc1294ba841f98fa986be7ac38813/Folder1/Folder2/";
Func<CountDescriptor<dynamic>, ICountRequest> countQueryFilter = q => q.Query(q =>
q.Match(m => m.Field("volumeId").Query("vol.e144f0bc59914725528f08d995ebd8c3"))
&& q.Match(m => m.Field("dataType").Query("File")) &&
q.Prefix(m => m.Field("path.raw").Value($"{folderPrefix}")));
var countResponse= client.CountAsync(countQueryFilter);
Console.WriteLine(countResponse.Result.Count);
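So basically I needed to use a Prefix query against path.raw, the keyword sub-field defined in the mapping. The path field itself is a text field analyzed with ngram_analyzer, so it is not indexed as one exact value, whereas path.raw keeps the whole path as a single keyword term that a prefix query can match.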

Related

web api post dynamic type and validation

I am developing a method that accepts various messages and returns some result. The main goal is that new message formats can be added without rebuilding the project. Two types of messages are shown below:
{
"PushToken": "ksjdfhskfhskdjfhskjdfhk",
"Alert": "ssffsdfsdfsdfsdfsdfsfs sfsdfsdf sfsdfsdfs",
"Priority": 5,
"IsBackground": false
}
and
{
"DeviceToken": "ksjdfhskfhskdjfhskjdfhkkh7khsdfjk8sdfsdfsddddddddddddddd",
"Message": "ssffsdfsdfsdfsdfsdfsfs sfsdfsdf sfsdfsdfs",
"Title": "asdasdas",
"Condition": "asfdasf"
}
I thought about how to validate the messages and decided to use JSON schemas. They are below:
{
"title":"IOS",
"description": "IOS Message",
"type": "object",
"properties": {
"PushToken": { "type": "string", "maxLength": 50 },
"Alert": { "type": "string", "maxLength": 2000 },
"Priority": { "type": "number", "default": 10 },
"IsBackground": { "type": "boolean", "default": true }
},
"required":[
"PushToken",
"Alert"
],
"additionalProperties": false
}
and
{
"title":"Android",
"description": "Android message",
"type": "object",
"properties": {
"DeviceToken": { "type": "string", "maxLength": 50 },
"Message": { "type": "string", "maxLength": 2000 },
"Title": { "type": "string", "maxLength": 255 },
"Condition": { "type": "string", "maxLength": 2000 }
},
"required":[
"DeviceToken",
"Message",
"Title"
],
"additionalProperties": false
}
For validation I am using something like this:
JSchema schema = JSchema.Parse(iosJsonSchema);
IList<string> errorMessages;
bool valid = iosJsonMessage.IsValid(schema, out errorMessages); //iosJsonMessage - JObject type
The IList<string> errorMessages variable receives all the errors, and the bool valid variable receives the validation result.
Is there a way to check only the property names of the JSON request, and only those names that are declared in the required section of the JSON schema? I want to do this in order to determine the type of the message.
And a more general question: is this an acceptable solution for the task that I described above?
Thanks.

How can I get the value of an entity from LuisResult object? (LUIS with Bot Framework .NET)

Below is the JSON when I call my LUIS api endpoint.
{
"query": "How do I install citrix?",
"topScoringIntent": {
"intent": "Setup Instructions",
"score": 0.9999997
},
"intents": [
{
"intent": "Setup Instructions",
"score": 0.9999997
},
{
"intent": "OS Availability",
"score": 0.0000021111066
},
{
"intent": "Service Guide",
"score": 8.18181149e-7
},
{
"intent": "Service Description",
"score": 5.55555232e-7
},
{
"intent": "None",
"score": 9e-9
},
{
"intent": "Greeting",
"score": 1.41666667e-9
},
{
"intent": "Compassion",
"score": 8.1e-10
},
{
"intent": "Images",
"score": 8.1e-10
}
],
"entities": [
{
"entity": "citrix",
"type": "Service",
"startIndex": 17,
"endIndex": 22,
"resolution": {
"values": [
"Citrix Receiver"
]
},
"role": ""
}
],
"sentimentAnalysis": {
"label": "positive",
"score": 0.7695234
}
}
I am trying to get the string "Citrix Receiver" from below.
Below is my code
LuisResult result
var strEntity = result.Entities[0].Resolution.Values[0]
but I cannot apply indexing to an expression of type ICollection<object>. It looks as though resolution is defined as a dictionary and upon researching, I have seen other JSON bodies with the resolution having multiple key value pairs. Is it possible the body has changed but the Luis extension in the MS Bot Builder Framework has not?
Thanks.
I previously had the same problem when getting a list of resolved entities. I resolved it using the following code:
result.Entities.First().Resolution.Values.Select(s => JArray.Parse(s.ToString()).Distinct().ToList();
So for you it may be a bit shorter like:
result.Entities.First().Resolution.Values.First(s => JArray.Parse(s.ToString());

C# Json File Add New Value

Example:
"database": {
"UUIDTOKEN": {
"name": "test",
"yearsold": "20",
"userid": "100",
"uuid": "a2b45475-aebe-4103-9c01-43ea8754dfc0",
"email": "email#gmail.com"
}
},
How do I add a new value? Here, "database" is the main JSON object. How do I add another uuid entry {
name,yearsold,userid,uuid,email
}
"database": {
"UUIDTOKEN": {
"name": "test",
"yearsold": "20",
"userid": "100",
"uuid": "a2b45475-aebe-4103-9c01-43ea8754dfc0",
"email": "email#gmail.com"
},
"UUIDTOKEN": {
"name": "stackoverflow",
"yearsold": "24",
"userid": "110",
"uuid": "uuid-aebe-4103-9c01-43ea8754dfc0",
"email": "stackoverflow#gmail.com"
},
},
Thanks for help.
An object (the "database" in your case) can't have two fields with the same name ("UUIDTOKEN"). If you need your "database" to contain a list of "UUIDTOKEN" objects, consider changing it to an array:
{
"database": [{
"name": "test",
"yearsold": "20",
"userid": "100",
"uuid": "a2b45475-aebe-4103-9c01-43ea8754dfc0",
"email": "email#gmail.com"
}, {
"name": "stackoverflow",
"yearsold": "24",
"userid": "110",
"uuid": "uuid-aebe-4103-9c01-43ea8754dfc0",
"email": "stackoverflow#gmail.com"
}]
}
Please note, the "UUIDTOKEN" name was eliminated in this sample, but if you need it for some reason, add another object layer:
{
"database": [{
"UUIDTOKEN": {
"name": "test"
//...
}
}, {
"UUIDTOKEN": {
"name": "stackoverflow"
//...
}
}]
}
I'm not exactly sure what you are asking? Do you want to know how to add another value to a json object in c#, or do you want to know why what you've written won't work?
If it's the latter, you can't have a JSON object with two identical keys; you will need to change UUIDTOKEN to a unique key for each entry. Otherwise, you could use an array.

Deserialize a JSON Response from OrientDB in CSharp

I started testing OrientDB. I get the following JSON Response from the Demo Database:
{
"schema":{
"name":"OUser",
"properties":{
"roles":{
"name":"roles",
"linkedClass":"ORole",
"type":"LINKSET",
"mandatory":false,
"notNull":true,
"min":null,
"max":null
},
"name":{
"name":"name",
"type":"STRING",
"mandatory":true,
"notNull":false,
"min":null,
"max":null
},
"password":{
"name":"password",
"type":"STRING",
"mandatory":true,
"notNull":false,
"min":null,
"max":null
}
}
},
"result":[
{
"#type":"d",
"#rid":"#4:0",
"#version":0,
"#class":"OUser",
"name":"admin",
"password":"{SHA-256}8C6976E5B5410415BDE908BD4DEE15DFB167A9C873FC4BB8A81F6F2AB448A918",
"status":"ACTIVE",
"roles":[
"#3:0"
]
},
{
"#type":"d",
"#rid":"#4:1",
"#version":0,
"#class":"OUser",
"name":"reader",
"password":"{SHA-256}3D0941964AA3EBDCB00CCEF58B1BB399F9F898465E9886D5AEC7F31090A0FB30",
"status":"ACTIVE",
"roles":[
"#3:1"
]
},
{
"#type":"d",
"#rid":"#4:2",
"#version":0,
"#class":"OUser",
"name":"writer",
"password":"{SHA-256}B93006774CBDD4B299389A03AC3D88C3A76B460D538795BC12718011A909FBA5",
"status":"ACTIVE",
"roles":[
"#3:2"
]
}
]
}
How can you get a List of OUser Objects out of that? Using JSON.Net, JavaScriptSerializer or whatever?
There are a number of json parsers for c# at:
http://www.json.org/. It seems like fastJSON should be pretty quick.
Once logged in execute a query against OUser class:
select from ouser
Via the HTTP protocol, this would be a GET request to this address:
http://localhost:2480/query/demo/sql/select%20from%20ouser
{
"result": [{
"#type": "d", "#rid": "#4:0", "#version": 0, "#class": "OUser",
"name": "admin",
"password": "{SHA-256}8C6976E5B5410415BDE908BD4DEE15DFB167A9C873FC4BB8A81F6F2AB448A918",
"status": "ACTIVE",
"roles": ["#3:0"]
}, {
"#type": "d", "#rid": "#4:1", "#version": 2, "#class": "OUser",
"name": "reader",
"password": "{SHA-256}3D0941964AA3EBDCB00CCEF58B1BB399F9F898465E9886D5AEC7F31090A0FB30",
"status": "ACTIVE",
"roles": ["#3:1"]
}, {
"#type": "d", "#rid": "#4:2", "#version": 0, "#class": "OUser",
"name": "writer",
"password": "{SHA-256}B93006774CBDD4B299389A03AC3D88C3A76B460D538795BC12718011A909FBA5",
"status": "ACTIVE",
"roles": ["#3:2"]
}
]
}
This looks like an easy structure to deserialize using Json.NET.

Deserialize complex JSON object using ASP.NET?

I've successfully created code that serializes a complex javascript object and posts it to an ASP.NET page. I can get the JSON string using Request.Form and the result looks like this (I've added indentation for readability):
{
"gasterPerStolPerVecka": {
"name": "gasterPerStolPerVecka",
"keyValue_id": "2",
"set_id": "1",
"containerElement": "#gasterPerStolPerVecka",
"keyValueComponents": [
{
"name": "gasterPerStolPerVecka_guestsPerWeek",
"value": "200"
},
{
"name": "gasterPerStolPerVecka_numberOfChairs",
"value": "100"
}
],
"keyValueUnitText": "gäster/stol per vecka",
"keyValueCalculationMethod": "divide",
"isValid": true,
"result": 2
},
"tillgangligaStolstimmarPerVecka": {
"name": "tillgangligaStolstimmarPerVecka",
"keyValue_id": "1",
"set_id": "1",
"containerElement": "#tillgangligaStolstimmarPerVecka",
"keyValueComponents": [
{
"name": "tillgangligaStolstimmarPerVecka_openHoursPerWeek",
"value": "35"
},
{
"name": "tillgangligaStolstimmarPerVecka_numberOfChairs",
"value": "100"
}
],
"keyValueUnitText": "stolstimmar/vecka",
"keyValueCalculationMethod": "multiply",
"isValid": true,
"result": 3500
},
"planeradIntaktPerTillgangligStolOchVecka": {
"name": "planeradIntaktPerTillgangligStolOchVecka",
"keyValue_id": "",
"set_id": "",
"containerElement": "#planeradIntaktPerTillgangligStolOchVecka",
"keyValueComponents": [
{
"name": "planeradIntaktPerTillgangligStolOchVecka_weeklyRevenue",
"value": ""
},
{
"name": "planeradIntaktPerTillgangligStolOchVecka_numberOfChairs",
"value": "100"
}
],
"keyValueUnitText": "kr",
"keyValueCalculationMethod": "divide",
"isValid": false,
"result": null,
"errorText": "Ofullständigt underlag för beräkning."
},
"planeradIntaktPerTillgangligaStolstimmar": {
"name": "planeradIntaktPerTillgangligaStolstimmar",
"keyValue_id": "",
"set_id": "",
"containerElement": "#planeradIntaktPerTillgangligaStolstimmar",
"keyValueComponents": [
{
"name": "planeradIntaktPerTillgangligaStolstimmar_weeklyRevenue",
"value": ""
},
{
"name": "planeradIntaktPerTillgangligaStolstimmar_openHoursPerWeek",
"value": "35"
},
{
"name": "planeradIntaktPerTillgangligaStolstimmar_numberOfChairs",
"value": "100"
}
],
"keyValueUnitText": "kr",
"keyValueCalculationMethod": "divide_divide",
"isValid": false,
"result": null,
"errorText": "Ofullständigt underlag för beräkning."
}
}
Now I try to deserialize this on the server side, but it's difficult. I keep getting the error:
[NullReferenceException: Object reference not set to an instance of an object.]
I don't know where to start looking for errors.
Thanks in advance!
/Thomas Kahn
Use stringify instead of serialization.
You need to use a deserialization library for ASP.NET. See http://json.org/ for libraries that are available or maybe there is one built into ASP.NET. Either way, the code will look like:
String s = getAppropriateFormField();
Object o = JSONLibraryPackage.parse(s);
where obviously you'll have to fill in the blanks for how you get the form field and then what package and method does the parsing.

Categories