How do I iterate CloudBlobDirectory to copy the data? - c#

I have written the code below to iterate through the blobs in a Gen2 storage account:
// Parse the connection strings for the source and destination storage accounts.
CloudStorageAccount sourceAccount = CloudStorageAccount.Parse(sourceConnection);
CloudStorageAccount destAccount = CloudStorageAccount.Parse(destConnection);
// One blob client per account; destClient is created here but not used in this snippet.
CloudBlobClient sourceClient = sourceAccount.CreateCloudBlobClient();
CloudBlobClient destClient = destAccount.CreateCloudBlobClient();
// Reference to the container whose blobs are to be enumerated.
CloudBlobContainer sourceBlobContainer = sourceClient.GetContainerReference(sourceContainer);
// Find all blobs that haven't changed since the specified date and time
// NOTE: .Result blocks the calling thread until the asynchronous listing completes.
IEnumerable<ICloudBlob> sourceBlobRefs = FindMatchingBlobsAsync(sourceBlobContainer, transferBlobsNotModifiedSince).Result;
/// <summary>
/// Incomplete attempt at walking the container two directory levels deep.
/// </summary>
/// <param name="blobContainer">Container whose contents are listed.</param>
/// <param name="transferBlobsNotModifiedSince">Cut-off date; not read anywhere in this body.</param>
// NOTE(review): as written, nothing is ever added to blobList and the method has no return statement.
private static async Task<IEnumerable<ICloudBlob>> FindMatchingBlobsAsync(CloudBlobContainer blobContainer, DateTime transferBlobsNotModifiedSince)
{
List<ICloudBlob> blobList = new List<ICloudBlob>();
BlobContinuationToken token = null;
// Iterate through the blobs in the source container
do
{
// First level: list the container root (empty prefix).
BlobResultSegment segment = await blobContainer.ListBlobsSegmentedAsync(prefix: "", currentToken: token);
// The explicitly typed foreach casts every result to CloudBlobDirectory, so a result that is
// not a directory makes the cast fail at run time.
foreach (CloudBlobDirectory VARIABLE in segment.Results)
{
// Second level: list the directory's children, passing the outer continuation token.
BlobResultSegment segment2 = await VARIABLE.ListBlobsSegmentedAsync(currentToken: token);
foreach (CloudBlobDirectory VARIABLE2 in segment2.Results)//Bad coding
{
//how do I get children count ?
}
}
// token is never reassigned from a segment's ContinuationToken, so it is still null here
// and the loop ends after a single pass.
}while (token != null);
}
This iterates only two levels deep; it does not descend dynamically into the inner levels. My blobs are arranged in the hierarchy below:
--Container
--FolderA
--FolderAA
--FolderAA1
--File1.txt
--File2.txt
--FolderAA2
--File1.txt
--File2.txt
--FolderAA3
--FolderAB
--File8.txt
--FolderAC
--File9.txt
This hierarchy is dynamic
How do I loop and copy the blob content.
Note: I do not want to use CLI commands to copy. Because I won't have any control once copy started.
Update
Found some samples here: https://csharp.hotexamples.com/examples/Microsoft.WindowsAzure.Storage.Blob/CloudBlobContainer/ListBlobsSegmented/php-cloudblobcontainer-listblobssegmented-method-examples.html

Please see the sample code below:
/// <summary>
/// Console sample that prints the name of every blob in the "test" container
/// of the development storage account.
/// </summary>
class Program
{
    /// <summary>
    /// Lists all blobs, writes each name to the console and waits for a key press.
    /// </summary>
    static void Main(string[] args)
    {
        CloudStorageAccount account = CloudStorageAccount.Parse("UseDevelopmentStorage=true");
        CloudBlobContainer testContainer = account.CreateCloudBlobClient().GetContainerReference("test");

        // Main is synchronous, so the listing task is waited on here.
        IEnumerable<ICloudBlob> found = FindMatchingBlobsAsync(testContainer).GetAwaiter().GetResult();
        foreach (ICloudBlob entry in found)
        {
            Console.WriteLine(entry.Name);
        }

        Console.WriteLine("-------------------------------------");
        Console.WriteLine("List of all blobs fetched. Press any key to terminate the application.");
        Console.ReadKey();
    }

    /// <summary>
    /// Collects every blob of <paramref name="blobContainer"/> using a flat listing,
    /// requesting up to 5000 results per call and following continuation tokens
    /// until none is returned.
    /// </summary>
    /// <param name="blobContainer">Container to enumerate.</param>
    /// <returns>All listed items, each cast to <see cref="ICloudBlob"/>.</returns>
    private static async Task<IEnumerable<ICloudBlob>> FindMatchingBlobsAsync(CloudBlobContainer blobContainer)
    {
        var collected = new List<ICloudBlob>();
        BlobContinuationToken next = null;

        do
        {
            BlobResultSegment page = await blobContainer.ListBlobsSegmentedAsync(
                prefix: "",
                useFlatBlobListing: true,
                BlobListingDetails.None,
                5000,
                next,
                new BlobRequestOptions(),
                new OperationContext());

            // Remember where the next page starts before consuming this one.
            next = page.ContinuationToken;

            foreach (IListBlobItem listed in page.Results)
            {
                collected.Add((ICloudBlob)listed);
            }
        }
        while (next != null);

        return collected;
    }
}

Related

unable to access storage account blob getting BlobContainerValidationError error c#

I'm trying to read data from blob storage to use iothub import job.
I am able to write to the blob file successfully, but reading from the blob gives me the exception below:
await registryManager.ImportDevicesAsync(containerSasUri, containerSasUri);
{"{\"Message\":\"ErrorCode:BlobContainerValidationError;Failed to read
devices blob from input container.\",\"ExceptionMessage\":\"Tracking
ID:6f06c1ce39f04494b929a2249ce069f2-G:9-TimeStamp:01/06/2019
11:57:23\"}"}
/// <summary>
/// Builds a container URI with a shared access signature appended.
/// </summary>
/// <param name="container">Container the signature is generated for.</param>
/// <returns>The container URI followed by the generated SAS token.</returns>
static string GetContainerSasUri(CloudBlobContainer container)
{
    // Only an expiry (24 hours from now, UTC) is set; no start time is specified.
    var sasConstraints = new SharedAccessBlobPolicy
    {
        SharedAccessExpiryTime = DateTime.UtcNow.AddHours(24),
        Permissions = SharedAccessBlobPermissions.Write
            | SharedAccessBlobPermissions.Read
            | SharedAccessBlobPermissions.Delete
            | SharedAccessBlobPermissions.Add
            | SharedAccessBlobPermissions.Create
    };

    // The constraints are applied directly to the signature rather than via a stored policy.
    string sasQuery = container.GetSharedAccessSignature(sasConstraints);

    return container.Uri + sasQuery;
}
// Connect to the IoT hub registry and to the storage account that holds the import blob.
registryManager = RegistryManager.CreateFromConnectionString(connectionString);
CloudStorageAccount storageAccount = CloudStorageAccount.Parse("connection-string");
// Create a blob client.
CloudBlobClient blobClient = storageAccount.CreateCloudBlobClient();
CloudBlobContainer container = blobClient.GetContainerReference("test");
// The device list is written to a blob named "demo123.txt" in that container.
CloudBlockBlob blob = container.GetBlockBlobReference("demo123.txt");
// Container-level SAS URI, used below for both the import input and the job output.
var containerSasUri = GetContainerSasUri(container);
// Build the devices to import (the loop below creates 5 of them, i = 0..4).
serializedDevices = new List<string>();
for (var i = 0; i < 5; i++)
{
// Create a new ExportImportDevice
// CryptoKeyGenerator is in the Microsoft.Azure.Devices.Common namespace
var deviceToAdd = new ExportImportDevice()
{
Id = i+"look",
Status = DeviceStatus.Enabled,
Authentication = new AuthenticationMechanism()
{
SymmetricKey = new SymmetricKey()
{
PrimaryKey = CryptoKeyGenerator.GenerateKey(32),
SecondaryKey = CryptoKeyGenerator.GenerateKey(32)
}
},
ImportMode = ImportMode.Create
};
// Add device to the list
serializedDevices.Add(JsonConvert.SerializeObject(deviceToAdd));
}
// NOTE(review): tt is assigned but not used in the visible code.
var tt = serializedDevices;
// Write the list to the blob, one serialized device per line.
var sb = new StringBuilder();
serializedDevices.ForEach(serializedDevice => sb.AppendLine(serializedDevice));
//await blob.DeleteIfExistsAsync();
using (CloudBlobStream stream = await blob.OpenWriteAsync())
{
byte[] bytes = Encoding.UTF8.GetBytes(sb.ToString());
// Upload the UTF-8 bytes in chunks of at most 500 bytes.
for (var i = 0; i < bytes.Length; i += 500)
{
int length = Math.Min(bytes.Length - i, 500);
await stream.WriteAsync(bytes, i, length);
}
}
// Call import using the blob to add new devices
// Log information related to the job is written to the same container
// This normally takes 1 minute per 100 devices
// NOTE(review): no input blob name is passed here, although the data was written to
// "demo123.txt" above — confirm which blob name the import job reads.
JobProperties importJob =
await registryManager.ImportDevicesAsync(containerSasUri, containerSasUri);
Try setting the SAS start time a few minutes in the past:
sasConstraints.SharedAccessStartTime = DateTimeOffset.UtcNow.AddMinutes(-5);
Update:
The default input blob name is devices.txt. In your implementation, the name of the input blob is demo123.txt, so you have to change:
await registryManager.ImportDevicesAsync(containerSasUri, containerSasUri, "demo123.txt");

How to get files modified today from Azure Blob Storage

I want to get files from Azure Blob Storage that are modified everyday. We have a container in Azure which is filled by two Excel files every day, and I need to get those files.
So far, I'm only able to get one file using latestmodifiedon. How can I get both files?
/// <summary>
/// Downloads the most recently modified block blob of the "salesbycontract"
/// container and reads it as an Excel workbook.
/// </summary>
/// <returns>The workbook contents as a <see cref="DataSet"/>.</returns>
private static DataSet GetExcelBlobData()
{
    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(
        Microsoft.Azure.CloudConfigurationManager.GetSetting("StorageConnectionString"));
    CloudBlobContainer container = storageAccount
        .CreateCloudBlobClient()
        .GetContainerReference("salesbycontract");

    // Newest first; First() throws when the container holds no block blob.
    CloudBlockBlob blockbob = container.ListBlobs()
        .OfType<CloudBlockBlob>()
        .OrderByDescending(m => m.Properties.LastModified)
        .ToList()
        .First();
    Console.WriteLine(blockbob.Name);

    using (var memstream = new MemoryStream())
    {
        blockbob.DownloadToStream(memstream);

        var excelReader = ExcelReaderFactory.CreateOpenXmlReader(memstream);
        DataSet ds = excelReader.AsDataSet();
        excelReader.Close();

        return ds;
    }
}
Just add a Where clause and compare against DateTime.Today:
// LastModified is a nullable DateTimeOffset. Convert it to local time before taking the
// date, so it is compared with DateTime.Today (a local date) on the same clock.
// Every match is kept (no First()), so all files modified today are returned and an
// empty result yields an empty list instead of an exception.
var blockbob = container.ListBlobs().OfType<CloudBlockBlob>()
    .Where(m => m.Properties.LastModified.HasValue
        && m.Properties.LastModified.Value.LocalDateTime.Date == DateTime.Today)
    .ToList();
I added a working example to my GitHub repository that uses dotnet core with the latest WindowsAzure.Storage SDK:
/// <summary>
/// Writes the URI of every block blob in the configured container whose last-modified
/// time falls on today's local calendar date.
/// </summary>
/// <returns>A task that completes when all listing segments have been processed.</returns>
public async Task RetrieveBlobsModifiedTodayAsync()
{
    var container = _blobClient.GetContainerReference(_storageAccount.ContainerName);

    // Read "today" once so every segment is filtered against the same date.
    DateTime today = DateTime.Today;

    BlobContinuationToken blobContinuationToken = null;
    do
    {
        var results = await container.ListBlobsSegmentedAsync(null, blobContinuationToken);
        blobContinuationToken = results.ContinuationToken;

        // LastModified is a DateTimeOffset; convert it to local time before taking the
        // date so it is compared with the local "today" on the same clock.
        var blobs = results.Results.OfType<CloudBlockBlob>()
            .Where(b => b.Properties.LastModified.HasValue
                && b.Properties.LastModified.Value.LocalDateTime.Date == today);

        foreach (var item in blobs)
        {
            Console.WriteLine(item.Uri);
        }
    } while (blobContinuationToken != null); // Loop while the continuation token is not null.
}
Migrated from Fikri Hailal's (OP) November 8, 2018 edit that added an answer to their question:
Below is the query which work for my case:
// Order the container's block blobs newest-first by LastModified and keep the first two.
var blockbob = container.ListBlobs().OfType<CloudBlockBlob>().OrderByDescending(m => m.Properties.LastModified).ToList().Take(2);

How to delete files from blob container?

// Container this class operates on.
private readonly CloudBlobContainer _blobContainer;

/// <summary>
/// Deletes the whole container when it exists; does nothing otherwise.
/// </summary>
public void Remove()
{
    if (!_blobContainer.Exists())
    {
        return;
    }

    _blobContainer.Delete();
}
How can I delete not the whole container, but only some disks in it, given as a List<string>?
This is the code I use:
// Container reference; populated by AssertBlobContainer() on first use.
private CloudBlobContainer blobContainer;

/// <summary>
/// Deletes the block blob with the given name from the container, if it exists.
/// </summary>
/// <param name="uniqueFileIdentifier">Name of the blob to delete.</param>
public void DeleteFile(string uniqueFileIdentifier)
{
    // Make sure the container reference has been resolved first.
    this.AssertBlobContainer();

    this.blobContainer
        .GetBlockBlobReference(uniqueFileIdentifier)
        .DeleteIfExists();
}
/// <summary>
/// Resolves and validates the container reference once, storing it in
/// <c>blobContainer</c> for later calls.
/// </summary>
/// <exception cref="CustomRuntimeException">The configured container does not exist.</exception>
private void AssertBlobContainer()
{
    // only do once
    if (this.blobContainer != null)
    {
        return;
    }

    lock (this.blobContainerLockObj)
    {
        // Checked again under the lock: another caller may have finished initialization.
        if (this.blobContainer != null)
        {
            return;
        }

        var client = this.cloudStorageAccount.CreateCloudBlobClient();
        var candidate = client.GetContainerReference(this.containerName.ToLowerInvariant());
        if (!candidate.Exists())
        {
            throw new CustomRuntimeException("Container {0} does not exist in azure account", containerName);
        }

        // Assign the field only after validation. Previously it was set before the
        // Exists() check, so after one failure every later call skipped the check and
        // used a reference to a non-existent container.
        this.blobContainer = candidate;
    }
}
You can ignore the locking code if you know this isn't going to be accessed simultaneously
Obviously, you have the blobContainer stuff sorted, so all you need is that DeleteFile method without the this.AssertBlobContainer().
Remember SDK v11 has been deprecated, with SDK v12:
using Azure.Storage.Blobs;
...
// Service client -> container client -> blob client, then delete the blob if present.
var serviceClient = new BlobServiceClient("StorageConnectionString");
BlobContainerClient containerClient = serviceClient.GetBlobContainerClient("containerName");
var target = containerClient.GetBlobClient("FileName.ext");
target.DeleteIfExists();
There's a method called DeleteIfExists(). It returns true/false.
// Get a reference to the block blob named fileName, then delete it if it exists.
// NOTE(review): GetBlockBlobReference is invoked on the type name here; presumably a
// CloudBlobContainer instance is meant — confirm.
CloudBlockBlob blob = CloudBlobContainer.GetBlockBlobReference(fileName);
blob.DeleteIfExists();
Filename is ContainerName/FileName, if is inside folders you need to mention the folder too. Like ContainerName/AppData/FileName and will work.
A single line code to perform deletion
/// <summary>
/// Deletes the named blob from the configured container if it exists.
/// </summary>
/// <param name="blobNamewithFileExtension">Blob name, including its file extension.</param>
private static async Task DeleteBLOBFile(string blobNamewithFileExtension)
{
    var target = new BlobClient(
        blobConnectionString,
        containerName,
        blobNamewithFileExtension);

    await target.DeleteIfExistsAsync();
}
We can use the cloudBlobContainer.ListBlobsSegmentedAsync to list the blobs and then cast it as ICloudBlob so that you can perform the DeleteIfExistsAsync. Below is the working sample function. Hope it helps.
/// <summary>
/// Deletes every blob in the configured container.
/// </summary>
/// <returns>
/// <c>true</c> when all listed blobs were processed; <c>false</c> when the connection
/// string cannot be parsed, the container does not exist, or an exception was logged.
/// </returns>
public async Task<bool> PerformTasks()
{
    try
    {
        if (!CloudStorageAccount.TryParse(StorageConnectionString, out CloudStorageAccount cloudStorageAccount))
        {
            _logger.LogError(ErrorMessages.NoStorageConnectionStringAvailable);
            return false;
        }

        var cloudBlobClient = cloudStorageAccount.CreateCloudBlobClient();
        var cloudBlobContainer = cloudBlobClient.GetContainerReference(_blobContainerName);
        if (!await cloudBlobContainer.ExistsAsync())
        {
            _logger.LogError(ErrorMessages.NoBlobContainerAvailable);
            return false;
        }

        BlobContinuationToken blobContinuationToken = null;
        do
        {
            // Flat listing returns the blobs themselves rather than virtual-directory
            // entries, which cannot be deleted as blobs.
            var blobList = await cloudBlobContainer.ListBlobsSegmentedAsync(
                string.Empty, true, BlobListingDetails.None, null, blobContinuationToken, null, null);

            // Follow the continuation token so containers larger than one segment are
            // emptied completely.
            blobContinuationToken = blobList.ContinuationToken;

            // OfType skips anything that is not a blob instead of producing null entries.
            foreach (var item in blobList.Results.OfType<ICloudBlob>())
            {
                await item.DeleteIfExistsAsync();
            }
        } while (blobContinuationToken != null);

        return true;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex.Message);
    }

    return false;
}
// Split the comma-separated list and trim each entry. Previously entries were only
// filtered on their trimmed value, so names such as " b.txt" were looked up with the
// surrounding whitespace still attached.
List<string> FileNameList = fileName
    .Split(',')
    .Select(t => t.Trim())
    .Where(t => t != "")
    .ToList();

// Only the connection string is needed to reach the account; the separate account
// name / access key lookups and the duplicate blob client were unused.
string connectionString = Environment.GetEnvironmentVariable("StorageAccountConnectionString");
CloudStorageAccount storageAccount = CloudStorageAccount.Parse(connectionString);
CloudBlobClient client = storageAccount.CreateCloudBlobClient();

// The container is named after the tenant.
string containerName = tenantId;
CloudBlobContainer container = client.GetContainerReference(containerName);

foreach (var file in FileNameList)
{
    CloudBlockBlob blob = container.GetBlockBlobReference(file);
    blob.DeleteIfExists();
}

Azure Blob Storage list container and blobs

I am working on an Azure Storage project where I need to upload and download blobs in a container and list the containers and blobs in a listbox. I am unable to display the containers and the blobs in my listbox.
This is my code to List:
And finally the code behind the interface where i call my upload, download and list methods:
The reason why you don't see any result when you click on Button3 in your webform is because you don't get back any data from the ListBlob method.
Change the ListBlob method to return a result like:
/// <summary>
/// Describes the top-level items of the "mycontainer" container.
/// </summary>
/// <returns>
/// One line per block blob, page blob or directory; items of any other type are skipped.
/// </returns>
public List<string> GetBlobs()
{
    // Resolve account -> client -> container from the configured connection string.
    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(
        CloudConfigurationManager.GetSetting("StorageConnectionString"));
    CloudBlobContainer container = storageAccount
        .CreateCloudBlobClient()
        .GetContainerReference("mycontainer");

    var descriptions = new List<string>();

    // Non-flat listing: nested content shows up as directory entries.
    foreach (IListBlobItem item in container.ListBlobs(null, false))
    {
        var kind = item.GetType();
        string description;

        if (kind == typeof(CloudBlockBlob))
        {
            var blockBlob = (CloudBlockBlob)item;
            description = string.Format("Block blob of length {0}: {1}", blockBlob.Properties.Length, blockBlob.Uri);
        }
        else if (kind == typeof(CloudPageBlob))
        {
            var pageBlob = (CloudPageBlob)item;
            description = string.Format("Page blob of length {0}: {1}", pageBlob.Properties.Length, pageBlob.Uri);
        }
        else if (kind == typeof(CloudBlobDirectory))
        {
            var directory = (CloudBlobDirectory)item;
            description = string.Format("Directory: {0}", directory.Uri);
        }
        else
        {
            continue;
        }

        descriptions.Add(description);
    }

    return descriptions;
}
Then, in your webform (I assume you have a ListBox named ListBox1), call the method like this:
/// <summary>
/// Click handler: binds the result of GetBlobs() to ListBox1.
/// </summary>
protected void Button3_Click(object sender, EventArgs e)
{
    var listing = GetBlobs();

    ListBox1.DataSource = listing;
    ListBox1.DataBind();
}
It isn't clear to me what problem you are experiencing as you haven't explained fully. Listing blobs within a container including paging support is demonstrated in the following code extracted from this sample.
// Page through the container listing, following continuation tokens until none is returned.
BlobContinuationToken token = null;
do
{
    BlobResultSegment resultSegment = await container.ListBlobsSegmentedAsync(token);
    token = resultSegment.ContinuationToken;
    foreach (IListBlobItem blob in resultSegment.Results)
    {
        // Blob type will be CloudBlockBlob, CloudPageBlob or CloudBlobDirectory
        // (the closing parenthesis was missing from the original format string).
        Console.WriteLine("{0} (type: {1})", blob.Uri, blob.GetType());
    }
} while (token != null);

How to get a list of all the blobs in a container in Azure?

I have the account name and account key of a storage account in Azure. I need to get a list of all the blobs in a container in that account. (The "$logs" container).
I am able to get the information of a specific blob using the CloudBlobClient class but can't figure out how to get a list of all the blobs within the $logs container.
There is a sample of how to list all of the blobs in a container at https://azure.microsoft.com/en-us/documentation/articles/storage-dotnet-how-to-use-blobs/#list-the-blobs-in-a-container:
// The connection string is read from the AZURE_STORAGE_CONNECTION_STRING environment
// variable. If that variable is created after the application was launched from a
// console or Visual Studio, close and reopen the shell or application so the new
// value is picked up.
string connectionString = Environment.GetEnvironmentVariable("AZURE_STORAGE_CONNECTION_STRING");

// Service client first, then the client for the container of interest.
var serviceClient = new BlobServiceClient(connectionString);
BlobContainerClient containerClient = serviceClient.GetBlobContainerClient("yourContainerName");

// Stream the listing asynchronously and print each blob name, tab-indented.
await foreach (BlobItem entry in containerClient.GetBlobsAsync())
{
    Console.WriteLine("\t" + entry.Name);
}
Here's the updated API call for WindowsAzure.Storage v9.0:
private static CloudBlobClient _blobClient = CloudStorageAccount.Parse("connectionstring").CreateCloudBlobClient();

/// <summary>
/// Lists every append blob in the "$logs" container.
/// </summary>
/// <returns>All append blobs found, collected across every listing segment.</returns>
public async Task<IEnumerable<CloudAppendBlob>> GetBlobs()
{
    var container = _blobClient.GetContainerReference("$logs");

    // A method returning Task<IEnumerable<T>> cannot contain "yield return", so the
    // results are gathered into a list and returned once listing is complete.
    var blobs = new List<CloudAppendBlob>();

    BlobContinuationToken continuationToken = null;
    //Use maxResultsPerQuery to limit the number of results per query as desired. `null` leaves the segment size unspecified.
    int? maxResultsPerQuery = null;
    do
    {
        var response = await container.ListBlobsSegmentedAsync(string.Empty, true, BlobListingDetails.None, maxResultsPerQuery, continuationToken, null, null);
        continuationToken = response.ContinuationToken;
        blobs.AddRange(response.Results.OfType<CloudAppendBlob>());
    } while (continuationToken != null);

    return blobs;
}
Update for IAsyncEnumerable
IAsyncEnumerable is now available in .NET Standard 2.1 and .NET Core 3.0
private static CloudBlobClient _blobClient = CloudStorageAccount.Parse("connectionstring").CreateCloudBlobClient();

/// <summary>
/// Streams the append blobs of the "$logs" container, one listing segment at a time.
/// </summary>
/// <returns>An asynchronous sequence of the append blobs found.</returns>
public async IAsyncEnumerable<CloudAppendBlob> GetBlobs()
{
    var logsContainer = _blobClient.GetContainerReference("$logs");

    // Set a value to cap the number of results requested per query; null leaves it unspecified.
    int? maxResultsPerQuery = null;
    BlobContinuationToken nextPage = null;

    while (true)
    {
        var segment = await logsContainer.ListBlobsSegmentedAsync(string.Empty, true, BlobListingDetails.None, maxResultsPerQuery, nextPage, null, null);
        nextPage = segment.ContinuationToken;

        foreach (var appendBlob in segment.Results.OfType<CloudAppendBlob>())
        {
            yield return appendBlob;
        }

        // No continuation token means the last segment has been consumed.
        if (nextPage == null)
        {
            break;
        }
    }
}
Using the new package Azure.Storage.Blobs
// Resolve the container client, then print the name of every blob it lists.
var serviceClient = new BlobServiceClient("YourStorageConnectionString");
var containerClient = serviceClient.GetBlobContainerClient("YourContainerName");

foreach (var blobEntry in containerClient.GetBlobs())
{
    Console.WriteLine(blobEntry.Name);
}
Since your container name is $logs, I think your blob type is append blob. Here's a method that gets all the blobs and returns an IEnumerable:
private static CloudBlobClient _blobClient = CloudStorageAccount.Parse("connectionstring").CreateCloudBlobClient();

/// <summary>
/// Lazily enumerates the append blobs of the "$logs" container using synchronous,
/// segmented flat listing.
/// </summary>
/// <returns>The append blobs found, yielded segment by segment.</returns>
public IEnumerable<CloudAppendBlob> GetBlobs()
{
    var logsContainer = _blobClient.GetContainerReference("$logs");

    // No explicit cap on the number of results per segment.
    int? maxResults = null;
    BlobContinuationToken nextPage = null;

    do
    {
        var segment = logsContainer.ListBlobsSegmented(string.Empty, true, BlobListingDetails.None, maxResults, nextPage, null, null);
        nextPage = segment.ContinuationToken;

        foreach (var appendBlob in segment.Results.OfType<CloudAppendBlob>())
        {
            yield return appendBlob;
        }
    }
    while (nextPage != null);
}
The method can be made asynchronous; just use ListBlobsSegmentedAsync. One thing to note is that the useFlatBlobListing argument needs to be true, which means the listing returns a flat list of files rather than a hierarchical one.
Use ListBlobsSegmentedAsync which returns a segment of the total result set and a continuation token.
ref:https://learn.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-dotnet?tabs=windows
In WebApI -> Swagger
/// <summary>
/// Returns the blobs of the configured container.
/// </summary>
/// <returns>An OK result whose body is the list of blob items.</returns>
[HttpGet(nameof(GetFileList))]
public async Task<IActionResult> GetFileList()
{
    BlobServiceClient blobServiceClient = new BlobServiceClient(_configuration.GetValue<string>("BlobConnectionString"));
    BlobContainerClient containerClient = blobServiceClient.GetBlobContainerClient(_configuration.GetValue<string>("BlobContainerName"));

    // Enumerate asynchronously so the request thread is not blocked while pages are
    // fetched. The method was previously marked async but contained no await.
    var blobs = new System.Collections.Generic.List<Azure.Storage.Blobs.Models.BlobItem>();
    await foreach (var blobItem in containerClient.GetBlobsAsync())
    {
        blobs.Add(blobItem);
    }

    return Ok(blobs);
}

Categories