none
How to unzip a file in memory using an Azure CloudBlob

  • Question

  • Azure doesn't allow me to unzip a file directly in a container.  I downloaded a zip file and now need to expand the file in the zip file.  What I get is a 0 byte file.  I can download the zip to my local computer and see the embedded csv, so the zip file isn't corrupt.  I get no errors, just a zero byte output file.  What am I doing wrong? I've tried all of these options unsuccessfully:

        // Attempt 1: download the zip blob into memory, wrap the buffer in a GZipStream,
        // and copy the decompressed bytes into a new "<date> <name>.csv" block blob.
        // NOTE(review): ms is never rewound after the download, so the decompressor presumably
        // starts reading at end-of-stream — consistent with the 0-byte output described above.
        // NOTE(review): the blob is described as a .zip archive; the attempts below that do rewind
        // fail with "unsupported compression method", so GZipStream is not the right reader here.
        using (MemoryStream ms = new MemoryStream())
        {
        await zipOutputBlob.DownloadToStreamAsync(ms);
        using (var zipStream = new GZipStream(ms, CompressionMode.Decompress))
        {
        // Target blob name: last-write date (yyyy-MM-dd), a space, then the zip's base name with ".csv".
        CloudBlockBlob unzippedBlob = container.GetBlockBlobReference(String.Format("{0:yyyy-MM-dd}", lastWriteTime) + " " + Path.GetFileNameWithoutExtension(fileName) +  ".csv");
        unzippedBlob.Properties.ContentType = "text/csv";
        using (Stream outputFileStream = await unzippedBlob.OpenWriteAsync())
        {
        await zipStream.CopyToAsync(outputFileStream);
        outputFileStream.Flush();
        }
        }
        }

    2nd try:

    // Attempt 2: same download + GZipStream wrapper, but the decompression stream is handed
    // straight to UploadFromStreamAsync instead of being copied into an OpenWriteAsync stream.
    // NOTE(review): ms is still not rewound after the download, so this presumably also reads
    // nothing — consistent with the 0-byte output described above.
    using (MemoryStream ms = new MemoryStream())
    {
    await zipOutputBlob.DownloadToStreamAsync(ms);
    using (var zipStream = new GZipStream(ms, CompressionMode.Decompress))
    {
    // Target blob name: last-write date (yyyy-MM-dd), a space, then the zip's base name with ".csv".
    CloudBlockBlob unzippedBlob = container.GetBlockBlobReference(String.Format("{0:yyyy-MM-dd}", lastWriteTime) + " " + Path.GetFileNameWithoutExtension(fileName) + ".csv");
    unzippedBlob.Properties.ContentType = "text/csv";
    await unzippedBlob.UploadFromStreamAsync(zipStream);
    }
    }

    3rd

        // Attempt 3: statement-for-statement the same as the first attempt (GZipStream over the
        // downloaded buffer, copied into an OpenWriteAsync stream); only the indentation differs.
        // NOTE(review): ms is not rewound after the download, so the decompressor presumably
        // starts at end-of-stream — consistent with the 0-byte output described above.
        using (MemoryStream ms = new MemoryStream())
        {
            await zipOutputBlob.DownloadToStreamAsync(ms);
            using (var zipStream = new GZipStream(ms, CompressionMode.Decompress))
            {
                // Target blob name: last-write date (yyyy-MM-dd), a space, then the zip's base name with ".csv".
                CloudBlockBlob unzippedBlob = container.GetBlockBlobReference(String.Format("{0:yyyy-MM-dd}", lastWriteTime) + " " + Path.GetFileNameWithoutExtension(fileName) + ".csv");
                unzippedBlob.Properties.ContentType = "text/csv";
                using (Stream outputFileStream = await unzippedBlob.OpenWriteAsync())
                {
                    await zipStream.CopyToAsync(outputFileStream);
                    outputFileStream.Flush();
                }
            }
        }

    4th    

        // Attempt 4: same flow as the earlier attempts, but with DeflateStream in place of GZipStream.
        // NOTE(review): ms is not rewound after the download, so the decompressor presumably
        // starts at end-of-stream — consistent with the 0-byte output described above.
        using (MemoryStream ms = new MemoryStream())
        {
            await zipOutputBlob.DownloadToStreamAsync(ms);
            using (DeflateStream decompressionStream = new DeflateStream(ms, CompressionMode.Decompress))
            {
                // Target blob name: last-write date (yyyy-MM-dd), a space, then the zip's base name with ".csv".
                CloudBlockBlob unzippedBlob = container.GetBlockBlobReference(String.Format("{0:yyyy-MM-dd}", lastWriteTime) + " " + Path.GetFileNameWithoutExtension(fileName) + ".csv");
                unzippedBlob.Properties.ContentType = "text/csv";
                using (Stream outputFileStream = await unzippedBlob.OpenWriteAsync())
                {
                    await decompressionStream.CopyToAsync(outputFileStream);
                    outputFileStream.Flush();
                }
            }
        }
        
    5th

        // Attempt 5: download into memory, rewind to the start, decompress with GZipStream into a
        // second MemoryStream, decode the bytes as ASCII text and upload that text as the CSV blob.
        // Per the question, this variant throws at CopyTo with
        // "The archive entry was compressed using an unsupported compression method."
        using (var inputStream = new MemoryStream())
        {
            await zipOutputBlob.DownloadToStreamAsync(inputStream);
            // Rewind so the decompressor reads from the first downloaded byte.
            inputStream.Seek(0, SeekOrigin.Begin);
        
            using (var gzStream = new GZipStream(inputStream, CompressionMode.Decompress))
            {
                using (var outputStream = new MemoryStream())
                {
                    gzStream.CopyTo(outputStream);
                    byte[] outputBytes = outputStream.ToArray(); // No data. Sad panda. :'(
                    // NOTE(review): ASCII decoding would replace any non-ASCII bytes in the CSV — confirm the data is pure ASCII.
                    string output = Encoding.ASCII.GetString(outputBytes);
                    // Target blob name: last-write date (yyyy-MM-dd), a space, then the zip's base name with ".csv".
                    CloudBlockBlob unzippedBlob = container.GetBlockBlobReference(String.Format("{0:yyyy-MM-dd}", lastWriteTime) + " " + Path.GetFileNameWithoutExtension(fileName) + ".csv");
                    unzippedBlob.Properties.ContentType = "text/csv";
                    await unzippedBlob.UploadTextAsync(output);
                }
            }
        }

    6th
        
        // Attempt 6: same as the fifth attempt (download, rewind, decompress into a second
        // MemoryStream, upload as text) but with DeflateStream in place of GZipStream.
        // Per the question, this variant also throws at CopyTo with
        // "The archive entry was compressed using an unsupported compression method."
        using (var ms = new MemoryStream())
        {
            await zipOutputBlob.DownloadToStreamAsync(ms);
            // Rewind so the decompressor reads from the first downloaded byte.
            ms.Seek(0, SeekOrigin.Begin);
        
            using (DeflateStream decompressionStream = new DeflateStream(ms, CompressionMode.Decompress))
            {
                using (var outputStream = new MemoryStream())
                {
                    decompressionStream.CopyTo(outputStream);
                    byte[] outputBytes = outputStream.ToArray(); // No data. Sad panda. :'(
                    // NOTE(review): ASCII decoding would replace any non-ASCII bytes in the CSV — confirm the data is pure ASCII.
                    string output = Encoding.ASCII.GetString(outputBytes);
                    // Target blob name: last-write date (yyyy-MM-dd), a space, then the zip's base name with ".csv".
                    CloudBlockBlob unzippedBlob = container.GetBlockBlobReference(String.Format("{0:yyyy-MM-dd}", lastWriteTime) + " " + Path.GetFileNameWithoutExtension(fileName) + ".csv");
                    unzippedBlob.Properties.ContentType = "text/csv";
                    await unzippedBlob.UploadTextAsync(output);
                }
            }
        }

    Options 5 and 6 fail with this error message in the CopyTo method:
     > System.Private.CoreLib: Exception while executing function: DefinitiveHealthCare. System.IO.Compression: The archive entry was compressed using an unsupported compression method.

    How is this done?
    Friday, January 17, 2020 8:09 PM

Answers

  • I finally figured this out. The change here is to use the ZipArchive class and loop through all the entries in the ZipArchive.  In my case there was only one file in the ZipArchive, so I was trying to skip this step, which did not work out well.

        private static async Task UnzipDefinitiveFile(string fileName, CloudBlobContainer container, Logger logger, DateTime lastWriteTime, CloudBlockBlob zipOutputBlob)
        {
            using (MemoryStream blobMemStream = new MemoryStream())
            {
                await zipOutputBlob.DownloadToStreamAsync(blobMemStream);
                using (ZipArchive archive = new ZipArchive(blobMemStream))
                {
                    foreach (ZipArchiveEntry entry in archive.Entries)
                    {
                        logger.Send(SeverityLevel.Verbose, $"Now processing {entry.FullName}");
                        if (entry.FullName != Path.GetFileNameWithoutExtension(fileName) + ".csv")
                            continue;
                        string validName = String.Format("{0:yyyy-MM-dd}", lastWriteTime) + " " + Path.GetFileNameWithoutExtension(fileName) + ".csv";
                        CloudBlockBlob blockBlob = container.GetBlockBlobReference(validName);
                        blockBlob.Properties.ContentType = "text/csv";
                        using (var fileStream = entry.Open())
                        {
                            await blockBlob.UploadFromStreamAsync(fileStream);
                        }
                    }
                }
            }
        }

    Another option could be to just use Azure Data Factory V2.  The copy data activity can load a zipped file and it can download a file from an SFTP location.
    Monday, January 27, 2020 7:31 PM

All replies

  • Hi Jack Stormbringer,
    I am not familiar with Azure CloudBlob, but I found some related documents you can refer to.
    You can unzip your file with PowerShell or with memory streams in Azure WebJobs.
    For more details, you can refer to these links.
    [Unzip file in azure storage]
    [How to Unzip Azure Blobs programmatically using Memory Streams in Azure WebJOBs]
    Note: This response contains a reference to a third party World Wide Web site. Microsoft is providing this information as a convenience to you. Microsoft does not control these sites and has not tested any software or information found on these sites; Therefore, Microsoft cannot make any representations regarding the quality, safety, or suitability of any software or information found there. There are inherent dangers in the use of any software found on the Internet, and Microsoft cautions you to make sure that you completely understand the risk before retrieving any software from the Internet.
    Best Regards,
    Daniel Zhang


    MSDN Community Support
    Please remember to click "Mark as Answer" the responses that resolved your issue, and to click "Unmark as Answer" if not. This can be beneficial to other community members reading this thread. If you have any compliments or complaints to MSDN Support, feel free to contact MSDNFSF@microsoft.com.

    Monday, January 20, 2020 9:52 AM
  • I'm really looking for help in doing this in C#; alternate solutions will just have me spinning up additional resources and costing me and the company I work for more money. It will also make the end solution unnecessarily complex.  
    Tuesday, January 21, 2020 4:03 PM
  • I finally figured this out. The change here is to use the ZipArchive class and loop through all the entries in the ZipArchive.  In my case there was only one file in the ZipArchive, so I was trying to skip this step, which did not work out well.

        private static async Task UnzipDefinitiveFile(string fileName, CloudBlobContainer container, Logger logger, DateTime lastWriteTime, CloudBlockBlob zipOutputBlob)
        {
            using (MemoryStream blobMemStream = new MemoryStream())
            {
                await zipOutputBlob.DownloadToStreamAsync(blobMemStream);
                using (ZipArchive archive = new ZipArchive(blobMemStream))
                {
                    foreach (ZipArchiveEntry entry in archive.Entries)
                    {
                        logger.Send(SeverityLevel.Verbose, $"Now processing {entry.FullName}");
                        if (entry.FullName != Path.GetFileNameWithoutExtension(fileName) + ".csv")
                            continue;
                        string validName = String.Format("{0:yyyy-MM-dd}", lastWriteTime) + " " + Path.GetFileNameWithoutExtension(fileName) + ".csv";
                        CloudBlockBlob blockBlob = container.GetBlockBlobReference(validName);
                        blockBlob.Properties.ContentType = "text/csv";
                        using (var fileStream = entry.Open())
                        {
                            await blockBlob.UploadFromStreamAsync(fileStream);
                        }
                    }
                }
            }
        }

    Another option could be to just use Azure Data Factory V2.  The copy data activity can load a zipped file and it can download a file from an SFTP location.
    Monday, January 27, 2020 7:31 PM