Open In App

C Program to Read and Print All Files From a Zip File

Improve
Improve
Like Article
Like
Save
Share
Report

To understand how to write a C program for reading and printing zip files, it’s important to know what exactly a zip file is.

  • At its core, a zip file contains one or more files compressed using specific compression algorithms.
  • In addition to the compressed data of the files, the zip file contains metadata and header information about every file inside it, such as file names, modification dates, signatures, and compression methods.
  • A zip file can also be called a zip archive because of its organization and structure of multiple files.

With the above information in mind we only need two things to programmatically read and print the contents of a zip file:

  1. A decompression method that will decompress the data so that we can read it.
  2. A library for interacting with zip files, to make things way easier

We will be using two libraries that also provide decompression functions to write this program.

Library 1: libzip

Library 2: zlib

Both of these libraries are prerequisites for running the code. Libzip is a higher-level library that already utilizes parts of zlib. Zlib is lower level and therefore more technical to use. There will be two code examples: one that uses only libzip, and one that uses libzip for zip file interaction while using zlib for the decompression.

  • For help installing libzip: check here
  • For help installing zlib: check here

If these libraries are installed, you can compile the programs with gcc: pass the linker flag -lzip for method 1 (it only calls libzip) and -lzip -lz for method 2 (it calls both libzip and zlib).

Method 1: Reading and Printing All Files from a Zip File using libzip

C




// C program to read and print
// all files in a zip file
// uses library libzip
#include <stdio.h>
#include <stdlib.h>
#include <zip.h>
  
// this is run from the command line with the zip file
// passed in example usage: ./program zipfile.zip
int main(int argc, char* argv[])
{
    // if more or less than 2
    // command line arguments,
    // program ends
    if (argc > 2 || argc < 2)
        return -1;
  
    // if the file provided can't
    // be opened/read, program
    // ends
    if (!fopen(argv[1], "r"))
        return -2;
  
    // stores error codes for libzip functions
    int errorp = 0;
  
    // initializes a pointer to a zip archive
    zip_t* arch = NULL;
  
    // sets that pointer to the
    // zip file from argv[1]
    arch = zip_open(argv[1], 0, &errorp);
  
    // the zip_stat structure
    // contains information such as
    // file name, size, comp size
  
    struct zip_stat* finfo = NULL;
  
    // must be allocated enough space
    // (not exact space here)
    finfo = calloc(256, sizeof(int));
  
    // "initializes" the structure
    // according to documentation
  
    zip_stat_init(finfo);
  
    // initialize file descriptor for
    // zip files inside archive
    zip_file_t* fd = NULL;
  
    // initialize string pointer for
    // reading from fd
    char* txt = NULL;
  
    // count = index of file archive   0 =
    // first file
  
    int count = 0;
  
    // we open the file at the count'th index inside the
    // archive we loop and print every file and its
    // contents, stopping when zip_stat_index did not return
    // 0, which means the count index is more than # of
    // files
    while ((zip_stat_index(arch, count, 0, finfo)) == 0) {
  
        // allocate room for the entire file contents
        txt = calloc(finfo->size + 1, sizeof(char));
        fd = zip_fopen_index(
            arch, count, 0); // opens file at count index
                             // reads from fd finfo->size
                             // bytes into txt buffer
        zip_fread(fd, txt, finfo->size);
  
        printf("file #%i: %s\n\n", count + 1,
               finfo->name); // prints filename
        printf("%s\n\n",
               txt); // prints entire file contents
  
        // frees allocated buffer, will
        // reallocate on next iteration of loop
        free(txt);
  
        // increase index by 1 and the loop will
        // stop when files are not found
        count++;
    }
    return 0;
}


 

Example output where my zip file contained this C file

method 1 output

Method 2: Using  Zlib Implementation

Zlib cannot directly access zip files, which is why we will use libzip to open the zip file. In this method, zlib is used directly to decompress the file contents.

C




// C program to read and print
// all files in a zip file
// uses libraries libzip and zlib
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zip.h>
#include <zlib.h>
  
// Compatibility with Windows: when one of MSDOS, OS2, WIN32 or __CYGWIN__ is
// defined, SET_BINARY_MODE(file) calls setmode(fileno(file), O_BINARY) on
// the given stream; on every other platform it expands to nothing.
// NOTE(review): nothing else in this listing references SET_BINARY_MODE.
#if defined(MSDOS) || defined(OS2) || defined(WIN32)       \
    || defined(__CYGWIN__)
#include <fcntl.h>
#include <io.h>
#define SET_BINARY_MODE(file)                              \
    setmode(fileno(file), O_BINARY)
#else
#define SET_BINARY_MODE(file)
#endif
  
// NOTE(review): CHUNK is not referenced anywhere else in this listing;
// presumably a leftover buffer size - confirm before removing.
#define CHUNK 1000
  
// Local copy of the zlib library function uncompress2 with one line changed:
// err = inflateInit(&stream); became err = inflateInit2(&stream, -MAX_WBITS);
//
// The negative window size tells zlib that there is no zlib or gzip header
// around the data; it is just a pure stream of compressed data to
// decompress.
//
// dest      - buffer that receives the decompressed bytes
// destLen   - in: capacity of dest; out: number of bytes written to dest
//             (left untouched when it is 0 on entry, in which case nothing
//             is written to dest)
// source    - compressed input
// sourceLen - in: length of source; out: number of source bytes consumed
//
// Returns Z_OK when the end of the compressed stream was reached,
// Z_DATA_ERROR when a preset dictionary would be needed or when the input
// ended early, and otherwise the error code reported by inflateInit2 or
// inflate.
int ZEXPORT uncompress2(Bytef* dest, uLongf* destLen,
                        const Bytef* source, uLong* sourceLen)
{
    z_stream stream;
    int err;
    const uInt max = (uInt)-1;
    uLong len, left;

    // for detection of incomplete stream when
    // *destLen == 0
    Byte buf[1];

    len = *sourceLen;
    if (*destLen) {
        left = *destLen;
        *destLen = 0;
    }
    else {
        left = 1;
        dest = buf;
    }

    stream.next_in = (z_const Bytef*)source;
    stream.avail_in = 0;
    stream.zalloc = (alloc_func)0;
    stream.zfree = (free_func)0;
    stream.opaque = (voidpf)0;

    err = inflateInit2(&stream,
                       -MAX_WBITS); // THIS LINE IS CHANGED
    if (err != Z_OK)
        return err;

    stream.next_out = dest;
    stream.avail_out = 0;

    // avail_in / avail_out are uInt while the lengths are uLong, so both
    // buffers are handed to inflate in pieces of at most `max` bytes
    do {
        if (stream.avail_out == 0) {
            stream.avail_out
                = left > (uLong)max ? max : (uInt)left;
            left -= stream.avail_out;
        }
        if (stream.avail_in == 0) {
            stream.avail_in
                = len > (uLong)max ? max : (uInt)len;
            len -= stream.avail_in;
        }
        err = inflate(&stream, Z_NO_FLUSH);
    } while (err == Z_OK);

    // subtract the input that was never handed over or never consumed
    *sourceLen -= len + stream.avail_in;
    if (dest != buf)
        *destLen = stream.total_out;
    else if (stream.total_out && err == Z_BUF_ERROR)
        left = 1;

    inflateEnd(&stream);

    if (err == Z_STREAM_END)
        return Z_OK;
    if (err == Z_NEED_DICT)
        return Z_DATA_ERROR;
    // Z_BUF_ERROR while output space is still available means that the
    // input ran out before the end of the stream
    if (err == Z_BUF_ERROR && left + stream.avail_out)
        return Z_DATA_ERROR;
    return err;
}
  
int main(int argc, char* argv[])
{
    // Command line program that only takes exactly 2
    // arguments example usage: ./program zipfile.zip will
    // print the name and contents of every file inside the
    // zip archive
  
    if (argc > 2 || argc < 2)
        return -1;
    if (!fopen(argv[1], "r"))
        return -2;
  
    int errorp = 0; // error code variable
    zip_t* arch = NULL; // Zip archive pointer
    arch = zip_open(argv[1], 0, &errorp);
  
    // allocates space for file information
    struct zip_stat* finfo = NULL;
    finfo = calloc(256, sizeof(int)); // must be allocated
    zip_stat_init(finfo);
  
    // Loop variables
    int index = 0;
    char* txt = NULL;
    zip_file_t* fd = NULL;
    char* outp = NULL;
  
    while (zip_stat_index(arch, index, 0, finfo) == 0) {
  
        txt = calloc(finfo->comp_size + 1, sizeof(char));
        // Read compressed data to buffer txt
        // ZIP_FL_COMPRESSED flag is passed in to read the
        // compressed data
        fd = zip_fopen_index(arch, 0, ZIP_FL_COMPRESSED);
        zip_fread(fd, txt, finfo->comp_size);
  
        outp = calloc(finfo->size + 1, sizeof(char));
        // uncompresses from txt buffer to outp buffer
        // uncompress function calls our uncompress2
        // function defined at top
        uncompress(outp, &finfo->size, txt,
                   finfo->comp_size);
  
        printf("FILE #%i: %s\n", index + 1, finfo->name);
        printf("\n%s\n", outp);
  
        // free memory every iteration
        free(txt);
        free(outp);
        index++;
    }
}


Here is the output of the above code, run on a zip archive that contains 2 files

Method 2 output



Last Updated : 05 Sep, 2022
Like Article
Save Article
Previous
Next
Share your thoughts in the comments
Similar Reads