/*
  Archive Diff - display differences between two archives
  Copyright (C) 2011  Christopher Howard
  
  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#define _XOPEN_SOURCE 700

#include <archive.h>
#include <archive_entry.h>
#include <stdlib.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>

#include "concat.h"
#include "err.h"
#include "config.h"

/* Copy every data block of the current entry from reader AR to writer AW. */
void copy_data(struct archive *ar, struct archive *aw);
/* Act on the libarchive status RET returned by a call made on handle A. */
void archive_err_hand(int ret, struct archive * a);
/* Print the generic "problem extracting an archive" error line to stderr. */
void extract_errmsg(void);
/* Print the external-compression-support warning to stderr. */
void ext_comp_warn(void);

/*
  Block size, in bytes, passed to archive_read_open_filename().  It only
  controls how much libarchive reads from the archive file at a time;
  10240 is the value libarchive's own examples pass.
*/
#define BLOCK_SIZE 10240

/*
  Template for the name of the per-archive working directory; the
  trailing "XXXXXX" is replaced by mkdtemp().
*/
const char temp_templ[] = "archdiff-XXXXXX";

/*
  Report that a libarchive object (described by WHAT) could not be
  allocated, then terminate the program.
*/
static void extract_alloc_fail(const char * what) {
  extract_errmsg();
  fprintf(stderr, "error: could not allocate %s\n", what);
  exit(ENOMEM);
}

/*
  Return a pointer into PATH positioned after one leading "./" (if
  present) and after every '/' that follows it, so that the result can
  never be an absolute path.  The result is empty when PATH names
  nothing but the extraction root ("./", "/", ".//", ...).
*/
static const char * extract_sandbox_path(const char * path) {
  if(strncmp(path, "./", 2) == 0) { path += 2; }
  /* could an entry start with multiple forward-slashes...? I know "ls"
     takes multiple forward-slashes as valid. Better safe than sorry. */
  while(*path == '/') { path++; }
  return path;
}

/*
  Extract the archive at ARCHIVE_PATH into a newly created directory
  below TMPDIR and return the path of that directory.

  The process's current working directory is changed to the new
  directory while extracting and restored before returning.  Any failure
  prints a diagnostic to stderr and terminates the program, so the
  function only returns on success.

  NOTE(review): the returned string comes from concat(); presumably it
  is heap-allocated and owned by the caller -- confirm against concat.h.
*/
char * extract(const char * archive_path, const char * tmpdir) {
  /* preserve current working dir across function */
  FILE * const ocwd = fopen(".", "r");
  if(ocwd == NULL) {
    const int e = errno;
    extract_errmsg();
    fprintf(stderr, "error: problem getting current working directory\n");
    fopen_errmsg(e, ".");
    exit(e);
  }

  /* prep to read from archive */
  struct archive * const a = archive_read_new();
  if(a == NULL) { extract_alloc_fail("an archive reader"); }

#ifdef USE_BZIP2
  if(archive_read_support_compression_bzip2(a) == ARCHIVE_WARN) { ext_comp_warn(); }
#endif
#ifdef USE_COMPRESS
  if(archive_read_support_compression_compress(a) == ARCHIVE_WARN) { ext_comp_warn(); }
#endif
#ifdef USE_GZIP
  if(archive_read_support_compression_gzip(a) == ARCHIVE_WARN) { ext_comp_warn(); }
#endif
#ifdef USE_LZMA
  if(archive_read_support_compression_lzma(a) == ARCHIVE_WARN) { ext_comp_warn(); }
#endif
#ifdef USE_XZ
  if(archive_read_support_compression_xz(a) == ARCHIVE_WARN) { ext_comp_warn(); }
#endif
#ifdef USE_AR
  archive_read_support_format_ar(a);
#endif
#ifdef USE_CPIO
  archive_read_support_format_cpio(a);
#endif
#ifdef USE_ISO9660
  archive_read_support_format_iso9660(a);
#endif
#ifdef USE_MTREE
  archive_read_support_format_mtree(a);
#endif
#ifdef USE_TAR
  archive_read_support_format_tar(a);
#endif
#ifdef USE_ZIP
  archive_read_support_format_zip(a);
#endif

  archive_err_hand(archive_read_open_filename(a, archive_path, BLOCK_SIZE), a);

  /* prep to write to new working directory */
  char * const templ = concat(tmpdir, "/", temp_templ, NULL);
  if(templ == NULL) {
    /* concat()'s failure behaviour is not visible from this file; guard
       defensively so mkdtemp() is never handed a null pointer */
    extract_errmsg();
    fprintf(stderr, "error: could not build the temporary directory path\n");
    exit(EXIT_FAILURE);
  }
  char * const wrkdir = mkdtemp(templ);
  if(wrkdir == NULL) {
    const int e = errno;
    extract_errmsg();
    mkdtemp_errmsg(e);
    exit(e);
  }
  if(chdir(wrkdir) == -1) {
    const int e = errno;
    extract_errmsg();
    chdir_errmsg(e, wrkdir);
    exit(e);
  }
  struct archive * const wd = archive_write_disk_new();
  if(wd == NULL) { extract_alloc_fail("a disk writer"); }
  archive_err_hand(archive_write_disk_set_options(wd,
                                              ARCHIVE_EXTRACT_NO_OVERWRITE |
                                              ARCHIVE_EXTRACT_SECURE_SYMLINKS |
                                              ARCHIVE_EXTRACT_SECURE_NODOTDOT
                                              ), wd);

  /* actually do the extraction */
  struct archive_entry * const entry = archive_entry_new();
  if(entry == NULL) { extract_alloc_fail("an archive entry"); }
  while(1) {
    archive_entry_clear(entry);
    {
      const int ret = archive_read_next_header2(a, entry);
      if(ret == ARCHIVE_EOF) { break; }
      archive_err_hand(ret, a);
    }

    const char * const pathname = archive_entry_pathname(entry);
    if(pathname == NULL) {
      extract_errmsg();
      fprintf(stderr, "error: archive entry has no usable pathname\n");
      exit(EXIT_FAILURE);
    }

    /*
      Some archives start their entries with "./" (and at least one,
      ghc-bin-7.0.4-amd64.tbz2, contains an entry that is simply "./").
      If libarchive gets a path that starts with "./" it will attempt to
      create it, which generates an error, at least under the option
      flags used here.  Absolute paths must also be prevented to keep
      everything in the sandbox.

      Additional instances of "./" could appear later in the pathname,
      but that seems unlikely unless somebody is simply trying to make
      archdiff die.
    */
    const char * const relative = extract_sandbox_path(pathname);

    /* nothing left: the entry names the extraction directory itself */
    if(*relative == '\0') { continue; }

    if(relative != pathname) {
      /*
        The string returned by archive_entry_pathname() belongs to the
        entry, so it must not be used as the source of, or read after, a
        call that rewrites the entry's pathname.  Set the name once,
        from a private copy.
      */
      char * const copy = strdup(relative);
      if(copy == NULL) {
        const int e = errno;
        extract_errmsg();
        fprintf(stderr, "error: %s\n", strerror(e));
        exit(e);
      }
      archive_entry_set_pathname(entry, copy);
      free(copy);
    }

    archive_err_hand(archive_write_header(wd, entry), wd);
    copy_data(a, wd);
    archive_err_hand(archive_write_finish_entry(wd), wd);
  }

  /* clean up */
  archive_entry_free(entry);
  archive_write_close(wd);
  archive_write_finish(wd);
  archive_read_close(a);
  archive_read_finish(a);

  /* put current working directory back the way we found it */
  if(fchdir(fileno(ocwd)) == -1) {
    const int e = errno;
    extract_errmsg();
    fchdir_errmsg(e);
    exit(e);
  }
  /* fclose() releases the stream as well as the underlying descriptor */
  if(fclose(ocwd) == EOF) {
    const int e = errno;
    extract_errmsg();
    close_errmsg(e);
    exit(e);
  }

  /* we'll need to know where the files actually were extracted to */
  return wrkdir;
}

/*
  Print the error recorded on libarchive handle A and terminate with a
  non-zero exit status.
*/
static void copy_data_fail(struct archive * a) {
  const char * const msg = archive_error_string(a);
  const int code = archive_errno(a);

  /* archive_error_string() may return NULL when no message was recorded */
  fprintf(stderr, "error: %s\n", msg != NULL ? msg : "unknown libarchive error");
  /* only the low 8 bits reach the parent process; never report success */
  exit((code & 0xFF) != 0 ? code : EXIT_FAILURE);
}

/*
  Copy the data of the entry currently selected on reader AR to writer
  AW, one block at a time, until the reader reports ARCHIVE_EOF.

  Any status other than ARCHIVE_OK from either side is treated as
  fatal: the libarchive error text is printed and the program exits.

  Mostly lifted from the libarchive example.
*/
void copy_data(struct archive *ar, struct archive *aw)
{
  for (;;) {
    const void *buff = NULL;
    size_t size = 0;
    off_t offset = 0;

    const int rd = archive_read_data_block(ar, &buff, &size, &offset);
    if (rd == ARCHIVE_EOF) { break; }
    if (rd != ARCHIVE_OK) { copy_data_fail(ar); }

    if (archive_write_data_block(aw, buff, size, offset) != ARCHIVE_OK) {
      copy_data_fail(aw);
    }
  }
}

/*
  Act on the status RET returned by a libarchive call made on handle A.

  ARCHIVE_OK    - nothing to do.
  ARCHIVE_WARN  - print the recorded message as a warning and return.
  anything else - print the generic extraction error plus the recorded
                  message, then exit with a non-zero status.
*/
void archive_err_hand(int ret, struct archive * a) {
  switch(ret) {
  case ARCHIVE_OK:
    break;
  case ARCHIVE_WARN: {
    /* archive_error_string() may return NULL when no message was recorded */
    const char * const msg = archive_error_string(a);
    fprintf(stderr, "warning: %s\n",
            msg != NULL ? msg : "unknown libarchive warning");
    break;
  }
  default: {
    const char * const msg = archive_error_string(a);
    const int code = archive_errno(a);
    extract_errmsg();
    fprintf(stderr, "error: %s\n",
            msg != NULL ? msg : "unknown libarchive error");
    /* only the low 8 bits reach the parent process; never report success */
    exit((code & 0xFF) != 0 ? code : EXIT_FAILURE);
  }
  }
}

/* Emit the generic extraction-failure line on stderr. */
void extract_errmsg() {
  static const char line[] =
    "error: problem extracting an archive to the temporary directory\n";

  fputs(line, stderr);
}

/*
  Warn on stderr that external compression support was detected.  The
  message ends in a newline so that whatever is printed next starts on
  its own line, like every other diagnostic in this file.
*/
void ext_comp_warn(void) {
  fprintf(stderr, "warning: external compression support detected\n");
}


