Tags: python, tensorflow, image-segmentation, tensorflow-datasets, mscoco

"Error while extracting" from tensorflow datasets


I want to train a tensorflow image segmentation model on COCO, and thought I would leverage the dataset builder already included. Download seems to be completed but it crashes on extracting the zip files.

Running with TF 2.0.0 in a Jupyter Notebook under a conda environment. The computer is running 64-bit Windows 10. The Oxford-IIIT Pet dataset used in the official image segmentation tutorial works fine.

Below is the error message (my local user name replaced with %user%).

---------------------------------------------------------------------------
OutOfRangeError                           Traceback (most recent call last)
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\extractor.py in _sync_extract(self, from_path, method, to_path)
     88     try:
---> 89       for path, handle in iter_archive(from_path, method):
     90         path = tf.compat.as_text(path)

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\extractor.py in iter_zip(arch_f)
    176   with _open_or_pass(arch_f) as fobj:
--> 177     z = zipfile.ZipFile(fobj)
    178     for member in z.infolist():

~\.conda\envs\tf-tutorial\lib\zipfile.py in __init__(self, file, mode, compression, allowZip64)
   1130             if mode == 'r':
-> 1131                 self._RealGetContents()
   1132             elif mode in ('w', 'x'):

~\.conda\envs\tf-tutorial\lib\zipfile.py in _RealGetContents(self)
   1193         try:
-> 1194             endrec = _EndRecData(fp)
   1195         except OSError:

~\.conda\envs\tf-tutorial\lib\zipfile.py in _EndRecData(fpin)
    263     # Determine file size
--> 264     fpin.seek(0, 2)
    265     filesize = fpin.tell()

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\util\deprecation.py in new_func(*args, **kwargs)
    506                 instructions)
--> 507       return func(*args, **kwargs)
    508 

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in seek(self, offset, whence, position)
    166       elif whence == 2:
--> 167         offset += self.size()
    168       else:

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in size(self)
    101     """Returns the size of the file."""
--> 102     return stat(self.__name).length
    103 

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in stat(filename)
    726   """
--> 727   return stat_v2(filename)
    728 

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in stat_v2(path)
    743   file_statistics = pywrap_tensorflow.FileStatistics()
--> 744   pywrap_tensorflow.Stat(compat.as_bytes(path), file_statistics)
    745   return file_statistics

OutOfRangeError: C:\Users\%user%\tensorflow_datasets\downloads\images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip; Unknown error

During handling of the above exception, another exception occurred:

ExtractError                              Traceback (most recent call last)
<ipython-input-27-887fa0198611> in <module>
      1 cocoBuilder = tfds.builder('coco')
      2 info = cocoBuilder.info
----> 3 cocoBuilder.download_and_prepare()

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\api_utils.py in disallow_positional_args_dec(fn, instance, args, kwargs)
     50     _check_no_positional(fn, args, ismethod, allowed=allowed)
     51     _check_required(fn, kwargs)
---> 52     return fn(*args, **kwargs)
     53 
     54   return disallow_positional_args_dec(wrapped)  # pylint: disable=no-value-for-parameter

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in download_and_prepare(self, download_dir, download_config)
    285         self._download_and_prepare(
    286             dl_manager=dl_manager,
--> 287             download_config=download_config)
    288 
    289         # NOTE: If modifying the lines below to put additional information in

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in _download_and_prepare(self, dl_manager, download_config)
    946     super(GeneratorBasedBuilder, self)._download_and_prepare(
    947         dl_manager=dl_manager,
--> 948         max_examples_per_split=download_config.max_examples_per_split,
    949     )
    950 

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in _download_and_prepare(self, dl_manager, **prepare_split_kwargs)
    802     # Generating data for all splits
    803     split_dict = splits_lib.SplitDict()
--> 804     for split_generator in self._split_generators(dl_manager):
    805       if splits_lib.Split.ALL == split_generator.split_info.name:
    806         raise ValueError(

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\image\coco.py in _split_generators(self, dl_manager)
    237     root_url = 'http://images.cocodataset.org/'
    238     extracted_paths = dl_manager.download_and_extract({
--> 239         key: root_url + url for key, url in urls.items()
    240     })
    241 

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\download_manager.py in download_and_extract(self, url_or_urls)
    357     with self._downloader.tqdm():
    358       with self._extractor.tqdm():
--> 359         return _map_promise(self._download_extract, url_or_urls)
    360 
    361   @property

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\download_manager.py in _map_promise(map_fn, all_inputs)
    393   """Map the function into each element and resolve the promise."""
    394   all_promises = utils.map_nested(map_fn, all_inputs)  # Apply the function
--> 395   res = utils.map_nested(_wait_on_promise, all_promises)
    396   return res

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in map_nested(function, data_struct, dict_only, map_tuple)
    127     return {
    128         k: map_nested(function, v, dict_only, map_tuple)
--> 129         for k, v in data_struct.items()
    130     }
    131   elif not dict_only:

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in <dictcomp>(.0)
    127     return {
    128         k: map_nested(function, v, dict_only, map_tuple)
--> 129         for k, v in data_struct.items()
    130     }
    131   elif not dict_only:

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in map_nested(function, data_struct, dict_only, map_tuple)
    141         return tuple(mapped)
    142   # Singleton
--> 143   return function(data_struct)
    144 
    145 

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\download_manager.py in _wait_on_promise(p)
    377 
    378   def _wait_on_promise(p):
--> 379     return p.get()
    380 
    381 else:

~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in get(self, timeout)
    508         target = self._target()
    509         self._wait(timeout or DEFAULT_TIMEOUT)
--> 510         return self._target_settled_value(_raise=True)
    511 
    512     def _target_settled_value(self, _raise=False):

~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in _target_settled_value(self, _raise)
    512     def _target_settled_value(self, _raise=False):
    513         # type: (bool) -> Any
--> 514         return self._target()._settled_value(_raise)
    515 
    516     _value = _reason = _target_settled_value

~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in _settled_value(self, _raise)
    222             if _raise:
    223                 raise_val = self._fulfillment_handler0
--> 224                 reraise(type(raise_val), raise_val, self._traceback)
    225             return self._fulfillment_handler0
    226 

~\.conda\envs\tf-tutorial\lib\site-packages\six.py in reraise(tp, value, tb)
    694             if value.__traceback__ is not tb:
    695                 raise value.with_traceback(tb)
--> 696             raise value
    697         finally:
    698             value = None

~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in handle_future_result(future)
    840         # type: (Any) -> None
    841         try:
--> 842             resolve(future.result())
    843         except Exception as e:
    844             tb = exc_info()[2]

~\.conda\envs\tf-tutorial\lib\concurrent\futures\_base.py in result(self, timeout)
    423                 raise CancelledError()
    424             elif self._state == FINISHED:
--> 425                 return self.__get_result()
    426 
    427             self._condition.wait(timeout)

~\.conda\envs\tf-tutorial\lib\concurrent\futures\_base.py in __get_result(self)
    382     def __get_result(self):
    383         if self._exception:
--> 384             raise self._exception
    385         else:
    386             return self._result

~\.conda\envs\tf-tutorial\lib\concurrent\futures\thread.py in run(self)
     54 
     55         try:
---> 56             result = self.fn(*self.args, **self.kwargs)
     57         except BaseException as exc:
     58             self.future.set_exception(exc)

~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\extractor.py in _sync_extract(self, from_path, method, to_path)
     92     except BaseException as err:
     93       msg = 'Error while extracting %s to %s : %s' % (from_path, to_path, err)
---> 94       raise ExtractError(msg)
     95     # `tf.io.gfile.Rename(overwrite=True)` doesn't work for non empty
     96     # directories, so delete destination first, if it already exists.

ExtractError: Error while extracting C:\Users\%user%\tensorflow_datasets\downloads\images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip to C:\Users\%user%\tensorflow_datasets\downloads\extracted\ZIP.images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip : C:\Users\%user%\tensorflow_datasets\downloads\images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip; Unknown error

The message seems cryptic to me. The folder it is trying to extract to does not exist when the notebook is started - it is created by TensorFlow, and only when that command is run. I tried deleting the folder completely and running the command again, to no effect.

The code that leads to the error is (everything runs fine until the last line):

import tensorflow as tf
from __future__ import absolute_import, division, print_function, unicode_literals

from tensorflow_examples.models.pix2pix import pix2pix

import tensorflow_datasets as tfds

from IPython.display import clear_output
import matplotlib.pyplot as plt

dataset, info = tfds.load('coco', with_info=True)

I also tried breaking the last command down into creating the tfds.builder object and then calling download_and_prepare on it (as shown in the traceback above), and got the same error.

There is enough disk space - after the download there is still 50+ GB available, while the dataset is supposed to be 37 GB in its largest version (2014).


Solution

  • I had a similar problem with Windows 10 and COCO 2017. My solution was simple: extract the ZIP file manually into the destination folder given in the error message (the path after "to" in the ExtractError), then run the command again.