-
Notifications
You must be signed in to change notification settings - Fork 39
Description
I'm running into a strange bug whenever Live.log_artifact
tries to raise a warning or error with tensorflow
imported. It should be reproducible below (tensorflow==2.13.0
, dvc==3.38.1
, and dvclive==3.5.1
) as long as you have uncommitted changes in the repo (or some other way to raise a warning/error, like a missing artifact path).
import tensorflow as tf
from dvclive import Live
with Live() as live:
live.log_artifact("file.csv")
raises
ValueError: I/O operation on closed file
Full traceback
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File /usr/local/lib/python3.8/dist-packages/dvc/logger.py:156, in LoggerHandler.emit(self, record)
155 msg = self.format(record)
--> 156 Tqdm.write(msg, file=self.stream, end=getattr(self, "terminator", "\n"))
157 self.flush()
File /usr/local/lib/python3.8/dist-packages/tqdm/std.py:725, in tqdm.write(cls, s, file, end, nolock)
723 with cls.external_write_mode(file=file, nolock=nolock):
724 # Write the message
--> 725 fp.write(s)
726 fp.write(end)
File /usr/local/lib/python3.8/dist-packages/colorama/ansitowin32.py:47, in StreamWrapper.write(self, text)
46 def write(self, text):
---> 47 self.__convertor.write(text)
File /usr/local/lib/python3.8/dist-packages/colorama/ansitowin32.py:177, in AnsiToWin32.write(self, text)
176 if self.strip or self.convert:
--> 177 self.write_and_convert(text)
178 else:
File /usr/local/lib/python3.8/dist-packages/colorama/ansitowin32.py:205, in AnsiToWin32.write_and_convert(self, text)
204 cursor = end
--> 205 self.write_plain_text(text, cursor, len(text))
File /usr/local/lib/python3.8/dist-packages/colorama/ansitowin32.py:210, in AnsiToWin32.write_plain_text(self, text, start, end)
209 if start < end:
--> 210 self.wrapped.write(text[start:end])
211 self.wrapped.flush()
File /usr/local/lib/python3.8/dist-packages/ipykernel/iostream.py:633, in OutStream.write(self, string)
632 msg = "I/O operation on closed file"
--> 633 raise ValueError(msg)
634 else:
ValueError: I/O operation on closed file
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[1], line 5
2 from dvclive import Live
4 with Live() as live:
----> 5 live.log_artifact("file.csv")
File /usr/local/lib/python3.8/dist-packages/dvclive/live.py:628, in Live.__exit__(self, exc_type, exc_val, exc_tb)
626 def __exit__(self, exc_type, exc_val, exc_tb):
627 self._inside_with = False
--> 628 self.end()
File /usr/local/lib/python3.8/dist-packages/dvclive/live.py:605, in Live.end(self)
600 if self._dvcyaml:
601 catch_and_warn(DvcException, logger)(self._dvc_repo.scm.add)(
602 self.dvc_file
603 )
--> 605 self.save_dvc_exp()
607 # Mark experiment as done
608 self.post_to_studio("done")
File /usr/local/lib/python3.8/dist-packages/dvclive/utils.py:183, in catch_and_warn.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
181 def wrapper(*args, **kwargs):
182 try:
--> 183 return func(*args, **kwargs)
184 except exception as e:
185 logger.warning(f"Error in {func.__name__}: {e}")
File /usr/local/lib/python3.8/dist-packages/dvclive/live.py:635, in Live.save_dvc_exp(self)
633 if self._dvcyaml:
634 self._include_untracked.append(self.dvc_file)
--> 635 self._experiment_rev = self._dvc_repo.experiments.save(
636 name=self._exp_name,
637 include_untracked=self._include_untracked,
638 force=True,
639 message=self._exp_message,
640 )
File /usr/local/lib/python3.8/dist-packages/dvc/repo/experiments/__init__.py:376, in Experiments.save(self, *args, **kwargs)
373 def save(self, *args, **kwargs):
374 from dvc.repo.experiments.save import save
--> 376 return save(self.repo, *args, **kwargs)
File /usr/local/lib/python3.8/dist-packages/dvc/repo/experiments/save.py:33, in save(repo, name, force, include_untracked, message)
30 executor = queue.init_executor(repo.experiments, entry)
32 try:
---> 33 save_result = executor.save(
34 executor.info,
35 force=force,
36 include_untracked=include_untracked,
37 message=message,
38 )
39 result = queue.collect_executor(repo.experiments, executor, save_result)
40 finally:
File /usr/local/lib/python3.8/dist-packages/dvc/repo/experiments/executor/base.py:311, in BaseExecutor.save(cls, info, force, include_untracked, message)
309 untracked = dvc.scm.untracked_files()
310 if untracked:
--> 311 logger.warning(
312 "The following untracked files were present in "
313 "the workspace before saving but "
314 "will not be included in the experiment commit:\n"
315 "\t%s",
316 ", ".join(untracked),
317 )
318 info.result_hash = exp_hash
319 info.result_ref = ref
File /usr/lib/python3.8/logging/__init__.py:1458, in Logger.warning(self, msg, *args, **kwargs)
1449 """
1450 Log 'msg % args' with severity 'WARNING'.
1451
(...)
1455 logger.warning("Houston, we have a %s", "bit of a problem", exc_info=1)
1456 """
1457 if self.isEnabledFor(WARNING):
-> 1458 self._log(WARNING, msg, args, **kwargs)
File /usr/lib/python3.8/logging/__init__.py:1589, in Logger._log(self, level, msg, args, exc_info, extra, stack_info, stacklevel)
1586 exc_info = sys.exc_info()
1587 record = self.makeRecord(self.name, level, fn, lno, msg, args,
1588 exc_info, func, extra, sinfo)
-> 1589 self.handle(record)
File /usr/lib/python3.8/logging/__init__.py:1599, in Logger.handle(self, record)
1592 """
1593 Call the handlers for the specified record.
1594
1595 This method is used for unpickled records received from a socket, as
1596 well as those created locally. Logger-level filtering is applied.
1597 """
1598 if (not self.disabled) and self.filter(record):
-> 1599 self.callHandlers(record)
File /usr/lib/python3.8/logging/__init__.py:1661, in Logger.callHandlers(self, record)
1659 found = found + 1
1660 if record.levelno >= hdlr.level:
-> 1661 hdlr.handle(record)
1662 if not c.propagate:
1663 c = None #break out
File /usr/lib/python3.8/logging/__init__.py:954, in Handler.handle(self, record)
952 self.acquire()
953 try:
--> 954 self.emit(record)
955 finally:
956 self.release()
File /usr/local/lib/python3.8/dist-packages/dvc/logger.py:161, in LoggerHandler.emit(self, record)
159 raise
160 except Exception: # noqa: BLE001
--> 161 self.handleError(record)
File /usr/local/lib/python3.8/dist-packages/dvc/logger.py:136, in LoggerHandler.handleError(self, record)
135 def handleError(self, record): # noqa: N802
--> 136 super().handleError(record)
137 raise LoggingException(record)
File /usr/lib/python3.8/logging/__init__.py:1006, in Handler.handleError(self, record)
1004 t, v, tb = sys.exc_info()
1005 try:
-> 1006 sys.stderr.write('--- Logging error ---\n')
1007 traceback.print_exception(t, v, tb, None, sys.stderr)
1008 sys.stderr.write('Call stack:\n')
File /usr/local/lib/python3.8/dist-packages/colorama/ansitowin32.py:47, in StreamWrapper.write(self, text)
46 def write(self, text):
---> 47 self.__convertor.write(text)
File /usr/local/lib/python3.8/dist-packages/colorama/ansitowin32.py:177, in AnsiToWin32.write(self, text)
175 def write(self, text):
176 if self.strip or self.convert:
--> 177 self.write_and_convert(text)
178 else:
179 self.wrapped.write(text)
File /usr/local/lib/python3.8/dist-packages/colorama/ansitowin32.py:205, in AnsiToWin32.write_and_convert(self, text)
203 self.convert_ansi(*match.groups())
204 cursor = end
--> 205 self.write_plain_text(text, cursor, len(text))
File /usr/local/lib/python3.8/dist-packages/colorama/ansitowin32.py:210, in AnsiToWin32.write_plain_text(self, text, start, end)
208 def write_plain_text(self, text, start, end):
209 if start < end:
--> 210 self.wrapped.write(text[start:end])
211 self.wrapped.flush()
File /usr/local/lib/python3.8/dist-packages/ipykernel/iostream.py:633, in OutStream.write(self, string)
631 if self.pub_thread is None:
632 msg = "I/O operation on closed file"
--> 633 raise ValueError(msg)
634 else:
635 is_child = not self._is_master_process()
ValueError: I/O operation on closed file
From what I can tell from the traceback, DVC tries to raise a warning for uncommited changes, which gets logged to the closed stderr
stream, causing the I/O error. The same error will occur for other errors or warnings raised by dvclive
as well. I found a few hints that might help diagnose the issue below, but I think I've reached the limit of what I'm able to figure out.
- I've only encountered this error when
tensorflow
is imported beforedvclive
. When the order is reversed,sys.stderr
remains open and no error is encountered. - I found keras: add test #35 that references ValueError: I/O operation on closed file keras-team/keras#3403. This looks closely related, but all I can tell is that it seems to be a workaround to prevent
sys.stderr
getting closed during testing. - If you watch
sys.stderr
, it becomes closed as soon asgto
is imported bylog_artifact
. Disabling that import and the related calls resolves the issue. Looking at the init for thegto.log
module shows that it's setting some logging config.
Lines 498 to 499 in 5c650ef
from gto.constants import assert_name_is_valid from gto.exceptions import ValidationError
It's possible this is ultimately a Tensorflow/Keras issue rather than a DVC issue, but I'm not sure how to reproduce it without DVC.
Any help would be appreciated, thanks!