Unverified Commit 409aef56 authored by mvdbeek's avatar mvdbeek
Browse files

Fix ``Text File Busy`` errors at the source

The inline comment explains the source of the errors. I've created
https://gist.github.com/mvdbeek/682f3495e31275b5f8ed121fabd7103f to
verify that we can reproduce the issue and that the additional locking
fixes it.

I think it's reasonable to block on acquiring the second lock,
however https://stackoverflow.com/a/5255473 is an option we could
implement.
parent 5c246060
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
import fcntl

from galaxy.util import unicodify


def fork_safe_write(path: str, contents: str):
    """Write ``contents`` to ``path`` and wait until no fork still holds the write handle.

    The approach is inspired by https://twitter.com/_monoid/status/1317895053122150400.
    It is meant to guarantee that this function only returns after every fork
    that inherited the open file descriptor has proceeded to execve, because
    only then can the shared lock be acquired. This **should** entirely avoid
    the "Text File Busy" error that `_handle_script_integrity` attempts to
    deal with.

    The likelihood of "Text File Busy" grows with load: more work has to be
    done when forking to copy memory pages, so the fork is slower and is more
    likely to happen while the script file is still open for writing.

    :param path: location of the file to (over)write
    :param contents: text to write; passed through ``unicodify`` and stored as UTF-8
    """
    # Step 1: write the file and take an exclusive lock on the still-open
    # write handle. The order (write first, lock second, then close) is
    # deliberate and must be preserved.
    with open(path, "w", encoding="utf-8") as writer:
        writer.write(unicodify(contents))
        fcntl.flock(writer, fcntl.LOCK_EX)
    # Step 2: reopen the file and block on a shared lock. Nothing is read;
    # acquiring the lock is the whole point of this second open.
    with open(path) as reader:
        fcntl.flock(reader, fcntl.LOCK_SH)
+2 −3
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ from galaxy.util import (
    unicodify,
)
from galaxy.util.resources import resource_string
from ..fork_safe_write import fork_safe_write

log = logging.getLogger(__name__)
DEFAULT_SHELL = "/bin/bash"
@@ -131,9 +132,7 @@ def write_script(path: str, contents, job_io: DescribesScriptIntegrityChecks, mo
    dir = os.path.dirname(path)
    if not os.path.exists(dir):
        os.makedirs(dir)

    with open(path, "w", encoding="utf-8") as f:
        f.write(unicodify(contents))
    fork_safe_write(path, contents)
    os.chmod(path, mode)
    if job_io.check_job_script_integrity:
        assert job_io.check_job_script_integrity_count is not None