Unverified commit 1dadb3c6, authored by Marius van den Beek and committed by GitHub
Browse files

Merge pull request #13959 from mvdbeek/tighten_sniffers

[22.01] Tighten sniffers
parents 139963b5 ae69c522
Loading
Loading
Loading
Loading
+26 −27
Original line number Diff line number Diff line
@@ -1166,20 +1166,15 @@ class XYZ(GenericMolFile):
        >>> fname = get_test_fname('Si.cif')
        >>> XYZ().sniff(fname)
        False
        >>> fname = get_test_fname('not_a_xyz_file.txt')
        >>> XYZ().sniff(fname)
        False
        """

        try:
            self.read_blocks(list(file_prefix.line_iterator()))
            return True  # blocks read successfully
        except (TypeError, ValueError):
            return False
        except IndexError as e:
            if "pop from empty list" in str(e):
                # file_prefix ran out mid block with no other errors
                # assume the whole file is ok
            return True
            else:
                # some other IndexError - invalid input
        except (TypeError, ValueError, IndexError):
            return False

    def read_blocks(self, lines):
@@ -1199,7 +1194,7 @@ class XYZ(GenericMolFile):
            n_atoms = None
            comment = None
            atoms = []

            try:
                n_atoms = int(lines.pop(0))
                comment = lines.pop(0)
                for _ in range(n_atoms):
@@ -1213,9 +1208,13 @@ class XYZ(GenericMolFile):
                    position = [float(i) for i in atom[1:4]]

                    atoms.append(symbol + str(position))

                blocks.append({"number_of_atoms": n_atoms, "comment": comment, "atom_data": atoms})

            except IndexError as e:
                if "pop from empty list" in str(e) and blocks:
                    # we'll require at least one valid block
                    pass
                else:
                    raise
        return blocks

    def set_meta(self, dataset, **kwd):
+8 −0
Original line number Diff line number Diff line
@@ -104,6 +104,11 @@ class Phylip(Text):
            if any(str.isdigit(c) for c in seq):
                # Could tighten up further by requiring IUPAC strings chars
                return False
        line = alignment_prefix.readline()
        if line.strip():
            # There should be a newline separating alignments.
            # If we got more content this is probably not a phylip file
            return False
        # There may be more lines with the remaining parts of the sequences
        return True

@@ -119,6 +124,9 @@ class Phylip(Text):
        >>> fname = get_test_fname('test_relaxed_interleaved.phylip')
        >>> Phylip().sniff(fname)
        True
        >>> fname = get_test_fname("not_a_phylip_file.tabular")
        >>> Phylip().sniff(fname)
        False
        """
        f = file_prefix.string_io()
        # Get number of sequences and sequence length from first line
+5 −0
Original line number Diff line number Diff line
1	2
A	B
D	E
X	Y
1	2
+1 −0
Original line number Diff line number Diff line
12345