Commit d6a575d3 authored by Alexander, William's avatar Alexander, William
Browse files

Fixed issue where motif finding fails on datasets with 0 methylated sites

Also fixed the overwrite flag (-w); it now works as intended
parent 271e4801
Loading
Loading
Loading
Loading
+8 −5
Original line number Diff line number Diff line
@@ -314,8 +314,12 @@ def memeLoop(folder: str, inDF, genomeDict: dict, threads: int, sampleSize: int,
    complete = 'no'
    inDF = inDF.drop(columns=['end', 'start1', 'end1', 'color', 'score'])
    workingDF = dfFilter(inDF, 15, 90, 0)
    methylSet = set((dfFilter(inDF, 15, 40, 5, 1)
                     .sort_values(by=['coord']))['coord'].unique())
    methylDF = dfFilter(inDF, 15, 40, 5, 1)
    methylSet = None
    if len(methylDF) == 0:
        complete = 'yes'
    else:
        methylSet = set((methylDF.sort_values(by=['coord']))['coord'].unique())
    # below is an empirically determined cutoff that explains most Ecol data and finds all motifs off the bat
    # workingDF = dfFilter(inDF, 15, 95, 0)
    # methylSet = set((dfFilter(inDF, 15, 80, 5, 1)
@@ -443,8 +447,7 @@ if __name__ == '__main__':
    # meant to delete the MEME output folder to make repetitive running on a dataset easier, currently throws an error
    os.chdir(args.folder)
    if os.path.isdir('./meme') and args.overwrite:
        os.rmdir('./meme')
    else:
        os.system('rm -rf ./meme')
    os.mkdir('./meme')

    # converts genome to a dict of sequences