#!/bin/bash
# Description: The input list is passed as $1. `sort -R` shuffles it, so this script can be started as many times as we want without every run working through the ids in the same order.

# Abort early unless $1 names an existing regular file (the list of ids).
if [[ ! -f "$1" ]]; then
    # Diagnostics belong on stderr, and a non-zero status lets callers
    # (and shell pipelines) notice that nothing was processed.
    echo "Input list does not exist" >&2
    exit 1
fi

# Run the jackhmmer -> a3m -> aln pipeline for every id listed in "$1"
# (first space-separated field of each line), visiting the ids in random order.
#
# Layout, relative to the directory the script is started from:
#   dataset/<id>.running    marker file held while a job is in progress
#   dataset/<id>/           per-job working directory
#   dataset/<id>/<id>.aln   final alignment; its presence means "already done"

work_root="dataset"
uniref_db="/ssdA/common-tools/dncon2-databases/uniref/uniref90pfilt"
reformat_tool="/ssdA/common-tools/reformat.pl"

if [[ ! -d "$work_root" ]]; then
    echo "Working directory '$work_root' does not exist" >&2
    exit 1
fi

# Marker currently held by this process (empty when none is held).
active_marker=""

# Remove the marker held by this process, if any.
release_marker() {
    if [[ -n "$active_marker" ]]; then
        rm -f -- "$active_marker"
        active_marker=""
    fi
}

# Make sure an interrupted run does not leave a stale marker behind, which
# would make every later run report the id as " already running!".
trap release_marker EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Build <id>.aln for the id in $1 inside the job directory in $2.
# The body runs in a subshell, so the caller's working directory is never
# changed and a failed cd cannot make later steps run in the wrong place.
# Returns non-zero as soon as any step fails.
build_alignment() (
    id=$1

    # Multiple jobs may overwrite each other's files, so do all the work
    # inside the job directory.
    cd -- "$2" || exit 1

    # Always start from a fresh copy of the fasta file.
    rm -f -- "$id.fasta"
    # NOTE(review): this source path is resolved relative to the job directory
    # (dataset/<id>/1353_fasta/), exactly as before. That looks unintended --
    # confirm where 1353_fasta really lives and adjust the path.
    cp -- "1353_fasta/$id.fasta" . || {
        echo " could not copy 1353_fasta/$id.fasta" >&2
        exit 1
    }

    jackhmmer --cpu 2 -N 3 -E 1e-10 -A "$id.ali" "$id.fasta" "$uniref_db" &> jackhmmer.main.log || {
        echo " jackhmmer failed, see $2/jackhmmer.main.log" >&2
        exit 1
    }

    "$reformat_tool" -l 1500 -d 1500 sto a3m "$id.ali" "$id.a3m" || {
        echo " reformat.pl failed" >&2
        exit 1
    }

    # Drop the header lines and the lower-case letters. Write to a temporary
    # name first: <id>.aln is what marks a job as done, so it must only
    # appear once it is complete.
    grep -v "^>" -- "$id.a3m" | sed 's/[a-z]//g' > "$id.aln.tmp" || exit 1
    mv -f -- "$id.aln.tmp" "$id.aln"
)

# Load the shuffled ids up front so that nothing run inside the loop can
# consume the list through stdin.
mapfile -t ids < <(sort -R -- "$1" | cut -d' ' -f1)

for id in "${ids[@]}"; do
    # Lines that are empty or start with a space yield an empty first field.
    [[ -n "$id" ]] || continue

    printf '%s\n' "$id"
    job_dir="$work_root/$id"
    marker="$work_root/$id.running"

    if [[ -f "$job_dir/$id.aln" ]]; then
        echo " already done!"
        continue
    fi

    # With noclobber set, the redirection fails if the marker already exists,
    # so "check and claim" is a single step and two concurrent runs cannot
    # both start the same id.
    if ! ( set -o noclobber; : > "$marker" ) 2> /dev/null; then
        if [[ -e "$marker" ]]; then
            echo " already running!"
        else
            echo " cannot create $marker" >&2
        fi
        continue
    fi
    active_marker=$marker

    echo " running now.."
    if ! mkdir -p -- "$job_dir" || ! build_alignment "$id" "$job_dir"; then
        echo " failed!" >&2
    fi

    release_marker
done

# All markers are released; drop the handlers installed above.
trap - EXIT INT TERM
