#!/usr/bin/python

import argparse
import os
import subprocess
import sys


# Command-line interface: the FASTA input and the Weka location are mandatory,
# the output file name is optional.
parser = argparse.ArgumentParser(description='Exosome protein prediction')
parser.add_argument(
	'-f', '--fasta',
	required=True,
	type=str,
	help='Sequence file in fasta format',
)
parser.add_argument(
	'-w', '--weka_loc',
	required=True,
	type=str,
	help='Weka software executable',
)
parser.add_argument(
	'-o', '--output_save',
	type=str,
	help='Output file name',
)

# Parsed options, read by the rest of the script as attributes of `args`.
args = parser.parse_args()


# Parse the FASTA input into two parallel lists: codes[k] is the header line of
# record k (without the leading ">") and sequences[k] is its residue string.
# The lists must stay the same length because later steps pair them by position.
codes = []
sequences = []


def _keep_record(header, chunks):
	"""Store one FASTA record, or report it on stderr when it has no residues.

	header is the header text without ">"; chunks is the list of sequence
	lines collected for that header.
	"""
	sequence = "".join(chunks)
	if sequence:
		codes.append(header)
		sequences.append(sequence)
	else:
		# Keeping the header without a sequence would shift every following
		# name against its sequence, so the whole record is dropped.
		sys.stderr.write("Warning: skipping record without sequence: " + header + "\n")


current_header = None  # None until the first ">" line has been seen
current_chunks = []

with open(args.fasta, "r") as file_with_seqs:
	for line in file_with_seqs:
		line = line.rstrip("\r\n")
		if line.startswith(">"):
			if current_header is not None:
				_keep_record(current_header, current_chunks)
			current_header = line[1:]
			current_chunks = []
		elif current_header is not None:
			# Text before the first header belongs to no record and is ignored.
			current_chunks.append(line.strip())

# The last record is not followed by another header, so store it here.
if current_header is not None:
	_keep_record(current_header, current_chunks)
		
		
# Write the parsed sequences, one per line, to the scratch file
# "Seqs_to_read.txt" that is passed to ./seq_coding_test afterwards.
with open("Seqs_to_read.txt", "w") as file_to_save:
	file_to_save.writelines(sequence + "\n" for sequence in sequences)

def _run_step(argv, stdout=None):
	"""Run one external command without a shell and report a failing exit status.

	argv is the argument list handed to the program unchanged; stdout, when
	given, is an open file that receives the command's standard output.
	Exits the script with a message when the program cannot be started.
	A non-zero exit status only produces a warning on stderr, so the script
	carries on as it did when the status was ignored.
	"""
	try:
		status = subprocess.call(argv, stdout=stdout)
	except OSError as err:
		raise SystemExit("Cannot run " + argv[0] + ": " + str(err))
	if status != 0:
		sys.stderr.write("Warning: " + argv[0] + " exited with status " + str(status) + "\n")


# Encode the sequences written to Seqs_to_read.txt.
# NOTE(review): seq_coding_test presumably writes test_dpcomp.arff, the file
# the Weka call below reads - confirm.
_run_step(["./seq_coding_test", "Seqs_to_read.txt", "dpcomp"])

# The class path is passed as its own argument, so spaces or shell
# metacharacters in it are never interpreted by a shell. expanduser keeps a
# leading "~" working, which the shell used to expand.
weka_classpath = os.path.expanduser(args.weka_loc)

# Classify with the stored RandomForest model; the report goes to
# prediction_results.txt (previously done with a shell redirection).
with open("prediction_results.txt", "w") as prediction_output:
	_run_step(
		[
			"java", "-cp", weka_classpath,
			"weka.classifiers.trees.RandomForest",
			"-T", "test_dpcomp.arff",
			"-l", "RandomForest_exosome_prediction_model.model",
			"-p", "0",
		],
		stdout=prediction_output,
	)


# Collect exactly one predicted label ("Y" or "N") per instance line of
# prediction_results.txt.
results_pred = []
with open("prediction_results.txt", "r") as pred_res:
	for line in pred_res:
		class_fields = [
			field for field in line.split()
			if field.startswith(("1:Y", "2:N"))
		]
		if class_fields:
			# A line can carry two class fields when the actual class is
			# labelled; only the last one is taken so each instance yields a
			# single label.
			# NOTE(review): assumes the predicted column follows the actual
			# column in Weka's plain-text "-p" report - confirm for the Weka
			# version in use.
			results_pred.append(class_fields[-1][2])

# zip() would silently truncate on a mismatch and attach labels to the wrong
# names, so stop with a clear message instead.
if len(results_pred) != len(codes):
	raise SystemExit(
		"Prediction count (" + str(len(results_pred)) + ") does not match "
		"the number of sequence names (" + str(len(codes)) + ")"
	)

# Header line -> predicted label.
# NOTE: identical header lines share one key, so the mapping keeps only the
# last label for them.
rel_res = dict(zip(codes, results_pred))


# Build the report rows once, then send them either to the file named by
# --output_save or to standard output.

# Every name needs a label; pairing by position below would otherwise drop the
# unmatched names without any notice.
if len(results_pred) < len(codes):
	raise SystemExit(
		"Only " + str(len(results_pred)) + " predictions for "
		+ str(len(codes)) + " sequence names"
	)

report_lines = []
# Names and labels are paired by position rather than through a dict keyed by
# header, so records with identical header lines keep their own label.
for header, label in zip(codes, results_pred):
	# The reported name is the header up to its first space (the whole header
	# when it has none), with any carriage return or newline removed.
	seq_name = header.partition(" ")[0].replace("\r", "").rstrip("\n")
	report_lines.append(seq_name + "\t" + label)

if args.output_save:
	with open(args.output_save, "w") as file_output:
		file_output.write("Seq name\tExosome_prediction\n")
		file_output.writelines(row + "\n" for row in report_lines)
	print("File saved correctly!")
else:
	print("Seq name\tExosome_prediction")
	for row in report_lines:
		print(row)

# Remove the scratch files created during the run. Cleanup is best-effort: a
# file that cannot be removed is reported on stderr and the script carries on.
for scratch_file in ("prediction_results.txt", "Seqs_to_read.txt", "test_dpcomp.arff"):
	try:
		os.remove(scratch_file)
	except OSError as err:
		sys.stderr.write("Warning: could not remove " + scratch_file + ": " + str(err) + "\n")
