setwd("/var/www/html/nifPred/rcode")
library(Biostrings)
#library(BioSeqClass)
library(protr)
library(e1071)
library(R2HTML)
source("feature.R")
if (file.exists("nifpred.html")) file.remove("nifpred.html")
if (file.exists("nifpred.txt")) file.remove("nifpred.txt")
x <- readAAStringSet("test.fasta")
xx <- toupper(as.character(as.character(x)))
###########Sequence filtering##########
#Checking of standard residues#
std <- c("A","C","D","E","F","G","H","I","K","L","M","N","P","Q","R","S","T","V","W","Y")
zx <- xx
z <- sapply(zx, function(s) strsplit(s, split=""))
zz <- lapply(z, table)
zz1 <- unique(as.character(unlist(lapply(zz, names))))
if(length(union(zz1, std))>20){
pp <- data.frame("Contain non-standard residues or ambigious character,so kindly submit sequences having standard residues only")
names(pp)<- "error message"
HTML(pp,"nifpred.html")
}else{
###########End#################
zm_ctd <- featureCTD(xx, class = elements("aminoacid"))
res <- matrix(0, nrow = length(x), ncol = 2)
tst <- data.frame(zm_ctd)
s <- rep("u", nrow(tst))
ts <- cbind(s, tst)
SVM_rbf <- readRDS("svm_binary.rds")
kk <- predict(SVM_rbf, newdata = ts[, -1], probability = TRUE)
kz <- attr(kk, "probabilities")
id <- which(kz[, "Y"] > 0.5)
idn <- which(kz[, "Y"] <= 0.5)
res[idn, 2] <- round(kz[, "N"][idn], 3)
res[idn, 1] <- rep("non-nif", length(idn))
mm <- which(!res[, 1] == "non-nif")
if (length(mm) == 0) {
res_f <- res
resff <- data.frame(names(x),res_f)
colnames(resff) <- c("Sequence identifier", "Predicted as", "with probability")
HTML(resff, file = "nifpred.html", align = "center", Border = 1, innerBorder = 1)
write.table(resff, "nifpred.txt", quote=F, sep="\t")
}else {
ts1 <- ts[id, ]
SVM_rbf <- readRDS("jacknife.rds")
kk <- predict(SVM_rbf, newdata = ts1[, -1])
kz <- predict(SVM_rbf, newdata = ts1[, -1], probability = TRUE)
zz <- round(attr(kz, "probabilities"), 3)
res[id, 1] <- as.character(kk)
res[id, 2] <- apply(zz, 1, max)
res_f <- res
resff <- data.frame(names(x),res_f)
colnames(resff) <- c("Sequence identifier", "Predicted as", "with probability")
write.table(resff, "nifpred.txt", quote=F, sep="\t")
kk <- which(!res_f[, 1] == "non-nif")
kx <- x[kk]
zz <- res[kk, ]
if (length(kk) == 1){
res_final <- data.frame(names(kx), zz[1], zz[2])
colnames(res_final) <- c("Sequence identifier", "Predicted as", "with probability")
HTML(res_final, file = "nifpred.html", align = "center", Border = 1, innerBorder = 1)
}else{
zf <- zz[order(data.frame(zz[, 1])), ]
zx <- kx[order(data.frame(zz[, 1]))]
mf <- which(as.numeric(zf[, 2]) >= 0.4)
resf <- zf[mf, ]
seqf <- names(zx[mf])
res_ff <- data.frame(seqf, resf)
if (length(which(res_ff[, 2] == "nifB")) > 3) {
mp_b <- res_ff[which(res_ff[, 2] == "nifB"), ]
mpb <- mp_b[order(data.frame(mp_b[, 3]), decreasing = TRUE),]
mzb <- mpb[1:3, ]
}else {
mzb <- res_ff[which(res_ff[, 2] == "nifB"), ]
}
if (length(which(res_ff[, 2] == "nifD")) > 3) {
mp_d <- res_ff[which(res_ff[, 2] == "nifD"), ]
mpd <- mp_d[order(data.frame(mp_d[, 3]), decreasing = TRUE),]
mzd <- mpd[1:3, ]
}else {
mzd <- res_ff[which(res_ff[, 2] == "nifD"), ]
}
if (length(which(res_ff[, 2] == "nifE")) > 3) {
mp_e <- res_ff[which(res_ff[, 2] == "nifE"), ]
mpe <- mp_e[order(data.frame(mp_e[, 3]), decreasing = TRUE),]
mze <- mpe[1:3, ]
}else {
mze <- res_ff[which(res_ff[, 2] == "nifE"), ]
}
if (length(which(res_ff[, 2] == "nifH")) > 3) {
mp_h <- res_ff[which(res_ff[, 2] == "nifH"), ]
mph <- mp_h[order(data.frame(mp_h[, 3]), decreasing = TRUE),]
mzh <- mph[1:3, ]
}else {
mzh <- res_ff[which(res_ff[, 2] == "nifH"), ]
}
if (length(which(res_ff[, 2] == "nifK")) > 3) {
mp_k <- res_ff[which(res_ff[, 2] == "nifK"), ]
mpk <- mp_k[order(data.frame(mp_k[, 3]), decreasing = TRUE),]
mzk <- mpk[1:3, ]
}else {
mzk <- res_ff[which(res_ff[, 2] == "nifK"), ]
}
if (length(which(res_ff[, 2] == "nifN")) > 3) {
mp_n <- res_ff[which(res_ff[, 2] == "nifN"), ]
mpn <- mp_n[order(data.frame(mp_n[, 3]), decreasing = TRUE), ]
mzn <- mpn[1:3, ]
}else {
mzn <- res_ff[which(res_ff[, 2] == "nifN"), ]
}
res_final <- data.frame(rbind(mzb, mzd, mze, mzh, mzk, mzn))
colnames(res_final) <- c("Sequence identifier", "Predicted as", "with probability")
HTML(res_final, file = "nifpred.html", align = "center", Border = 1, innerBorder = 1)
}
}
}
###########end#########