How to compute phylobeta diversity indices?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
## Files required

## phylocom.exe
## phylomatic.exe
## davies_dated.new
## species.table.csv Format shown below:
Family Genus Species
Actinidiaceae Actinidia 1
Actinidiaceae Actinidia 2
Apiaceae Aegopodium 6
...

## sample.csv
01 1 sp1
01 1 sp2
01 1 sp6
01 1 sp10
01 1 sp12
01 1 sp13
...



setwd("C:/Jinlong/phylobeta")
library(picante)
library(simba)
library(hydroTSM)

taxatab <- read.csv("species.table.csv", header = TRUE)

taxatab$SPECIES <- paste("sp", taxatab$SPECIES, sep = "")
taxatab$GENUS <- tolower(taxatab$GENUS)
taxatab$DAVIES_Family <- tolower(taxatab$DAVIES_Family)
write.table(unique(taxatab), "taxatable.txt", row.names = FALSE, col.names = FALSE, quote = FALSE, sep = "/")

shell("phylomatic -f davies_dated.new -t taxatable.txt>phylo.to.modify")
## Delete the outer most edge
dat <- read.csv("sample.csv", header = TRUE)
if(any(nchar(dat$PLOT) == 1)){
dat$PLOT[nchar(dat$PLOT) == 1] <- paste("0", dat$PLOT[nchar(dat$PLOT) == 1], sep = "")
}
dat$PLOT <- paste(dat$PLOT)
dat$SPECIES <- paste("sp", dat$SPECIES, sep = "")
dat$Abund <- rep(1, nrow(dat))
write.table(dat[,c(1, 3, 2)], "sample", quote = FALSE, row.names = FALSE, col.names = FALSE)


## Calculating Diversity metrics using picante
test.tree <- read.tree("phylo")
test.sample <- sample2matrix(dat[,c(1, 3, 2)])

## PhyloSor ## Not available from Phylocom
test.phylosor <- phylosor(samp =test.sample, tree =test.tree)

## UniFrac ## Not available from Phylocom
test.unifrac <- unifrac(comm =test.sample, tree = test.tree)

## Dnn COMDIST
test.comdist <- comdist(comm =test.sample, dis = cophenetictest.tree))

## Dpw comdistnt
test.comdistnt <- comdistnt(comm =test.sample, dis = cophenetictest.tree))

## Rao D and Rao H
test.rao <- raoD(comm =test.sample, phy =test.tree)

## raoD.Dkl ("US.species.raoD.Dkl.pairlist.ver2.csv ") is rao's D
test.raoD <- as.dist(test.rao$Dkl)

# and "US.species.raoD.H.pairlist.ver2.csv" is Rao's H.
test.raoH <- as.dist(test.rao$H)

test.phylosor.list <- liste(test.phylosor)
test.unifrac.list <- liste(test.unifrac)
test.comdist.list <- liste(test.comdist)
test.comdistnt.list <- liste(test.comdistnt)
test.rao.D.list <- liste(test.raoD )
test.rao.H.list <- liste(test.raoH)


result <- data.frame(test.phylosor.list[,3], test.unifrac.list[,3],
test.comdist.list[,3], test.comdistnt.list[,3],
test.rao.D.list[,3], test.rao.H.list[,3])

colnames(result) <- c("phylosor", "unifrac", "Dpw", "Dnn", "raoD", "raoH")

pdf("phylo.beta.pairs.pdf", width = 10, height = 10)
hydropairs(result)
dev.off()

result2 <- data.frame(test.phylosor.list, test.unifrac.list[,3],
test.comdist.list[,3], test.comdistnt.list[,3],
test.rao.D.list[,3], test.rao.H.list[,3])
colnames(result2) <- c("PlotI","PlotII", "phylosor", "unifrac", "comdist", "comdistnt", "raoD", "raoH")

write.csv(result2, "phylobeta.result.csv")


write.csv(test.phylosor.list ,"test.phylosor.list.csv" , row.names = FALSE)
write.csv(test.unifrac.list ,"test.unifrac.list.csv" , row.names = FALSE)
write.csv(test.comdist.list ,"test.Dpw.list.csv" , row.names = FALSE)
write.csv(test.comdistnt.list ,"test.Dnn.list.csv" , row.names = FALSE)
write.csv(test.rao.D.list ,"test.rao.D.list.csv" , row.names = FALSE)
write.csv(test.rao.H.list ,"test.rao.H.list.csv" , row.names = FALSE)