R拟合统计分布并绘制曲线

R拟合统计分布并绘制曲线

在统计分析中, 经常会用到统计分布拟合, 这里介绍MASS程序包的 fitdistr(), 该函数可以用来拟合 "beta", "cauchy", "chi-squared", "exponential", "f", "gamma", "geometric", "log-normal", "lognormal", "logistic", "negative binomial", "normal", "Poisson", "t" 以及 "weibull" 的概率密度分布, 现举例如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
library(MASS)

#### Gammadistribution
x <-rgamma(100, shape =5, rate =0.1)
res <- fitdistr(x, "gamma")
h <-hist(x)
xfit <-seq(min(x), max(x), by=(max(x)-min(x))/100)
yfit <-dgamma(xfit,res[[1]][1],res[[1]][2])
yfit <- yfit*diff(h$mids[1:2])*length(xfit)
lines(xfit, yfit, col="blue", lwd=2)

####"beta"
x <-rbeta(100, shape1 =5, shape2 =2)
res <- fitdistr(x, "beta", start=list(shape1 =4, shape2 =1))
h <-hist(x)
xfit <-seq(min(x), max(x), by=(max(x)-min(x))/100)
yfit <-dbeta(xfit,res[[1]][1],res[[1]][2])
yfit <- yfit*diff(h$mids[1:2])*length(xfit)
lines(xfit, yfit, col="blue", lwd=2)

####"cauchy",
"cauchy",
x <-rcauchy(100, location =200, scale=300)
res <- fitdistr(x, "cauchy")
h <-hist(x)
xfit <-seq(min(x), max(x), by=(max(x)-min(x))/100)
yfit <-dcauchy(xfit,res[[1]][1],res[[1]][2])
yfit <- yfit*diff(h$mids[1:2])*length(xfit)
lines(xfit, yfit, col="blue", lwd=2)

####"chi-squared",
"chi-squared",
x <-rchisq(100, df=5, ncp =2)
res <- fitdistr(x, "chi-squared", start=list(df=2, ncp =1))
h <-hist(x)
xfit <-seq(min(x), max(x), by=(max(x)-min(x))/100)
yfit <-dchisq(xfit,res[[1]][1],res[[1]][2])
yfit <- yfit*diff(h$mids[1:2])*length(xfit)
lines(xfit, yfit, col="blue", lwd=2)

####"exponential",
"exponential"
x <-rexp(100, rate =10)
res <- fitdistr(x, "exponential")
h <-hist(x)
xfit <-seq(min(x), max(x), by=(max(x)-min(x))/100)
yfit <-dexp(xfit,res[[1]][1])
yfit <- yfit*diff(h$mids[1:2])*length(xfit)
lines(xfit, yfit, col="blue", lwd=2)

####"f",
"f"
x <-rf(100, df1 =60, df2 =50)
res <- fitdistr(x, "f", start=list(df1 =10, df2 =2))
h <-hist(x)
xfit <-seq(min(x), max(x), by=(max(x)-min(x))/100)
yfit <-df(xfit,res[[1]][1], res[[1]][2])
yfit <- yfit*diff(h$mids[1:2])*length(xfit)
lines(xfit, yfit, col="blue", lwd=2)

####"lognormal",
"lognormal"
x <-rlnorm(100, meanlog =5, sdlog =1.5)
res <- fitdistr(x, "lognormal")
h <-hist(x)
xfit <-seq(min(x), max(x), by=(max(x)-min(x))/100)
yfit <-dlnorm(xfit,res[[1]][1], res[[1]][2])
yfit <- yfit*diff(h$mids[1:2])*length(xfit)
lines(xfit, yfit, col="blue", lwd=2)

####"logistic",
"logistic",
x <-rlogis(100, location =5, scale=2)
res <- fitdistr(x, "logistic", start=list(location=5, scale=2))
h <-hist(x)
xfit <-seq(min(x), max(x), by=(max(x)-min(x))/100)
yfit <-dlogis(xfit,res[[1]][1], res[[1]][2])
yfit <- yfit*diff(h$mids[1:2])*length(xfit)
lines(xfit, yfit, col="blue", lwd=2)

#### "negativebinomial",
"negative binomial",
x <-rnbinom(100, size =300, prob =.3)
res <- fitdistr(x, "negative binomial")
h <-hist(x)
xfit <-floor(seq(min(x), max(x), by=(max(x)-min(x))/100))
yfit <-dnbinom(xfit,size = res[[1]][1], mu = res[[1]][2])
yfit <- yfit*diff(h$mids[1:2])*length(xfit)
lines(xfit, yfit, col="blue", lwd=2)
####"normal",
"normal"
x <-rnorm(100, mean=15, sd=2)
res <- fitdistr(x, "normal")
h <-hist(x)
xfit <-seq(min(x), max(x), by=(max(x)-min(x))/100)
yfit <-dnorm(xfit, mean= res[[1]][1], sd= res[[1]][2])
yfit <- yfit*diff(h$mids[1:2])*length(xfit)
lines(xfit, yfit, col="blue", lwd=2)

####"Poisson",
"Poisson",
x <-rpois(100, lambda =400)
res <- fitdistr(x, "Poisson")
h <-hist(x)
xfit <-floor(seq(min(x), max(x), by=(max(x)-min(x))/100))
yfit <-dpois(xfit,res[[1]][1])
yfit <- yfit*diff(h$mids[1:2])*length(xfit)
lines(xfit, yfit, col="blue", lwd=2)

####"t"
"t"
x <-rt(100, df=5)
res <- fitdistr(x, "t")
h <-hist(x)
xfit <-seq(min(x), max(x), by=(max(x)-min(x))/100)
yfit <-dt(xfit,res[[1]][3])
yfit <- yfit*diff(h$mids[1:2])*length(xfit)
lines(xfit, yfit, col="blue", lwd=2)

####"weibull"
"weibull"
x <-rweibull(100, shape =5, scale=9)
res <- fitdistr(x, "weibull", start=list(shape =1, scale=1))
h <-hist(x)
xfit <-seq(min(x), max(x), by=(max(x)-min(x))/100)
yfit <-dweibull(xfit,res[[1]][1], res[[1]][2])
yfit <- yfit*diff(h$mids[1:2])*length(xfit)
lines(xfit, yfit, col="blue", lwd=2)