Frequency Distribution Table:

In this discussion, we are going to create a frequency distribution table for a set of data.

# Frequency distribution table for the 25 observations in prob39.
prob39 <- c(70, 60, 66, 54, 66, 60, 55, 57, 67, 71, 62, 69, 57, 63, 69, 58, 60,
            52, 63, 63, 61, 65, 60, 60, 73)
range(prob39)
## [1] 52 73
# Approximate class width: the data range divided by the desired number of
# classes (5). Derived from the data instead of from typed-in copies of the
# printed minimum and maximum, so it cannot go stale if prob39 changes.
class_size <- diff(range(prob39)) / 5

# class_size is about 4.2; the breaks below use a width of 5, starting at 50.
breaks <- seq(50, 75, by = 5)
# right = FALSE makes every class closed on the left and open on the right.
prob.cut <- cut(prob39, breaks, right = FALSE)
prob39.freq <- table(prob.cut)
prob39.freq
## prob.cut
## [50,55) [55,60) [60,65) [65,70) [70,75) 
##       2       4      10       6       3
# Same counts shown as a one-column matrix.
cbind(prob39.freq)
##         prob39.freq
## [50,55)           2
## [55,60)           4
## [60,65)          10
## [65,70)           6
## [70,75)           3
prob39.relfreq <- prob39.freq / length(prob39) # relative frequency
prob39.relfreq
## prob.cut
## [50,55) [55,60) [60,65) [65,70) [70,75) 
##    0.08    0.16    0.40    0.24    0.12
cumfreq <- cumsum(prob39.freq) # cumulative frequency
cumfreq
## [50,55) [55,60) [60,65) [65,70) [70,75) 
##       2       6      16      22      25
# Relative cumulative frequency, reusing cumfreq rather than recomputing it.
rel.cumfreq <- cumfreq / length(prob39)
rel.cumfreq
## [50,55) [55,60) [60,65) [65,70) [70,75) 
##    0.08    0.24    0.64    0.88    1.00
# One row per class, one column per statistic.
final.table <- cbind(prob39.freq, prob39.relfreq, cumfreq, rel.cumfreq)
final.table
##         prob39.freq prob39.relfreq cumfreq rel.cumfreq
## [50,55)           2           0.08       2        0.08
## [55,60)           4           0.16       6        0.24
## [60,65)          10           0.40      16        0.64
## [65,70)           6           0.24      22        0.88
## [70,75)           3           0.12      25        1.00

Cumulative frequency (ogive) curve

# There are 6 break points but only 5 cumulative frequencies. Prepending a 0
# to the cumulative frequencies pairs every break point with a value, so both
# vectors have the same length and can be plotted against each other.
cumfreq0 <- append(cumfreq, 0, after = 0)
plot(x = breaks, y = cumfreq0)

plot of chunk unnamed-chunk-3

# Relative cumulative frequency (ogive) curve: points joined by a blue line.
# A leading 0 is prepended so there is one y-value for every break point.
rel.cumfreq0 <- c(0, rel.cumfreq)
# The y-axis label names what is actually plotted (relative cumulative
# frequency, on a 0-1 scale) instead of the absolute cumulative frequency.
# NOTE(review): xlab says "Age groups"; nothing visible here establishes that
# the data are ages - confirm the label.
plot(breaks, rel.cumfreq0,
     main = "cumulative frequency distribution",
     xlab = "Age groups",
     ylab = "Relative cumulative frequency",
     col = "blue")
lines(breaks, rel.cumfreq0, col = "blue")

plot of chunk unnamed-chunk-3

Histogram

# Histogram on the density scale: bar areas sum to 1, so each bar's height is
# the class's relative frequency divided by the class width.
# breaks/right = FALSE use the same left-closed classes [50,55), ..., [70,75)
# as the frequency table; hist()'s default right-closed bins would count
# boundary values such as 55 and 60 in the class below.
# `probability` is spelled out instead of relying on partial matching of
# `prob`.
hist(prob39, breaks = seq(50, 75, by = 5), right = FALSE, probability = TRUE,
     col = "grey", xlab = "classes", ylab = "density")

plot of chunk unnamed-chunk-4

# Histogram with frequency (count) on the y-axis.
# breaks/right = FALSE use the same left-closed classes [50,55), ..., [70,75)
# as the frequency table, so the bar heights equal the tabulated counts.
# The stray empty argument after prob39 has been removed.
hist(prob39, breaks = seq(50, 75, by = 5), right = FALSE)

plot of chunk unnamed-chunk-4

Pie Chart

# Pie chart of the class counts held in prob39.freq.
pie(x = prob39.freq)

plot of chunk unnamed-chunk-5

Stem Plot

# Stem-and-leaf display of the raw observations.
stem(x = prob39)
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   5 | 24
##   5 | 5778
##   6 | 0000012333
##   6 | 566799
##   7 | 013

Dot Plot

# Dot plot: repeated values are stacked; the default axes are suppressed and
# a custom x-axis with ticks every 5 units from 45 to 80 is drawn instead.
stripchart(
  prob39,
  method = "stack",
  pch = 19,
  at = 0.1,
  offset = 0.5,
  axes = FALSE
)
xticks <- seq(from = 45, to = 80, by = 5)
axis(side = 1, at = xticks)

plot of chunk unnamed-chunk-7

Yet another example of a frequency distribution table

# Second example: frequency distribution table for 36 measurements.
data <- c(6.1, 6.1, 6.3, 6.2, 6.2, 5.9, 5.7, 5.8, 6.2, 5.8, 6.1, 6.1, 6.0,
          5.9, 6.1, 5.7, 5.9, 5.9, 6.5, 6.1, 6.2, 6.3, 6.5, 5.9, 6.0, 6.2,
          6.0, 6.2, 5.4, 6.1, 5.7, 6.0, 5.7, 5.7, 6.7, 6.1)
range(data)
## [1] 5.4 6.7
# Class width = data range / 5, computed from the data instead of typed-in
# copies of the printed minimum and maximum.
class.size <- diff(range(data)) / 5
# Use the computed width (0.26) for the breaks rather than repeating it as a
# literal. The breaks start at 5.3, just below the minimum, which gives six
# classes.
breaks <- seq(5.3, 6.9, by = class.size)
# right = FALSE makes every class closed on the left and open on the right.
data.cut <- cut(data, breaks, right = FALSE)
freq.table <- table(data.cut)

rel.freq <- freq.table / length(data) # relative frequency
cum.freq <- cumsum(freq.table) # cumulative frequency
# Relative cumulative frequency, reusing cum.freq rather than recomputing it.
rel.cumfreq <- cum.freq / length(data)
final.table <- cbind(freq.table, rel.freq, cum.freq, rel.cumfreq)
final.table
##             freq.table rel.freq cum.freq rel.cumfreq
## [5.3,5.56)           1  0.02778        1     0.02778
## [5.56,5.82)          7  0.19444        8     0.22222
## [5.82,6.08)          9  0.25000       17     0.47222
## [6.08,6.34)         16  0.44444       33     0.91667
## [6.34,6.6)           2  0.05556       35     0.97222
## [6.6,6.86)           1  0.02778       36     1.00000

Multiple bar plot (for qualitative variables)

# Two qualitative variables whose category counts are compared in one plot.
x <- c("a", "a", "b", "b", "b", "b", "b", "b")
y <- c("a", "a", "a", "a", "a", "b", "b", "b")
# Tabulate both variables over one shared set of categories. Binding two
# independent table() results only lines up when both variables happen to
# contain exactly the same categories; otherwise rows are misaligned or
# recycled without any warning.
categories <- sort(unique(c(x, y)))
cc <- cbind(table(factor(x, levels = categories)),
            table(factor(y, levels = categories)))
colnames(cc) <- c("x", "y")
barplot(cc) # stacked plot: one bar per variable, segments are categories

plot of chunk unnamed-chunk-9

# Same counts drawn as side-by-side (grouped) bars instead of stacked ones.
barplot(height = cc, beside = TRUE)

plot of chunk unnamed-chunk-9