P13 EDA.R

From gnewarchaeology wiki
Jump to: navigation, search


  • eda.R

/!\ TODO: Syntax highlight + Literate programming with Org-mode

### p13 exploratory data analysis ###
### workspace associato: eda.RData ###

## load library 
library(MASS)
library(rgl)
library(lattice)

# DATA ENTRY & TRANSFORMATION

## import data.frame
points <- read.csv("/absolute/path/to/points_all.csv")
records <- read.csv("/absolute/path/to/records_all_var.csv")
paleontology <-
taphonomy <-

## create new var by transformation
## s_index (indice di sfericità=l_max/l_med)
index <- records$l_max/records$l_med
records <- cbind(records,index)

## subset of data.frame
indexmin <- which(records$index < 1.5)
fabric <- records[-indexmin,]
fabric <- fabric [-which(is.na(fabric$index)),]
summary(fabric)

stone600 <- which(records$l_max > 600)
fabric600 <- fabric[-stone600, ]
summary(fabric600)

stonemax <- which(records$l_max > 250)
fabric250 <- fabric[-stonemax, ]
summary(fabric250)

# SUMMARY

## summary records
class(records_all)
typeof(records_all)
names(records_all)
summary(records_all)
str(records_all)

## summary points
class(points_all)
typeof(points_all)
names(points_all)
summary(points_all)
str(points_all)

## standard deviation*s_index
sd(s_index, na.rm=TRUE)

# TESTS

## cor.test
## Test for association between paired samples, using one of Pearson's product moment correlation coefficient, Kendall's tau or Spearman's rho.
## correlation s_index*o
cor.test(as.numeric(records_all$o), s_index)
## correlation l_max*o
cor.test(as.numeric(records_all$o), records_all$l_max)

# EXPLORATORY DATA ANALYSIS

## explore index
summary(records)
boxplot(records$index)
sd(index, na.rm=TRUE)
group <- as.factor(records$descrizione) 
color <- c("gold", "gold","gold","red", "blue") 
plot(records$l_max, records$l_med, xlab="lunghezza(mm)", ylab="larghezza(mm)", col=color[group])
abline(lm(records$l_med ~ records$l_max))
legend("bottomright", bty="o", legend=c("Osso/Dente", "Pietra",  "Selce"), col= c("gold","red","blue"),  pt.bg=c("gold","red","blue"),  text.col="black", cex=0.8, pch=16)
cor.test(records$l_max, records$l_med)
plot(records[which(records$index >= 1.5),4], records[which(records$index >= 1.5),7], xlab="US", ylab="records")

## explore orientazione
plot(records$o, records$us, xlab="orientazione", ylab="US")
plot(records$o)
m <- matrix(1:4, ncol=2, byrow=TRUE)
layout(m)
plot(records[which(records$us == "A"),8])
title(main="us A", line=3)
plot(records[which(records$us == "B"),8])
title(main="us B", line=3)
plot(records[which(records$us == "C"),8])
title(main="us C", line=3)
plot(records[which(records$us == "D"),8])
title(main="us D", line=3)

## explore pendenza
plot(records$p2, records$us, xlab="pendenza", ylab="US")
m <- matrix(1:4, ncol=2, byrow=TRUE)
layout(m)
plot(records[which(records$us == "A"),11])
title(main="us A", line=3)
plot(records[which(records$us == "B"),11])
title(main="us B", line=3)
plot(records[which(records$us == "C"),11])
title(main="us C", line=3)
plot(records[which(records$us == "D"),11])
title(main="us D", line=3)
plot(records$p3, records$us, xlab="plunge direction", ylab="US")

## explore ricchezza delle us*desc
plot(records$descrizione, records$us, xlab="records", ylab="US")
m <- matrix(1:4, ncol=2, byrow=TRUE)
layout(m)
plot(records[which(records$us == "A"),7])
title(main="us A", line=3)
plot(records[which(records$us == "B"),7])
title(main="us B", line=3)
plot(records[which(records$us == "C"),7])
title(main="us C", line=3)
plot(records[which(records$us == "D"),7])
title(main="us D", line=3)
#histogram(records$descrizione ~ factor(records$descrizione, levels=c('DENTE', 'OSSO', 'PIETRA', 'SELCE') ) )
#histogram(records$descrizione ~ records$us |factor(records$descrizione, levels=c('DENTE', 'OSSO', 'PIETRA', 'SELCE') ) )
histogram(records$descrizione ~ records$us |factor(records$descrizione, levels=c('DENTE', 'OSSO', 'PIETRA', 'SELCE')), xlab=NULL, col="gray", type = c("count"))
plot(records[which(records$descrizione == "PIETRA"),4], records[which(records$descrizione == "PIETRA"),16], xlab="US", ylab="larghezza(mm)")

## histogram for fabric frame -na
indexmin <- which(records$index < 1.5)
fabric <- records[-indexmin,]
fabric <- fabric [-which(is.na(fabric$index)),]
histogram(fabric$descrizione ~ fabric$us | factor(fabric$descrizione, levels=c('DENTE', 'OSSO', 'PIETRA', 'SELCE')), xlab=NULL, col="gray", type = c("count"))

## histogram for fabric frame -na -pietre enormi
stonemax <- which(records$l_max > 250)
indexmin <- which(records$index < 1.5)
fabric <- records[-indexmin,]
fabric <- fabric [-which(is.na(fabric$index)),]
fabric2 <- fabric[-stonemax, ]
histogram(fabric2$descrizione ~ fabric2$us | factor(fabric2$descrizione, levels=c('DENTE', 'OSSO', 'PIETRA', 'SELCE')), xlab=NULL, col="gray", type = c("count"))

# 3D DIAGRAM

## simple 3d
plot3d(points$x, points$y, points$z)

## 3d records*index
indexmin <- which(records$index<1.5)
group <- as.factor(records$descrizione)
group2 <-as.factor(records$descrizione[-indexmin])
color <- c("yellow","yellow","red", "blue")
rgl.open()
rgl.bg(color="white")
## radius*index[-indexmin]
rgl.spheres(points[-indexmin,2], points[-indexmin,3], points[-indexmin,4], radius=records$index[-indexmin]/100,color=color[group2])
## radius*l_max[-indexmin]
rgl.spheres(points[-indexmin,2], points[-indexmin,3], points[-indexmin,4], radius=records$l_max[-indexmin]/2000,color=color[group])
## grid
x <- 3:9
y <- -1:-3
z <- 1:3
grid3d(c("-x", "-y", "z"), col="black")
axes3d()
rgl.points(points[-indexmin,2], points[-indexmin,3], points[-indexmin,4], radius=records$index[-indexmin]/100,color=color[group2], pch=4)
axes3d()
#rgl.points(points[-indexmin,2], points[-indexmin,3], points[-indexmin,4], radius=records$l_max[-indexmin]/1000,color=color[group2], pch=4)

## 3d records ossid * abras
points_selec <- read.csv("/absolute/path/to/points_selec.csv")
records_selec <- read.csv("/absolute/path/to/records_selec.csv")

points_c <- points_selec
abras <- which(taph$abrasione>=3)
select <- which(as.numeric (taph$ossid[abras]) >= 4)
#records_selec <- read.csv("/absolute/path/to/records_selec.csv")
index <- records_selec$l_max/records_selec$l_med
records_selec<- cbind(records_selec,index)
rgl.open()
rgl.bg(color="white")
rgl.spheres(points[select,2],points[select,3], points[select,4], radius= records_selec$index/100, color="gold")
x <- 3:9
y <- -1:-3
z <- 1:3
grid3d(c("-x", "-y", "z"), col="black")
axes3d()

## 3d records V * P * Inclinati 45°
records_plunge <- read.csv("/absolute/path/to/records_plunge.csv")
index <- records_plunge$l_max/records_plunge$l_med
records_plunge<- cbind(records_plunge,index)
indexmin <- which(records_plunge$index <= 1.5)
records_plunge <- records_plunge[-indexmin,]
p <- as.factor(records_plunge$p2)
color <- c("red","green","blue")
rgl.open()
rgl.bg(color="white")
rgl.spheres(points_all[-indexmin,2],points_all[-indexmin,3], points_all[-indexmin,4], radius=records_plunge$index/100, color=color[p])
x <- 3:9
y <- -1:-3
z <- 1:3
grid3d(c("-x", "-y", "z"), col="black")