# Load the Titanic data and impute missing ages.
# BUGS fixed from the transcript: `tat` was used before read.csv() ran, and
# the first imputation used max(tat$age) WITHOUT na.rm = TRUE, which returns
# NA and therefore filled the missing ages with NA again.
# NOTE(review): the original (Korean) comments say "replace NAs with the MEAN
# age", but the final working line used the maximum; max() is kept here to
# preserve the demonstrated behavior — confirm whether mean() was intended.
tat <- read.csv("tatanic.csv", stringsAsFactors = TRUE)
View(tat)
head(tat)
nrow(tat)

# Missing-value count per column before imputation.
colSums(is.na(tat))

# max() must be told to drop NAs, otherwise it propagates them.
age_fill <- max(tat$age, na.rm = TRUE)
tat$age[is.na(tat$age)] <- age_fill

# Verify no missing ages remain.
colSums(is.na(tat))
library(outliers)
# Iteratively flag outliers in a numeric vector using Grubbs' test.
# Re-runs the test after removing each flagged value until the remaining
# data no longer rejects at the 5% level.
# Returns a data frame with the original values (X) and a logical
# Outlier column marking flagged entries.
grubbs.flag <- function(x) {
  flagged <- NULL
  remaining <- x
  result <- grubbs.test(remaining)
  while (result$p.value < 0.05) {
    # grubbs.test embeds the suspect value in its "alternative" text;
    # the third whitespace-separated token is the numeric value itself.
    suspect <- as.numeric(strsplit(result$alternative, " ")[[1]][3])
    flagged <- c(flagged, suspect)
    # Drop every occurrence of all flagged values before re-testing.
    remaining <- x[!x %in% flagged]
    result <- grubbs.test(remaining)
  }
  data.frame(X = x, Outlier = x %in% flagged)
}
# Run Grubbs outlier flagging on selected numeric columns and report how
# many outliers each contains.
# Fixes: `<-` instead of `=` for assignment; counting TRUEs directly with
# sum() instead of subsetting and taking length().
wisc <- read.csv("tatanic.csv")
for (col_name in colnames(wisc)[c(2, 3, 5, 6, 8)]) {
  flags <- grubbs.flag(wisc[[col_name]])
  # sum() over the logical Outlier column counts the flagged rows.
  print(paste(col_name, "--> ", sum(flags$Outlier)))
}
library(caret)
library(C50)
library(irr)

nrow(tat)

# 0. Shuffle the rows first.
# Fix: use nrow(tat) instead of the hard-coded 891 so the script survives a
# different-sized input file.
set.seed(123)
tat2 <- tat[sample(nrow(tat)), ]

# Stratified 75% / 25% train-test split.
set.seed(123)
in_train <- createDataPartition(tat2$survived, p = 0.75, list = FALSE)
tat_train <- tat2[in_train, ]   # training set (~669 rows for 891 total)
tat_test  <- tat2[-in_train, ]  # test set (~222 rows)
nrow(tat_train)
nrow(tat_test)

# Random forest = decision trees + ensembling; caret tunes it automatically.
m <- train(survived ~ ., data = tat_train, method = "rf")
m  # inspect the tuning results

# Predictions come out as probabilities in [0, 1]; round() thresholds at 0.5.
p <- predict(m, tat_test)
p
round(p)
table(round(p), tat_test$survived)

library(gmodels)
y <- CrossTable(tat_test$survived, round(p))
# Overall accuracy = sum of the diagonal of the proportion table.
sum(y$prop.tbl * diag(2))
library(data.table)

# Load the Boston housing data.
boston <- read.csv("d:\\data\\boston.csv")
str(boston)
View(boston)
head(boston)
nrow(boston)  # 506

# 0. Shuffle the rows first.
# BUGS fixed: the transcript called boston(1:506, 506) — the data frame is
# not a function — and then overwrote `boston` itself with sample(); the
# shuffled indices belong in a separate variable. Also use nrow(boston)
# instead of the hard-coded 506.
set.seed(123)
boston_shuffle <- sample(nrow(boston))
boston2 <- boston[boston_shuffle, ]
boston2

# 80% train / 20% test split via a random membership vector.
set.seed(1234)
ind <- sample(2, nrow(boston2), replace = TRUE, prob = c(0.8, 0.2))
boston_train <- boston2[ind == 1, ]
boston_test  <- boston2[ind == 2, ]
nrow(boston_train)
nrow(boston_test)  # typo fixed: original read nrow(boson_test)
# Lower-case the column names FIRST, then derive the interaction flags, so
# both data sets end up with consistent names.
# Fixes from the transcript: undefined lower() and colname() calls removed;
# the stray source('~/.active-rstudio-document') line dropped; duplicate
# lstat_rm assignments collapsed; flags were originally computed before the
# column names had been lowered.
colnames(boston_train) <- tolower(colnames(boston_train))
colnames(boston_test)  <- tolower(colnames(boston_test))
str(boston_train)
str(boston_test)

# Flag rows combining high lstat with few rooms.
boston_train$lstat_rm <- ifelse(boston_train$lstat > 5 & boston_train$rm <= 5, 1, 0)
boston_test$lstat_rm  <- ifelse(boston_test$lstat > 5 & boston_test$rm <= 5, 1, 0)
boston_train$lstat
library(data.table)
boston <- read.csv("d:\\data\\boston.csv")
nrow(boston)

# 0. Shuffle the rows first.
set.seed(123)
boston_shuffle <- sample(nrow(boston))
boston2 <- boston[boston_shuffle, ]
boston2

# 80% training data, 20% test data.
set.seed(1234)
ind <- sample(2, nrow(boston2), replace = TRUE, prob = c(0.8, 0.2))
boston_train <- boston2[ind == 1, ]  # ~411 rows
boston_test  <- boston2[ind == 2, ]  # ~95 rows
nrow(boston_train)
nrow(boston_test)

# Normalise the column names before deriving features.
colnames(boston_train) <- tolower(colnames(boston_train))
colnames(boston_test)  <- tolower(colnames(boston_test))
str(boston_train)
str(boston_test)

# Derived interaction flags; names must match between train and test.
boston_train$lstat_rm <- ifelse(boston_train$lstat > 5 & boston_train$rm <= 5, 1, 0)
boston_test$lstat_rm  <- ifelse(boston_test$lstat > 5 & boston_test$rm <= 5, 1, 0)
boston_train$age_indus <- ifelse(boston_train$age < 30 & boston_train$indus <= 10, 1, 0)
# BUG fixed: the test column was named "age_ind", mismatching the train
# column "age_indus" — any model using age_indus would not find it at
# prediction time.
boston_test$age_indus <- ifelse(boston_test$age < 30 & boston_test$indus <= 10, 1, 0)

# 1-1. Missing-value check (expected: 0 in both sets).
sum(is.na(boston_train))
sum(is.na(boston_test))
# 2) Normalisation
# Min-max scale a numeric vector onto [0, 1].
# A constant vector (max == min) yields NaN, as with the plain
# min-max formula.
normalize <- function(x) {
  rng <- range(x)
  (x - rng[1]) / (rng[2] - rng[1])
}
# Min-max normalise every column except the first (the id column is
# dropped with [, -1]).
boston_train_norm <- as.data.frame(lapply(boston_train[, -1], normalize))
boston_test_norm  <- as.data.frame(lapply(boston_test[, -1], normalize))

# Check the normalisation result (each column should span [0, 1]);
# the duplicated summary() calls from the transcript are collapsed.
summary(boston_train_norm)
summary(boston_test_norm)
# 1-1. Multicollinearity check
# install.packages("car")
library(car)
# The target column in this data set is `price`; the first attempt with
# medv~. failed and was retried with price~., so only the working call
# is kept.
lml <- lm(price ~ ., data = boston_train_norm)
vif(lml)       # variance inflation factors
vif(lml) > 5   # TRUE where VIF exceeds 5 (moderate collinearity)
vif(lml) > 10  # TRUE where VIF exceeds 10 (severe collinearity)

# 1-2. Near-zero-variance predictors (all FALSE — nothing to remove).
# install.packages("caret")
library(caret)
nearZeroVar(boston_test_norm, saveMetrics = TRUE)
nearZeroVar(boston_train_norm, saveMetrics = TRUE)

# 1-3. Correlations
# install.packages("corrplot")
library(corrplot)
# corrplot method options: "circle" (default), "shade" (squares),
# "ellipse" (right-leaning = positive, left-leaning = negative),
# "number" (numeric values).
corrplot(cor(boston_train_norm), method = "number")
# install.packages("randomForest")
library(randomForest)

# Variable importance via random forest. The target is `price`; the failed
# medv~. attempt from the transcript is dropped.
rf <- randomForest(price ~ ., data = boston_train_norm, importance = TRUE)
varImp(rf)
importance(rf)  # %IncMSE = accuracy-based, IncNodePurity = purity-based

# 2) Visualise the importances.
varImpPlot(rf, main = "Varplot of boston_train")

# Linear model on a hand-picked subset of predictors
# (same spirit as lml used for the VIF check above).
model1 <- lm(price ~ crim + chas + nox + rm + dis + rad + ptratio + lstat,
             data = boston_train_norm)
model1
summary(model1)

# Stepwise regression to find the most effective variable subset.
lm2 <- step(model1, direction = "both")
lm2
# Second attempt: stepwise selection starting from the full VIF-check model
# (incorporating what the correlation analysis showed).
lml2 <- step(lml, direction = "both")

# Final model: stepwise result combined with the correlation findings.
# Fixes folded in from the four repeated attempts in the transcript: the
# target column is `price` (not medv) and the predictor is named `b` in
# this data set (not `black`).
boston_reg_model <- lm(
  price ~ crim + zn + indus + chas + nox + rm + dis + rad + tax +
    ptratio + b + lstat + lstat_rm,
  data = boston_train_norm
)

# Predict on the normalised test set.
model_results <- predict(boston_reg_model, boston_test_norm)
model_results
# 2-2. De-normalise predictions back to the original medv scale (to match
# the kaggle submission format).
# Inverse of min-max scaling. Generalised from the hard-coded x*45+5:
# the defaults (min 5, max 50 — the training medv range noted in the
# original comment) reproduce the old behavior exactly.
denormalize <- function(x, min_val = 5, max_val = 50) {
  x * (max_val - min_val) + min_val
}
pred_medv_un <- denormalize(model_results)

# Build the submission as a data frame directly.
# BUG fixed: cbind() produced a matrix, so sample$medv returned NULL until
# a much later as.data.frame() call; building a data.frame up front avoids
# the whole detour. Renamed from `sample` to avoid shadowing base::sample.
# boston_test$x is the row-id column from the CSV.
submission <- data.frame(id = boston_test$x, medv = pred_medv_un)
head(submission)
str(submission)
write.csv(submission, "Submission_sample16.csv", row.names = FALSE)

# Correlation between the actual prices and the de-normalised predictions.
cor(boston_test$price, submission$medv)
# Scratch work: quick look at the built-in mtcars data set (prints the full
# frame, then just the first rows). Unrelated to the Boston/Titanic work.
mtcars
head(mtcars)
# ---- Shiny DenseNet classifier: environment setup ----
# NOTE(review): setwd() in scripts is discouraged; kept because the app
# loads its model files (envir.RData, densenet.h5, synset.txt) from here.
setwd("D:\\yys26")

# Install any missing dependencies (keras comes from GitHub via devtools).
# Fix: setdiff() was computed twice per condition; hoisted into a variable.
packages <- c("imager", "shiny", "jpeg", "png", "reticulate", "devtools")
missing <- setdiff(packages, rownames(installed.packages()))
if (length(missing) > 0) {
  install.packages(missing)
}
if (!"keras" %in% rownames(installed.packages())) {
  devtools::install_github("rstudio/keras")
}

# library() errors on a missing package; require() only returns FALSE,
# which silently hides a failed load.
library(imager)
library(shiny)
library(jpeg)
library(png)
library(reticulate)
library(keras)

# setwd(tempfile())
# setwd("/Users/aiden/Desktop/data/cifar10_densenet")

# Load the saved workspace (normalisation constants used by the server),
# the trained model and the class labels. At top level <<- behaves like
# <-, so the plain operator is used.
load("envir.RData")
model <- load_model_hdf5("densenet.h5")
synsets <- readLines("synset.txt")
# Shiny server for the DenseNet image classifier.
# Resolves an image source (uploaded file, downloaded URL, or the bundled
# 'dog.jpg' default), renders the original image, and outputs the predicted
# class label. Uses globals set up at load time: `model`, `synsets`, and
# the per-channel constants mea1..3 / sds1..3 (presumably loaded from
# envir.RData — TODO confirm).
server <- shinyServer(function(input, output) {
# Download the URL image when "Go!" is clicked. Returns NULL while the
# field still holds the "http://" placeholder, otherwise the path of the
# downloaded temp file.
ntext <- eventReactive(input$goButton, {
print(input$url)
if (input$url == "http://") {
NULL
} else {
tmp_file <- tempfile()
download.file(input$url, destfile = tmp_file, mode = 'wb')
tmp_file
}
})
# Show the selected image. The active tab decides precedence: on the
# "Upload Image" tab the uploaded file wins over the URL download; on the
# URL tab it is the reverse. 'dog.jpg' is the fallback when neither exists.
output$originImage = renderImage({
list(src = if (input$tabs == "Upload Image") {
if (is.null(input$file1)) {
if (input$goButton == 0 || is.null(ntext())) {
'dog.jpg'
} else {
ntext()
}
} else {
input$file1$datapath
}
} else {
if (input$goButton == 0 || is.null(ntext())) {
if (is.null(input$file1)) {
'dog.jpg'
} else {
input$file1$datapath
}
} else {
ntext()
}
},
title = "Original Image")
}, deleteFile = FALSE)
# Classify the same image and return the label as text.
output$res <- renderText({
# Same source-resolution logic as originImage above.
src = if (input$tabs == "Upload Image") {
if (is.null(input$file1)) {
if (input$goButton == 0 || is.null(ntext())) {
'dog.jpg'
} else {
ntext()
}
} else {
input$file1$datapath
}
} else {
if (input$goButton == 0 || is.null(ntext())) {
if (is.null(input$file1)) {
'dog.jpg'
} else {
input$file1$datapath
}
} else {
ntext()
}
}
img <- load.image(src)
plot(img)
# Reload at the 32x32 size the network expects.
img <- image_load(src, target_size = c(32,32))
img
x <- image_to_array(img)
# ensure we have a 4d tensor with a single element in the batch dimension
x <- array_reshape(x, c(1, dim(x)))
# normalize each RGB channel with the stored constants
x[,,,1] <- (x[,,,1] - mea1) / sds1
x[,,,2] <- (x[,,,2] - mea2) / sds2
x[,,,3] <- (x[,,,3] - mea3) / sds3
# predict
preds <- model %>% predict(x)
# output result as string: take the highest-probability class and show the
# second token of its synset line as the label
max.idx <- order(preds[1,], decreasing = TRUE)[1]
result <- synsets[max.idx]
res_str <- ""
tmp <- strsplit(result[1], " ")[[1]]
res_str <- paste0(res_str, tmp[2])
res_str
})
})
# Re-attach the packages the UI needs when this part is sourced on its own.
# NOTE(review): require() returns FALSE instead of erroring on a missing
# package; library() is usually preferable for hard dependencies.
require(imager)
require(shiny)
require(jpeg)
require(png)
# UI: two input tabs (file upload / image URL), the original image, and the
# classifier's text output. The Korean strings are user-facing labels
# (skin-cancer recognition titles) and are runtime text — left untouched.
ui <- shinyUI(
fluidPage(
includeCSS("bootstrap.css"),
pageWithSidebar(
headerPanel(title = '피부암 인식 using DenseNet',
windowTitle = 'Image Classification(피부암) using DenseNet'),
fluidRow(
column(1),
column(9,
# input$tabs drives the source-selection logic in the server
tabsetPanel(
id = "tabs",
tabPanel("Upload Image",
fileInput('file1', 'Upload a PNG / JPEG File:')),
tabPanel(
"Use the URL",
textInput("url", "Image URL:", "http://"),
actionButton("goButton", "Go!")
)
),
h3(titlePanel("DESCRIPTION - 피부병 분류")),
h3(titlePanel("정상 피부와 피부암"))
),
column(2)
),
# Output side: the original image and the predicted label.
mainPanel(
h3("Image"),
tags$hr(),
imageOutput("originImage", height = "auto"),
tags$hr(),
h3("What is this?"),
tags$hr(),
verbatimTextOutput("res")
)
)))
# Launch the app.
shinyApp(ui = ui, server = server)
# ---- Shiny DenseNet classifier: environment setup ----
# (Verbatim duplicate of the setup that appears earlier in this file;
# cleaned up the same way.)
# NOTE(review): setwd() in scripts is discouraged; kept because the app
# loads its model files (envir.RData, densenet.h5, synset.txt) from here.
setwd("D:\\yys26")

# Install any missing dependencies (keras comes from GitHub via devtools).
# Fix: setdiff() was computed twice per condition; hoisted into a variable.
packages <- c("imager", "shiny", "jpeg", "png", "reticulate", "devtools")
missing <- setdiff(packages, rownames(installed.packages()))
if (length(missing) > 0) {
  install.packages(missing)
}
if (!"keras" %in% rownames(installed.packages())) {
  devtools::install_github("rstudio/keras")
}

# library() errors on a missing package; require() only returns FALSE,
# which silently hides a failed load.
library(imager)
library(shiny)
library(jpeg)
library(png)
library(reticulate)
library(keras)

# setwd(tempfile())
# setwd("/Users/aiden/Desktop/data/cifar10_densenet")

# Load the saved workspace (normalisation constants used by the server),
# the trained model and the class labels. At top level <<- behaves like
# <-, so the plain operator is used.
load("envir.RData")
model <- load_model_hdf5("densenet.h5")
synsets <- readLines("synset.txt")
# Shiny server for the DenseNet image classifier.
# (Verbatim duplicate of the server defined earlier in this file.)
# Resolves an image source (uploaded file, downloaded URL, or the bundled
# 'dog.jpg' default), renders the original image, and outputs the predicted
# class label. Uses globals set up at load time: `model`, `synsets`, and
# the per-channel constants mea1..3 / sds1..3 (presumably loaded from
# envir.RData — TODO confirm).
server <- shinyServer(function(input, output) {
# Download the URL image when "Go!" is clicked. Returns NULL while the
# field still holds the "http://" placeholder, otherwise the path of the
# downloaded temp file.
ntext <- eventReactive(input$goButton, {
print(input$url)
if (input$url == "http://") {
NULL
} else {
tmp_file <- tempfile()
download.file(input$url, destfile = tmp_file, mode = 'wb')
tmp_file
}
})
# Show the selected image. The active tab decides precedence: on the
# "Upload Image" tab the uploaded file wins over the URL download; on the
# URL tab it is the reverse. 'dog.jpg' is the fallback when neither exists.
output$originImage = renderImage({
list(src = if (input$tabs == "Upload Image") {
if (is.null(input$file1)) {
if (input$goButton == 0 || is.null(ntext())) {
'dog.jpg'
} else {
ntext()
}
} else {
input$file1$datapath
}
} else {
if (input$goButton == 0 || is.null(ntext())) {
if (is.null(input$file1)) {
'dog.jpg'
} else {
input$file1$datapath
}
} else {
ntext()
}
},
title = "Original Image")
}, deleteFile = FALSE)
# Classify the same image and return the label as text.
output$res <- renderText({
# Same source-resolution logic as originImage above.
src = if (input$tabs == "Upload Image") {
if (is.null(input$file1)) {
if (input$goButton == 0 || is.null(ntext())) {
'dog.jpg'
} else {
ntext()
}
} else {
input$file1$datapath
}
} else {
if (input$goButton == 0 || is.null(ntext())) {
if (is.null(input$file1)) {
'dog.jpg'
} else {
input$file1$datapath
}
} else {
ntext()
}
}
img <- load.image(src)
plot(img)
# Reload at the 32x32 size the network expects.
img <- image_load(src, target_size = c(32,32))
img
x <- image_to_array(img)
# ensure we have a 4d tensor with a single element in the batch dimension
x <- array_reshape(x, c(1, dim(x)))
# normalize each RGB channel with the stored constants
x[,,,1] <- (x[,,,1] - mea1) / sds1
x[,,,2] <- (x[,,,2] - mea2) / sds2
x[,,,3] <- (x[,,,3] - mea3) / sds3
# predict
preds <- model %>% predict(x)
# output result as string: take the highest-probability class and show the
# second token of its synset line as the label
max.idx <- order(preds[1,], decreasing = TRUE)[1]
result <- synsets[max.idx]
res_str <- ""
tmp <- strsplit(result[1], " ")[[1]]
res_str <- paste0(res_str, tmp[2])
res_str
})
})
# Re-attach the packages the UI needs when this part is sourced on its own.
# (Verbatim duplicate of the UI defined earlier in this file.)
# NOTE(review): require() returns FALSE instead of erroring on a missing
# package; library() is usually preferable for hard dependencies.
require(imager)
require(shiny)
require(jpeg)
require(png)
# UI: two input tabs (file upload / image URL), the original image, and the
# classifier's text output. The Korean strings are user-facing labels
# (skin-cancer recognition titles) and are runtime text — left untouched.
ui <- shinyUI(
fluidPage(
includeCSS("bootstrap.css"),
pageWithSidebar(
headerPanel(title = '피부암 인식 using DenseNet',
windowTitle = 'Image Classification(피부암) using DenseNet'),
fluidRow(
column(1),
column(9,
# input$tabs drives the source-selection logic in the server
tabsetPanel(
id = "tabs",
tabPanel("Upload Image",
fileInput('file1', 'Upload a PNG / JPEG File:')),
tabPanel(
"Use the URL",
textInput("url", "Image URL:", "http://"),
actionButton("goButton", "Go!")
)
),
h3(titlePanel("DESCRIPTION - 피부병 분류")),
h3(titlePanel("정상 피부와 피부암"))
),
column(2)
),
# Output side: the original image and the predicted label.
mainPanel(
h3("Image"),
tags$hr(),
imageOutput("originImage", height = "auto"),
tags$hr(),
h3("What is this?"),
tags$hr(),
verbatimTextOutput("res")
)
)))
# Launch the app.
shinyApp(ui = ui, server = server)
