#!/usr/bin/env Rscript
# Compare figures reported in two sources (labelled "Kiva" and "MIX Market" in
# the plots): print Pearson correlation tests with and without ratio-based
# outliers, and save side-by-side scatter plots to graphs.png / graphs.pdf.
#
# Input: combined.csv in the working directory. Columns 4/5 are read as the
# two "Portfolio Yield" series and columns 7/8 as the two "Return on Assets"
# series; values may carry a "%" sign.
# NOTE(review): the column meanings are taken from the variable names and plot
# titles below -- confirm against the actual CSV layout.

library("ggplot2")
library("gridExtra")

# Find 2-dimensional non-linear outliers.
#
# Returns the integer positions i where 1 - |datax[i] / datay[i]| > thresh,
# i.e. where datax is smaller in magnitude than datay by more than the given
# fraction. Positions whose ratio is NA are never reported (which() skips
# them). Note the test is one-sided: a datax value much larger than datay
# yields a negative ratio and is not flagged.
find_outliers <- function(datax, datay, thresh = 0.5) {
  ratio <- 1 - abs(datax / datay)
  which(ratio > thresh)
}

# Remove the given positions from x.
#
# Guards the empty case: x[-integer(0)] selects *nothing* in R, so indexing
# with an empty outlier set would otherwise discard the whole vector.
drop_outliers <- function(x, outliers) {
  if (length(outliers) == 0) {
    return(x)
  }
  x[-outliers]
}

# Convert values such as "12.5%" to numeric by stripping the first "%".
parse_percent <- function(x) {
  as.numeric(sub("%", "", x))
}

# Print one correlation result (estimate and p-value) on a single line.
report_cor <- function(label, result) {
  cat(label, result$estimate, "(p=", result$p.value, ")\n")
}

data <- read.csv("combined.csv")

# Portfolio yield: raw series and series with outliers removed.
kiva_yield <- parse_percent(data[[4]])
mix_yield <- parse_percent(data[[5]])
portfolio_outliers <- find_outliers(kiva_yield, mix_yield)
kiva_fixed <- drop_outliers(kiva_yield, portfolio_outliers)
mix_fixed <- drop_outliers(mix_yield, portfolio_outliers)

# Return on assets: same treatment, with a looser outlier threshold (0.6).
kiva_profit <- parse_percent(data[[7]])
mix_profit <- parse_percent(data[[8]])
profit_outliers <- find_outliers(kiva_profit, mix_profit, 0.6)
kiva_prof_fixed <- drop_outliers(kiva_profit, profit_outliers)
mix_prof_fixed <- drop_outliers(mix_profit, profit_outliers)

port_cor <- cor.test(kiva_yield, mix_yield)
port_cor_fixed <- cor.test(kiva_fixed, mix_fixed)
prof_cor <- cor.test(kiva_profit, mix_profit)
prof_cor_fixed <- cor.test(kiva_prof_fixed, mix_prof_fixed)

report_cor("Portfolio Yield correlation coefficient: ", port_cor)
report_cor(
  "Portfolio Yield correlation coefficient without outliers: ",
  port_cor_fixed
)
report_cor("Return on Assets correlation coefficient: ", prof_cor)
report_cor(
  "Return on Assets correlation coefficient without outliers: ",
  prof_cor_fixed
)

# The plots show the raw (outliers included) data.
df <- data.frame(kiva_yield, mix_yield, kiva_profit, mix_profit)

yieldplot <- ggplot(df, aes(kiva_yield, mix_yield)) +
  geom_point() +
  labs(x = "Kiva (%)", y = "MIX Market (%)", title = "Portfolio Yield")

profplot <- ggplot(df, aes(kiva_profit, mix_profit)) +
  geom_point() +
  labs(x = "Kiva (%)", y = "MIX Market (%)", title = "Return on Assets")

# arrangeGrob() builds the combined layout without drawing it, so running
# under Rscript does not leave a stray Rplots.pdf behind (grid.arrange()
# would draw to the default device as a side effect).
graphs <- arrangeGrob(yieldplot, profplot, ncol = 2)

ggsave("graphs.png", graphs, width = 6, height = 3.5, dpi = 300)
ggsave("graphs.pdf", graphs)
# plot(graphs)