## Market Analysis
#
# Scans daily closing prices for "9 up-days out of the last 10" signals,
# optionally thins out signals that follow each other too closely, and reports
# the 5-, 10- and 15-day forward returns after each remaining signal.
#
# Input : data/ADSK_D.txt (tab separated, header row, columns Date and Close)
# Output: output/occurences<overlap>.csv and output/summary<overlap>.csv

# NOTE: the workspace is intentionally NOT cleared here. Wiping the global
# environment from inside a script destroys the caller's objects when the file
# is source()d; run the script in a fresh R session if isolation is needed.
set.seed(12345) # Initializes random generator [not needed here, but harmless]

# Loading libraries
# If you get error messages here run 'install.packages("package.name")' before
# loading the library, i.e. 'install.packages("plyr")' then 'library(plyr)'
library(plyr)      # Data manipulation (summarise)
library(reshape2)  # Data manipulation (melt)
library(lubridate) # Working with dates (mdy)

# Reading the data
# In the .txt file values are separated by tabs, first row is the header
prices <- read.table("data/ADSK_D.txt", sep = "\t", header = TRUE)
stopifnot(all(c("Date", "Close") %in% names(prices)))
prices$Date <- mdy(prices$Date) # Dates are stored as month/day/year text
head(prices) # Shows the first rows of the data (just for information)

## Adding derived columns to the data

n.rows <- nrow(prices)
# Rows for which a full 10-day look-back exists (11, 12, ..., n.rows).
# Built with seq_len() so it is empty, not c(11, 10, ...), for short inputs.
scan.rows <- seq_len(max(n.rows - 10L, 0L)) + 10L

# Up/Down: TRUE when the close is higher than the previous close.
# The first row has no previous close, hence the leading NA.
prices$updown <- c(NA, prices$Close[-1] > prices$Close[-nrow(prices)])

# 10 Day Sum: number of up-days among the current row and the 9 rows before it.
# Rows 1..10 stay NA because their window is incomplete.
prices$sum.10 <- NA_integer_
prices$sum.10[scan.rows] <- vapply(
  scan.rows,
  function(i) sum(prices$updown[(i - 9L):i], na.rm = TRUE),
  integer(1)
)

# Trigger 9 of 10: at least 9 up-days in the 10-day window (NA for rows 1..10)
prices$trigger.9 <- prices$sum.10 >= 9

# Managing overlaps
# If 'overlap' is zero or negative, overlapping signals are all kept.
# Otherwise it is the minimum number of periods between two kept signals.
overlap <- 5

if (overlap > 0) {
  trigger <- prices$trigger.9
  # The scan must stay sequential: a signal suppressed here no longer
  # suppresses the signals that come after it.
  for (i in scan.rows) {
    # Clamp the window start to row 1 so a large 'overlap' cannot produce
    # zero/negative subscripts.
    window <- max(1L, i - overlap):(i - 1L)
    # na.rm = TRUE: the window may reach into rows 1..10 where the trigger is
    # NA; those rows simply carry no earlier signal.
    if (isTRUE(trigger[i]) && any(trigger[window], na.rm = TRUE)) {
      trigger[i] <- FALSE
    }
  }
  prices$trigger.9 <- trigger
}

# Row numbers of the triggered days. which() already ignores NA entries.
filter <- which(prices$trigger.9)

# 5-, 10- and 15-day forward returns, filled only on triggered rows.
# Indexing past the end of 'Close' yields NA, so signals too close to the end
# of the data automatically get an NA return.
for (horizon in c(5L, 10L, 15L)) {
  column <- paste0("ret.", horizon)
  prices[[column]] <- NA_real_
  prices[[column]][filter] <-
    prices$Close[filter + horizon] / prices$Close[filter] - 1
}

# Final table with one row per signal
occurences <- prices[filter, c("Date", "Close", "ret.5", "ret.10", "ret.15")]
occurences

# Summary table; add further statistics here if required.
# Every statistic skips NA returns (signals without enough data ahead), so a
# single recent signal does not turn the whole summary into NA. 'days.up.*' is
# the share of available returns that are positive.
final.summary <- summarise(
  occurences,
  average.ret.5  = mean(ret.5, na.rm = TRUE),
  average.ret.10 = mean(ret.10, na.rm = TRUE),
  average.ret.15 = mean(ret.15, na.rm = TRUE),
  stdev.ret.5    = sd(ret.5, na.rm = TRUE),
  stdev.ret.10   = sd(ret.10, na.rm = TRUE),
  stdev.ret.15   = sd(ret.15, na.rm = TRUE),
  max.gain.5     = max(ret.5, na.rm = TRUE),
  max.loss.5     = min(ret.5, na.rm = TRUE),
  max.gain.10    = max(ret.10, na.rm = TRUE),
  max.loss.10    = min(ret.10, na.rm = TRUE),
  max.gain.15    = max(ret.15, na.rm = TRUE),
  max.loss.15    = min(ret.15, na.rm = TRUE),
  days.up.5      = mean(ret.5 > 0, na.rm = TRUE),
  days.up.10     = mean(ret.10 > 0, na.rm = TRUE),
  days.up.15     = mean(ret.15 > 0, na.rm = TRUE)
)

# Transposing 'final.summary' (one row per statistic) for presentation
final.summary <- melt(final.summary)
final.summary

# Saving results to files
# A separate pair of files is written for each positive 'overlap' value
file.suffix <- if (overlap > 0) overlap else ""
dir.create("output", showWarnings = FALSE, recursive = TRUE)
write.table(occurences,
            paste0("output/occurences", file.suffix, ".csv"),
            row.names = FALSE, sep = ",")
write.table(final.summary,
            paste0("output/summary", file.suffix, ".csv"),
            row.names = FALSE, sep = ",")