Download and Parse DJ/UBS Commodities Indexes

March 16, 2012
By

(This article was first published on tradeblotter » R, and kindly contributed to R-bloggers)

Here is another data downloading and parsing script, this one for the Dow Jones/UBS Commodities Indexes. Compared to the last post, this parser deals with multiple sheets and multiple columns in each sheet. It also constructs monthly series from the daily data, and stores it using a different symbol. Finally, it’s a good example of how a script can be functionalized, maybe for use with cron.

Let’s look at the code…

download.DJUBS <- function (filesroot = "~/Data/DJUBS") {

  # Load needed packages:
  require(zoo)
  require(gdata)
  require(FinancialInstrument)
  require(quantmod)

  # filesroot: Set the working directory, where there's a .incoming folder that 
  # contains the downloaded spreadsheet.
  
  # Create and set the working directory if it doesn't exist
  if (!file.exists(filesroot))
    dir.create(filesroot, mode="0777")
  
  # Create and set the .incoming directory if it doesn't exist
  if (!file.exists(paste(filesroot, "/.incoming", sep="")))
    dir.create(paste(filesroot, "/.incoming", sep=""), mode="0777")
  setwd(paste(filesroot, "/.incoming", sep=""))
  
  # Remove the old file from .incoming
  if(file.exists("DJUBS_full_hist.xls"))
    system("rm DJUBS_full_hist.xls")
  
  # Download the xls workbook directly from the web site:
  print("Downloading excel spreadsheet from DJUBS web site...")
  system("wget http://www.djindexes.com/mdsidx/downloads/xlspages/ubsci_public/DJUBS_full_hist.xls")
  
  if(!file.exists("DJUBS_full_hist.xls"))
    stop(paste("No spreadsheet exists.  Download the spreadsheet to be processed from www.djindexes.com into ", filesroot, "/.incoming", sep=""))
  
  sheetnames=c("Excess Return", "Total Return")
  for(sheet in sheetnames){  
    print(paste("Reading", sheet, "sheet... This will take a moment..."))
    x = read.xls("DJUBS_full_hist.xls", sheet=sheet)
    
    # Add column names, get the descriptions to add as attributes
    colnames(x)=t(as.data.frame(apply(x[2,], FUN=as.character, MARGIN=1), stringsAsFactors=FALSE))
    x.attr = t(as.data.frame(x[1,], stringsAsFactors=FALSE))
    x=x[-1:-2,]
    
    # Get rid of the last line, which contains the disclaimer
    x=x[-dim(x)[1],]
    # Remove blank columns between sections
    x=x[,-which(apply(x,2,function(x)all(is.na(x))))]
    
    # Get attributes and labels
    categoryNames = x.attr[,!is.na(x.attr)]
    symbolNames = paste(make.names(colnames(x[,])), ".IDX", sep="")
    symbolNamesMonthly = paste(make.names(colnames(x[,])), ".M.IDX", sep="")
    ISOdates = as.Date(x[,1], "%m/%d/%Y")
    
    for(i in 2:length(symbolNames)) {
      # check to make sure directories exist for each symbol, first for daily series...
      dir.create(paste(filesroot, symbolNames[i], sep="/"), showWarnings = FALSE, 
      recursive = FALSE, mode = "0777")
      # ... then for monthly series
      dir.create(paste(filesroot, symbolNamesMonthly[i], sep="/"), showWarnings = FALSE, 
      recursive = FALSE, mode = "0777")
    }
    
    # Parse the columns into individual price objects
    print("Processing columns as symbols...")
    for( i in 2:dim(x)[2]){
      x.xts = as.xts(as.numeric(x[,i]), order.by=ISOdates)
      R.xts = Return.calculate(x.xts)
      x.xts = cbind(x.xts, R.xts)
      colnames(x.xts)=c("Close", "Returns")
      xtsAttributes(x.xts) <- list(Description = paste(categoryNames[,i], sheet, "Index"))
  
      save(x.xts, file=paste(filesroot, symbolNames[i], paste(symbolNames[i], ".rda", sep=""), sep="/"))
      print(paste(symbolNames[i],", ",categoryNames[,i], ", ", sheet, sep=""))
      
      # Describe the metadata for each index
      instrument(symbolNames[i], currency="USD", multiplier=1, tick_size=.01, start_date=head(index(x.xts),1), description=paste(categoryNames[,i], "Index"), data="CR", source="DJUBS", frequency="Daily", assign_i=TRUE)
      
      # Construct a monthly series from the daily series
      x.m.xts = to.monthly(Cl(x.xts))
      x.m.xts = cbind(x.m.xts[,4], Return.calculate(x.m.xts[,4]))
      colnames(x.m.xts)=c("Close","Returns")
      # @ TODO Want to delete the last line off ONLY IF the month is incomplete
      if(tail(index(x.xts),1) != as.Date(as.yearmon(tail(index(x.xts),1)), frac=1)) {
        # That test isn't quite right, but its close.  It won't work on the first
        # day of a new month when the last business day wasn't the last day of 
        # the month.  It will work for the second day.
        x.m.xts = x.m.xts[-dim(x.m.xts)[1],]
      }
        
      # Index is set to last trading day of the month.  
      # Reset index to last day of the month to make alignment easier with other monthly series.  
      index(x.m.xts)=as.Date(index(x.m.xts), frac=1)
        
      xtsAttributes(x.m.xts) <- list(Description = paste(categoryNames[,i], sheet, "Index"))
  
      save(x.m.xts, file=paste(filesroot, symbolNamesMonthly[i], paste(symbolNamesMonthly[i], ".rda", sep=""), sep="/"))
      print(paste(symbolNamesMonthly[i],", ",categoryNames[,i], ", ", sheet, sep=""))
      # Describe the metadata for each index
      instrument(symbolNamesMonthly[i], currency="USD", multiplier=1, tick_size=.01, start_date=head(index(x.xts),1), description=paste(categoryNames[,i], "Index"), data="CR", source="DJUBS", frequency="Monthly", assign_i=TRUE)
    }
  }
  
  setSymbolLookup.FI(base_dir=filesroot, split_method='common')
  print( "Now, whenever you log in you need to register the instruments.  This")
  print( "might be a line you put into .Rprofile so that it happens automatically:")
  print( "> require(quantmod) # this requires a dev build after revision 560 or so.")
  print( "> setSymbolLookup.FI(base_dir=filesroot, split_method='common')")
  print( "Now you should be able to type:")
  print( "> getSymbols('DJUBSTR.IDX') ")
}

The data source provides daily data for the indexes, so the script constructs monthly time series for those symbols as well. Once the data is parsed and stored, we can do some analysis. In a fresh R session, I would do the following to prepare the workspace:
library(quantmod)
library(FinancialInstrument)
library(PerformanceAnalytics)
setSymbolLookup.FI(base_dir="~/Data/DJUBS", split_method='common')

Now I’ll retrieve the data from disk…
getSymbols("DJUBSTR.IDX")
tail(DJUBSTR.IDX)

and chart the DJUBS Total Returns Index:
charts.PerformanceSummary(DJUBSTR.IDX[,"Returns"], ylog=TRUE, wealth.index=TRUE, main = "DJUBS Total Returns Index Returns")

Or I can use quantmod’s charts:
chartSeries(Cl(DJUBSTR.IDX), theme="white")

Those are the daily series, but I want to look at real returns during the last couple of decades so I’ll use the monthly series instead:
symbols=c('DJUBSTR.M.IDX','DJUBS.M.IDX','DJUBSSP.M.IDX')
getSymbols(symbols)

The code is available on R-Forge, in the /inst/parsers directory of the FinancialInstrument package.


To leave a comment for the author, please follow the link and comment on his blog: tradeblotter » R.

R-bloggers.com offers daily e-mail updates about R news and tutorials on topics such as: visualization (ggplot2, Boxplots, maps, animation), programming (RStudio, Sweave, LaTeX, SQL, Eclipse, git, hadoop, Web Scraping) statistics (regression, PCA, time series, trading) and more...



If you got this far, why not subscribe for updates from the site? Choose your flavor: e-mail, twitter, RSS, or facebook...

Tags:

Comments are closed.