# File: calrollcall.R
#
# Purpose: Subset large CA rollcall voting matrices using user-defined
#          selection criteria.
#
# Author: Jeff Lewis (jblewis@ucla.edu)
#
# Change log:
#
#   August 2010: Initial version
#

# Function: getRollCallsByIndex(filen, votes)
#
# Purpose: Extract from the full matrix of rollcalls a subset of votes
#          by vote number (the "id" column in the "desc" file).
#
# Arguments:
#   filen = Name of a CA vote matrix dataset (eg "ca07-08votes.dat").
#           Each line holds one member: characters 1-20 are the member
#           name, and character 20 + j is that member's vote on rollcall j.
#
#   votes = Vector of vote ids to extract
#
# Returns: A data frame with a "member" column followed by one column
#          ("V<id>") per requested vote. Members whose selected votes are
#          all "0" (or fall beyond the end of their line) are dropped.
#
getRollCallsByIndex <- function(filen, votes = 1:10) {
  v <- scan(filen, what = "character", sep = "\n")
  n <- length(v)
  k <- length(votes)

  # The first 20 characters of each line are the (left-padded) member name
  name <- sub("^\\s+", "", substr(v, 1, 20), perl = TRUE)

  # Loop over members and extract selected votes.
  # seq_len() keeps the loop from running when the file has no lines.
  dat <- matrix(NA_character_, n, k)
  for (i in seq_len(n)) {
    dat[i, ] <- strsplit(v[i], split = "")[[1]][20 + votes]
  }

  # Flag members who cast at least one vote in the selected rollcalls.
  # Working on the matrix (rather than on a column-subset of the data frame)
  # keeps this correct when only a single vote is requested; na.rm = TRUE
  # treats vote ids past the end of a line as "no vote".
  voted <- rowSums(dat != "0", na.rm = TRUE) > 0

  out <- as.data.frame(cbind(name, dat))
  names(out) <- c("member", sprintf("V%i", votes))

  # Remove members who cast no votes in the selected subset of rollcalls
  out[voted, , drop = FALSE]
}

# Function: selectVotes(filen, criteria, ...)
#
# Purpose: Extracts votes from the CA vote description file according to a
#          given list of criteria.
#
# Arguments:
#   filen = Name of a CA vote description dataset (eg "ca07-08desc.dat")
#
#   criteria = A named list of criteria to be "and"ed together to select
#              a subset of votes from the full dataset. The keys in the list
#              identify fields. The values in the list are the patterns to
#              be matched against the values of the corresponding field
#              (list key).
#
#   ... = Other arguments to be passed to the "grep" function in applying
#         the selection criteria
#
# Returns: A data frame of the vote descriptions that match every criterion
#          (all records when criteria is empty).
#
selectVotes <- function(filen, criteria = list(), ...) {
  cnames <- c("id", "bill", "author", "topic", "date", "location", "motion",
              "yeas", "noes", "passfail")

  # NOTE(review): read.csv() defaults to header = TRUE, so the first line of
  # the file is consumed as a header row -- confirm the description files
  # really start with one.
  dat <- read.csv(filen, sep = "\t", col.names = cnames, as.is = TRUE)
  cat(sprintf("%s has %i records...\n", filen, nrow(dat)))

  if (length(criteria) == 0) {
    return(dat)
  }

  # Fail loudly on unnamed or unknown fields instead of silently returning
  # an empty selection.
  fields <- names(criteria)
  if (is.null(fields) || !all(fields %in% cnames)) {
    stop("criteria must be a list named by description fields: ",
         paste(cnames, collapse = ", "), call. = FALSE)
  }

  for (i in seq_along(criteria)) {
    value <- criteria[[i]]
    field <- fields[i]
    cat(sprintf("\tLimit to \"%s\" in \"%s\"...\n", value, field))
    dat <- dat[grep(value, dat[[field]], ...), , drop = FALSE]
    cat(sprintf("\t\t%i records remaining.\n", nrow(dat)))
  }
  dat
}

#
# A little example; requires data from ca07-08.zip to be
# located in the current working directory
#
example <- function() {
  # Get all Asm Floor votes
  cat("Get Assembly Floor votes...\n")
  crit <- list("location" = "ASM\\. FLOOR")
  desc <- selectVotes("ca07-08desc.dat", criteria = crit)
  print(table(desc$location))
  dat <- getRollCallsByIndex("ca07-08votes.dat", desc$id)
  print(dim(dat))

  # Get all Sen Floor votes from June of 2008
  cat("Get Sen Floor votes from June of 2008...\n")
  crit <- list("location" = "SEN\\. FLOOR", date = "06/../2008")
  desc <- selectVotes("ca07-08desc.dat", criteria = crit, perl = TRUE)
  print(table(desc$location))
  print(table(desc$date))
  dat <- getRollCallsByIndex("ca07-08votes.dat", desc$id)
  print(dim(dat))

  list(desc = desc, votedata = dat)
}

# Uncomment next line to run the example...
# res <- example()