# Exploring the `cars` data frame: inspection, indexing, summaries,
# exercises, and basic filtering. `cars` must already be loaded.
# Each statement sits on its own line so that it actually runs; a comment
# marker (#) comments out everything that follows it on the same line.

# Inspecting the data ----

# What's the structure of the dataset?
str(cars)

# What are the variable names? (Column names in R)
colnames(cars)

# What are the dimensions? (Number of rows and columns)
dim(cars)

# How many rows are there?
nrow(cars)

# How many columns are there?
ncol(cars)

# R couldn't find a name for the first column, so it named it 'X'
# Let's rename it 'name' since it is obviously the car name
colnames(cars)[1] <- "name"

# Now the first column name should be "name"
head(cars)

# Getting help ----
help(help)
?read.csv

# Indexing ----

# Display the value at row 1, column 3
cars[1, 3]

# Display rows 2-5, column 6
cars[2:5, 6]

# Display all rows, column 1
cars[, 1]

# Display row 4, all columns
cars[4, ]

# You can also use the '$' to access columns for dataframes.
colnames(cars)
cars$mpg
cars$wt

# Analyzing Data ----

# Summary of all variables
summary(cars)

# Frequency
table(cars$gear)

# Exercises ----
# What is the mean weight of the cars?
# What is the correlation between the weight and the mpg?
# What is the horsepower of the Cadillac Fleetwood?
# How many cars have six cylinders?

# Exercise Answers ----

# What is the mean weight of the cars?
# (wt is multiplied by 1000 to report the result in lb)
mean(cars$wt) * 1000

# What is the correlation between the weight and the mpg?
cor(cars$mpg, cars$wt)

# What is the horsepower of the Cadillac Fleetwood?
# Look the value up by name instead of printing the whole data frame.
# NOTE(review): assumes the name is stored exactly as "Cadillac Fleetwood".
cars$hp[cars$name == "Cadillac Fleetwood"]

# How many cars have six cylinders?
# (read the count listed under 6 in the frequency table)
table(cars$cyl)

# Creating vectors and matrix ----
vector1 <- c(1, 2, 3, 4)
vector1

# Fill a 2-row matrix row by row from vector1, then display it
matrix1 <- matrix(vector1, nrow = 2, byrow = TRUE)
matrix1

# Operators: Examples ----

# Arithmetic Operators
2 + 5

# Logical Operators
6 < 3
"hello" == "HELLO"
"hello" == "hello"

# Manipulating Data ----
# You can combine indexing with operators

# What are the cars that have vs of 1?
cars[cars$vs == 1, ]

# What are the cars that have 6 or more cylinders?
cars[cars$cyl >= 6, ]

# What are the names of the cars that have horsepower > 100?
cars$name[cars$hp > 100]

# What are the names of the cars that have weight > 3000 lb?
cars$name[cars$wt > 3]

# What is mean mpg of cars with 4 cylinders?
mean(cars$mpg[cars$cyl == 4])

# 6 cylinders?
# Mean mpg of the cars with 6 cylinders
mean(cars$mpg[cars$cyl == 6])

# Transforming Variables ----
# Rescale the weight column by a factor of 1000
wt.lb <- cars$wt * 1000

# Linear Regression Example (OLS) ----
# Dependent variable: mpg
# Independent variables: weight, horsepower, number of cylinders
# Columns are referenced by bare name and resolved through `data = cars`
# (wt.lb is not a column of cars, so it is found in the calling environment).
# This keeps the coefficient labels clean ("hp", "cyl") and lets predict()
# accept new data, which a formula built from `cars$...` terms does not.
regr <- lm(mpg ~ wt.lb + hp + cyl, data = cars)
summary(regr)

# The new object "regr" is of a special class: lm
class(regr)

# Here are the things in "regr" that we can look at
names(regr)

# We can look at the things in the object using the '$'
regr$residuals
regr$coefficients