# Customer purchase analysis script.
#
# Part 1 generates a reproducible dummy data set, writes it to CSV, reads it
# back, and reports record counts, summary statistics, a median-based spender
# segment, and a histogram of purchase amounts.
# Part 2 runs the same analysis on a separately downloaded CSV, which the code
# expects to contain the columns `Customer_ID` and `Purchase_Amount`.
#
# NOTE: Part 2 reuses (and therefore overwrites) the variable names assigned
# in Part 1 (`purchase_data`, `total_records`, ...).

# Load necessary libraries
library(readr)
library(dplyr)
library(readxl)

# Part 1: analysis of generated dummy data ----

# Machine-specific location of the generated CSV; adjust for your environment.
generated_csv_path <- "C:/Users/Admin/Documents/r-wrk/customer_purchases.csv"

# Set seed for reproducibility
set.seed(42)

# Create dummy data: 100 customers with uniformly drawn purchase amounts
dummy_data <- data.frame(
  CustomerID = 1:100,
  PurchaseAmount = runif(100, min = 10, max = 200)
)

# Write the dummy data to a CSV file
write.csv(dummy_data, generated_csv_path, row.names = FALSE)

# Read data from the CSV file
purchase_data <- read_csv(generated_csv_path)

# Calculate total records and unique customers
total_records <- nrow(purchase_data)
total_unique_customers <- n_distinct(purchase_data$CustomerID)

# Print total records and unique customers
cat("Total records:", total_records, "\n")
cat("Unique customers:", total_unique_customers, "\n")

# Calculate statistics for PurchaseAmount.
# na.rm = TRUE matches Part 2; it is a no-op when the column has no NA values.
mean_purchase_amount <- mean(purchase_data$PurchaseAmount, na.rm = TRUE)
median_purchase_amount <- median(purchase_data$PurchaseAmount, na.rm = TRUE)
std_dev_purchase_amount <- sd(purchase_data$PurchaseAmount, na.rm = TRUE)

# Print the calculated statistics
cat("Mean Purchase Amount:", mean_purchase_amount, "\n")
cat("Median Purchase Amount:", median_purchase_amount, "\n")
cat("Standard Deviation of Purchase Amounts:", std_dev_purchase_amount, "\n")

# Add a new column to segment customers based on median purchase amount:
# strictly below the median is "Low Spender", everything else "High Spender".
purchase_data$Segment <- ifelse(
  purchase_data$PurchaseAmount < median_purchase_amount,
  "Low Spender",
  "High Spender"
)

# Print the updated data
print(purchase_data)

# Plot a histogram of PurchaseAmount
hist(
  purchase_data$PurchaseAmount,
  main = "Distribution of Purchase Amounts",
  xlab = "Purchase Amount",
  col = "blue",
  border = "black"
)

# Part 2: analysis of the downloaded CSV ----

# Machine-specific location of the downloaded CSV; adjust for your environment.
downloaded_csv_path <- "C:/Users/pshar/Downloads/customer_purchases.csv"

# Read the customer purchases data from the CSV file
customer_purchases <- read.csv(downloaded_csv_path)

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(customer_purchases)
}

# Alternatively, read the same data using read_csv
purchase_data <- read_csv(downloaded_csv_path)

# Fail early with a clear message if the expected columns are absent; without
# this check, `$` on a missing tibble column yields NULL and the statistics
# below silently become NA.
required_columns <- c("Customer_ID", "Purchase_Amount")
missing_columns <- setdiff(required_columns, names(purchase_data))
if (length(missing_columns) > 0) {
  stop(
    "Missing expected column(s) in ", downloaded_csv_path, ": ",
    paste(missing_columns, collapse = ", "),
    call. = FALSE
  )
}

# Basic analysis
total_records <- nrow(purchase_data)
total_unique_customers <- n_distinct(purchase_data$Customer_ID)

cat("Total number of records:", total_records, "\n")
cat("Total number of unique customers:", total_unique_customers, "\n")

# Statistical analysis of Purchase Amount
mean_purchase_amount <- mean(purchase_data$Purchase_Amount, na.rm = TRUE)
median_purchase_amount <- median(purchase_data$Purchase_Amount, na.rm = TRUE)
std_dev_purchase_amount <- sd(purchase_data$Purchase_Amount, na.rm = TRUE)

cat("Mean purchase amount:", mean_purchase_amount, "\n")
cat("Median purchase amount:", median_purchase_amount, "\n")
cat("Standard deviation of purchase amounts:", std_dev_purchase_amount, "\n")

# Creating a new column called Segment based on median purchase amount
# (rows with an NA purchase amount get an NA segment).
purchase_data$Segment <- ifelse(
  purchase_data$Purchase_Amount < median_purchase_amount,
  "Low Spender",
  "High Spender"
)

# Print the updated purchase data
print(purchase_data)

# Plot the distribution of Purchase Amount
hist(
  purchase_data$Purchase_Amount,
  main = "Distribution of Purchase Amounts",
  xlab = "Purchase Amount",
  col = "red",
  border = "black"
)