library(readr)
library(dplyr)

# ---- Part 1: generate, persist, and analyse dummy purchase data ----
# Builds 100 synthetic customer records, round-trips them through a CSV file,
# prints record counts and summary statistics, labels each row as a low or
# high spender relative to the median purchase amount, and plots a histogram
# of the amounts.

# Fix the RNG seed so runif() below yields the same amounts on every run
set.seed(42)

# Create dummy data: one row per customer, amounts drawn uniformly
# between 10 and 200
dummy_data <- data.frame(
  CustomerID = 1:100,
  PurchaseAmount = runif(100, min = 10, max = 200)
)

# Single source of truth for the CSV location, so the write and the read
# below cannot drift apart
csv_path <- "C:/Users/Admin/Documents/r-wrk/customer_purchases.csv"

# Write the dummy data to the CSV file (no row-name column)
write.csv(dummy_data, csv_path, row.names = FALSE)

# Read the data back from the same CSV file
purchase_data <- read_csv(csv_path)

# Calculate total records and unique customers
total_records <- nrow(purchase_data)
total_unique_customers <- n_distinct(purchase_data$CustomerID)

# Print total records and unique customers
cat("Total records:", total_records, "\n")
cat("Unique customers:", total_unique_customers, "\n")

# Calculate statistics for PurchaseAmount.
# na.rm = TRUE keeps a stray missing value from turning every statistic
# into NA; it is a no-op for the generated data, which has no NAs.
mean_purchase_amount <- mean(purchase_data$PurchaseAmount, na.rm = TRUE)
median_purchase_amount <- median(purchase_data$PurchaseAmount, na.rm = TRUE)
std_dev_purchase_amount <- sd(purchase_data$PurchaseAmount, na.rm = TRUE)

# Print the calculated statistics
cat("Mean Purchase Amount:", mean_purchase_amount, "\n")
cat("Median Purchase Amount:", median_purchase_amount, "\n")
cat("Standard Deviation of Purchase Amounts:", std_dev_purchase_amount, "\n")

# Add a Segment column: amounts strictly below the median are "Low Spender",
# everything else (including amounts equal to the median) is "High Spender"
purchase_data$Segment <- ifelse(
  purchase_data$PurchaseAmount < median_purchase_amount,
  "Low Spender",
  "High Spender"
)

# Print the updated data
print(purchase_data)

# Plot a histogram of PurchaseAmount
hist(
  purchase_data$PurchaseAmount,
  main = "Distribution of Purchase Amounts",
  xlab = "Purchase Amount",
  col = "blue",
  border = "black"
)


# Load necessary libraries
library(readr)
library(dplyr)
library(readxl)

# ---- Part 2: analyse a customer purchases CSV from disk ----
# Reads the file, prints record counts and summary statistics for
# Purchase_Amount, labels each row as a low or high spender relative to the
# median, and plots a histogram of the amounts.
#
# NOTE(review): the earlier section of this file writes columns named
# CustomerID / PurchaseAmount, while this section expects Customer_ID /
# Purchase_Amount -- confirm which schema the Downloads file actually uses.

# Single source of truth for the input location (the file is read twice below)
downloads_csv_path <- "C:/Users/pshar/Downloads/customer_purchases.csv"

# Read the customer purchases data with base R and open it in the data viewer.
# View() opens a GUI viewer, so only call it in an interactive session;
# this keeps the script usable from Rscript / batch runs.
customer_purchases <- read.csv(downloads_csv_path)
if (interactive()) {
  View(customer_purchases)
}

# Alternatively, read the same data using read_csv
purchase_data <- read_csv(downloads_csv_path)

# Fail early with a clear message if the expected columns are absent.
# Without this check, `$` on the result of read_csv() only warns and yields
# NULL for an unknown column, and the script fails later with a less
# helpful error.
required_cols <- c("Customer_ID", "Purchase_Amount")
missing_cols <- setdiff(required_cols, names(purchase_data))
if (length(missing_cols) > 0) {
  stop(
    "Missing expected column(s) in ", downloads_csv_path, ": ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Basic analysis
total_records <- nrow(purchase_data)
total_unique_customers <- n_distinct(purchase_data$Customer_ID)

cat("Total number of records:", total_records, "\n")
cat("Total number of unique customers:", total_unique_customers, "\n")

# Statistical analysis of Purchase Amount (missing values are ignored)
mean_purchase_amount <- mean(purchase_data$Purchase_Amount, na.rm = TRUE)
median_purchase_amount <- median(purchase_data$Purchase_Amount, na.rm = TRUE)
std_dev_purchase_amount <- sd(purchase_data$Purchase_Amount, na.rm = TRUE)

cat("Mean purchase amount:", mean_purchase_amount, "\n")
cat("Median purchase amount:", median_purchase_amount, "\n")
cat("Standard deviation of purchase amounts:", std_dev_purchase_amount, "\n")

# Create a Segment column: amounts strictly below the median are
# "Low Spender", everything else is "High Spender". Rows whose
# Purchase_Amount is NA get an NA segment.
purchase_data$Segment <- ifelse(
  purchase_data$Purchase_Amount < median_purchase_amount,
  "Low Spender",
  "High Spender"
)

# Print the updated purchase data
print(purchase_data)

# Plot the distribution of Purchase Amount
hist(
  purchase_data$Purchase_Amount,
  main = "Distribution of Purchase Amounts",
  xlab = "Purchase Amount",
  col = "red",
  border = "black"
)