# Step 1: Load necessary libraries
library(dplyr)
library(ggplot2)
# Step 2: Read data from CSV file
data <- read.csv("your_data.csv")

# Step 3: Data preprocessing
# Replace missing values with the median of the observed (non-NA) values.
# The right-hand side is evaluated first, so the median is computed on the
# column as it was before any NA is filled in.
data$column_with_missing_values[is.na(data$column_with_missing_values)] <-
  median(data$column_with_missing_values, na.rm = TRUE)

# Step 4: Perform data manipulation using dplyr
# Keep rows where column1 exceeds 50, retain only the two columns of
# interest, and derive their product as new_column.
data_processed <- data %>%
  filter(column1 > 50) %>%
  select(column1, column2) %>%
  mutate(new_column = column1 * column2)

# Step 5: Generate a ggplot visualization
# Scatter plot of column1 against column2, with points coloured by
# new_column.
scatter_plot <- ggplot(
  data = data_processed,
  aes(x = column1, y = column2)
) +
  geom_point(aes(color = new_column)) +
  labs(
    title = "Scatter plot of column1 vs column2",
    x = "Column 1",
    y = "Column 2"
  ) +
  theme_minimal()

# Print explicitly: a ggplot object is only drawn when it is printed, and
# top-level values are not auto-printed when this file is run through
# source() with its default settings.
print(scatter_plot)