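"""Solution for Q2 of the Data Science Laboratory (DS) course.

Loads a CSV dataset, fills missing values, removes outliers, min-max
normalizes the numeric columns, and reports their skewness.
"""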
# Import libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import zscore
from sklearn.preprocessing import MinMaxScaler
# Numeric columns that are summarized, screened for outliers, and rescaled.
NUMERIC_COLS = ['Age', 'GPA', 'Test_Score', 'Attendance']

# A row is dropped when any numeric column has |z-score| >= this value.
Z_THRESHOLD = 2.5


def fill_missing(df):
    """Return a copy of *df* with missing Age/Attendance values filled.

    Missing ``Age`` entries are replaced by the column mean and missing
    ``Attendance`` entries by the column median. The input frame is not
    modified.
    """
    filled = df.copy()
    filled['Age'] = filled['Age'].fillna(filled['Age'].mean())
    filled['Attendance'] = filled['Attendance'].fillna(
        filled['Attendance'].median()
    )
    return filled


def remove_outliers(df, threshold=Z_THRESHOLD):
    """Return the rows of *df* whose numeric columns all have |z| < threshold.

    Z-scores are computed per column over ``NUMERIC_COLS``. Missing values
    must be filled first: a NaN in a column makes every z-score of that
    column NaN, and NaN never compares below the threshold, so every row
    would be dropped.
    """
    z = np.abs(zscore(df[NUMERIC_COLS].to_numpy(dtype=float), axis=0))
    keep = (z < threshold).all(axis=1)
    # Copy so that later column assignments do not write into a slice of
    # the original frame (avoids pandas' SettingWithCopy warning).
    return df.loc[keep].copy()


def normalize(df):
    """Return a copy of *df* with ``NUMERIC_COLS`` min-max scaled."""
    scaled = df.copy()
    scaler = MinMaxScaler()
    scaled[NUMERIC_COLS] = scaler.fit_transform(scaled[NUMERIC_COLS])
    return scaled


def main(csv_path="data_2.csv"):
    """Run the cleaning pipeline on *csv_path*, printing each stage."""
    # Load dataset
    df = pd.read_csv(csv_path)
    print(df)
    print(df.isnull().sum())

    # Fill missing values
    df = fill_missing(df)
    print(df)

    # Statistical summary
    print(df.describe())

    # Boxplot for outliers
    df[NUMERIC_COLS].boxplot()
    plt.show()

    # Remove outliers
    df = remove_outliers(df)
    print(df)

    # Normalize data
    df = normalize(df)
    print(df)

    # Check skewness
    print(df[NUMERIC_COLS].skew())


if __name__ == "__main__":
    main()
Name,Age,GPA,Test_Score,Attendance
Amit,18.0,8.1,78,85.0
Neha,19.0,7.8,82,90.0
Rahul,,9.0,91,88.0
Priya,20.0,8.5,85,
Karan,21.0,7.2,76,76.0
Sneha,19.0,8.9,88,92.0
Rohit,18.0,3.5,620,87.0
Pooja,20.0,8.0,80,89.0
Arjun,50.0,9.5,92,900.0
Kavya,19.0,8.3,79,84.0