import pandas as pd


def transform_employees(df):
    """Clean and enrich the raw employee table.

    Steps:
      1. Fill missing ``Salary`` values with the column mean.
      2. Upper-case ``Department`` names.
      3. Keep only rows with ``Salary`` > 40000.
      4. Add an ``ExperienceLevel`` column derived from
         ``Experience(Years)``: 0-3 -> Junior, >3-7 -> Mid-level,
         >7 -> Senior.

    The input frame is not modified; a new DataFrame is returned.
    """
    cleaned = df.copy()

    # Assign the result back instead of calling fillna(inplace=True) on a
    # column selection: the chained in-place form is deprecated and does
    # nothing once pandas Copy-on-Write is enabled.
    cleaned["Salary"] = cleaned["Salary"].fillna(cleaned["Salary"].mean())

    # Standardize department names to uppercase
    cleaned["Department"] = cleaned["Department"].str.upper()

    # Take an explicit copy of the filtered rows so that adding a column
    # below does not write into a view of `cleaned`
    # (avoids SettingWithCopyWarning).
    filtered = cleaned[cleaned["Salary"] > 40000].copy()

    # include_lowest=True keeps employees with 0 years of experience in the
    # "Junior" bin, and the open-ended top bin keeps very senior employees
    # (> 20 years) labelled instead of silently becoming NaN.
    filtered["ExperienceLevel"] = pd.cut(
        filtered["Experience(Years)"],
        bins=[0, 3, 7, float("inf")],
        labels=["Junior", "Mid-level", "Senior"],
        include_lowest=True,
    )
    return filtered


def run_employee_etl():
    """Run the Extract -> Transform -> Load pipeline for the employee data."""
    # -------------------------------
    # Step 1: Extract
    # -------------------------------
    df = pd.read_csv("employees_raw.csv")
    print("=== RAW DATA (Extracted) ===")
    print(df.head(10))

    # -------------------------------
    # Step 2: Transform
    # -------------------------------
    df_filtered = transform_employees(df)
    print("\n=== TRANSFORMED DATA ===")
    print(df_filtered.head(10))

    # -------------------------------
    # Step 3: Load
    # -------------------------------
    df_filtered.to_csv("employees_clean.csv", index=False)
    print("\n✅ Clean data has been saved to 'employees_clean.csv'")
    print(f"Total Rows Before: {len(df)} | After Transformation: {len(df_filtered)}")


if __name__ == "__main__":
    run_employee_etl()
output
> python 5.py
=== RAW DATA (Extracted) ===
   EmpID     Name Department   Salary  Experience(Years)
0    101    Alice         HR  35000.0                  1
1    102      Bob    Finance  45000.0                  3
2    103  Charlie         IT  60000.0                  7
3    104    David         IT  50000.0                  5
4    105      Eve    Finance      NaN                  2
5    106    Frank         HR  42000.0                  4
6    107    Grace    Finance  39000.0                  2
7    108    Helen         IT  70000.0                 10
8    109      Ivy    Finance  48000.0                  6
9    110     Jack         HR  41000.0                  3

=== TRANSFORMED DATA ===
    EmpID     Name Department        Salary  Experience(Years) ExperienceLevel
1     102      Bob    FINANCE  45000.000000                  3          Junior
2     103  Charlie         IT  60000.000000                  7       Mid-level
3     104    David         IT  50000.000000                  5       Mid-level
4     105      Eve    FINANCE  48065.217391                  2          Junior
5     106    Frank         HR  42000.000000                  4       Mid-level
7     108    Helen         IT  70000.000000                 10          Senior
8     109      Ivy    FINANCE  48000.000000                  6       Mid-level
9     110     Jack         HR  41000.000000                  3          Junior
10    111    Karen         IT  55000.000000                  8          Senior
11    112      Leo    FINANCE  58000.000000                  9          Senior

✅ Clean data has been saved to 'employees_clean.csv'
Total Rows Before: 25 | After Transformation: 18
import pandas as pd

# -------------------------------
# Concept hierarchies: City -> State -> Country
# -------------------------------
city_to_state = {
    "Hyderabad": "Telangana",
    "Delhi": "Delhi",
    "Mumbai": "Maharashtra",
    "Pune": "Maharashtra",
    "Chennai": "Tamil Nadu",
}

state_to_country = {
    "Telangana": "India",
    "Delhi": "India",
    "Maharashtra": "India",
    "Tamil Nadu": "India",
}


def generalize_age(age):
    """Map a numeric age onto its higher-level age group.

    Returns "Teen" (<= 20), "Young" (<= 30), "Adult" (<= 40) or
    "Senior" (> 40). A missing age (None / NaN) yields None rather than
    being mislabelled.
    """
    # NaN compares False against everything, so without this guard a
    # missing age would fall through every branch and be tagged "Senior".
    if pd.isna(age):
        return None
    # Cumulative upper bounds leave no gaps: fractional ages such as 20.5
    # or 30.5 land in the next group up instead of dropping to "Senior".
    if age <= 20:
        return "Teen"
    if age <= 30:
        return "Young"
    if age <= 40:
        return "Adult"
    return "Senior"


def generalize_students(df):
    """Apply attribute-oriented induction (AOI) to the student table.

    Adds ``State`` (from ``City``), ``Country`` (from ``State``) and
    ``AgeGroup`` (from ``Age``), then drops the low-level ``City`` and
    ``Age`` columns. Cities or states missing from the hierarchies map to
    NaN. The input frame is not modified; a new DataFrame is returned.
    """
    generalized = df.copy()
    generalized["State"] = generalized["City"].map(city_to_state)
    generalized["Country"] = generalized["State"].map(state_to_country)
    generalized["AgeGroup"] = generalized["Age"].apply(generalize_age)
    # Drop detailed columns (low-level)
    return generalized.drop(columns=["City", "Age"])


def run_aoi():
    """Read the raw student data, generalize it, and save the result."""
    # -------------------------------
    # Step 1: Extract
    # -------------------------------
    df = pd.read_csv("aoi_raw.csv")
    print("=== RAW DATA ===")
    print(df)

    # -------------------------------
    # Step 2/3: Apply AOI (Generalization) using the concept hierarchies
    # -------------------------------
    df_generalized = generalize_students(df)
    print("\n=== GENERALIZED DATA (AOI Result) ===")
    print(df_generalized)

    # -------------------------------
    # Step 4: Load/Save Result
    # -------------------------------
    df_generalized.to_csv("aoi_generalized.csv", index=False)
    print("\n✅ Generalized data saved to 'aoi_generalized.csv'")


if __name__ == "__main__":
    run_aoi()
output
> python 6.py
=== RAW DATA ===
   StudentID     Name       City  Age Department
0          1    Alice  Hyderabad   21        CSE
1          2      Bob  Hyderabad   25        CSE
2          3  Charlie      Delhi   19        ECE
3          4    David      Delhi   32        ECE
4          5      Eve     Mumbai   45       MECH
5          6    Frank       Pune   28        CSE
6          7    Grace       Pune   22        EEE
7          8    Helen    Chennai   35       MECH
8          9      Ivy    Chennai   41        EEE
9         10     Jack  Hyderabad   18        CSE

=== GENERALIZED DATA (AOI Result) ===
   StudentID     Name Department        State Country AgeGroup
0          1    Alice        CSE    Telangana   India    Young
1          2      Bob        CSE    Telangana   India    Young
2          3  Charlie        ECE        Delhi   India     Teen
3          4    David        ECE        Delhi   India    Adult
4          5      Eve       MECH  Maharashtra   India   Senior
5          6    Frank        CSE  Maharashtra   India    Young
6          7    Grace        EEE  Maharashtra   India    Young
7          8    Helen       MECH   Tamil Nadu   India    Adult
8          9      Ivy        EEE   Tamil Nadu   India   Senior
9         10     Jack        CSE    Telangana   India     Teen

✅ Generalized data saved to 'aoi_generalized.csv'