Pandas Vendor1 Export

This Python script automates the processing of employee demographic data for a given date. It loads employee data from a CSV file, filters it to include only active employees in specific departments, enriches the data by adding additional columns such as patrontype, status, and accesslevel, and assigns short names based on department descriptions. The final processed data is formatted and saved into a new CSV file. Additionally, the script appends the new data to an existing staff file, ensuring that duplicate entries are removed. The script includes error handling to manage any exceptions during execution.

import pandas as pd
from datetime import datetime, timedelta

def load_data(file_path, encoding='latin1'):
    """Helper function to load a CSV file into a DataFrame."""
    return pd.read_csv(file_path, encoding=encoding, keep_default_na=False)

def preprocess_employee_data(today_str):
    """Preprocess the employee demographics data by filtering active employees."""
    staff_demographics_path = f"//PATH_TO_FILE/{today_str}-FILE.csv"
    
    # Load the employee demographics data
    employee_df = load_data(staff_demographics_path)
    
    # Convert date columns and filter active employees
    employee_df['TERMINATION_DATE'] = pd.to_datetime(employee_df['TERMINATION_DATE'], errors='coerce')
    employee_df['HIRE_DATE'] = pd.to_datetime(employee_df['HIRE_DATE'], errors='coerce')
    
    today = pd.to_datetime(today_str)
    active_employees = employee_df[employee_df['HIRE_DATE'] == today]
    
    return active_employees

def filter_and_enrich_data(active_employees):
    """Filter employees by department and add additional columns."""
    # Define the departments to filter
    filter_departments = [
        "Group1", "Group2", "Group3", "Group4"
    ]
    
    # Filter employees by departments
    active_employees = active_employees[active_employees['DepartmentDescription'].isin(filter_departments)]
    
    # Add additional columns
    active_employees['patrontype'] = 'Faculty'
    active_employees['status'] = 'A'
    active_employees['accesslevel'] = 'Faculty'
    active_employees['shortName'] = ' '
    active_employees['primarySite'] = ' '
    active_employees['PortalUserName'] = ' '
    active_employees['password'] = ' '
    active_employees['homeroom'] = ' '
    
    return active_employees

def assign_short_names(active_employees):
    """Assign short names based on department descriptions."""
    # Define a dictionary for mapping short names
    # Needed to Redact this info, supplemented with place holder data
    short_name_mapping = {
        "school1": "scho1", "school2": "scho2", "school3": "scho3"
    }
    
    # Assign short names using mapping
    active_employees['shortName'] = active_employees['DepartmentDescription'].map(short_name_mapping).fillna(active_employees['shortName'])
    
    return active_employees

def create_final_dataframe(active_employees):
    """Create the final DataFrame with the necessary columns and format data."""
    # Select and rename relevant columns
    final_df = active_employees[[
        "DEMOGRAPHIC_ID", "Last_Name", "First_Name", "GENDER", "patrontype", 
        "BUSINESS_EMAIL", "shortName", "primarySite", "PortalUserName", 
        "status", "password", "accesslevel", "homeroom"
    ]].rename(columns={
        "DEMOGRAPHIC_ID": "DistrictID", 
        "Last_Name": "LastName", 
        "First_Name": "FirstName", 
        "GENDER": "Gender", 
        "BUSINESS_EMAIL": "EmployeeEmail", 
        "PortalUserName": "username"
    })
    
    # Format name columns and usernames
    final_df['FirstName'] = final_df['FirstName'].str.upper()
    final_df['LastName'] = final_df['LastName'].str.upper()
    final_df['username'] = final_df['EmployeeEmail'].str.split('@').str[0]
    
    return final_df

def save_to_csv(final_df, file_path):
    """Save the DataFrame to a CSV file."""
    final_df.to_csv(file_path, index=False)

def append_to_existing_staff(final_df, staff_file_path):
    """Append the new staff data to the existing staff CSV file."""
    staff_df = load_data(staff_file_path)
    
    # Append and drop duplicates
    updated_staff = pd.concat([staff_df, final_df]).drop_duplicates().astype(str)
    
    # Save updated staff data
    save_to_csv(updated_staff, staff_file_path)

def main():
    """Main function to orchestrate the staff data processing."""
    try:
        # Get today's date in Y/M/D format
        today_str = datetime.today().strftime(r"%Y%m%d")
        
        # Process employee data and enrich it
        active_employees = preprocess_employee_data(today_str)
        active_employees = filter_and_enrich_data(active_employees)
        active_employees = assign_short_names(active_employees)
        
        # Create the final DataFrame and save it
        final_df = create_final_dataframe(active_employees)
        save_to_csv(final_df, 'New_Users.csv')
        
        # Append the data to the existing staff file
        append_to_existing_staff(final_df, '//PATH_TO_EXISTING_FILE')
        
        print("Staff data processed successfully.")
        
    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    main()

PreviousPandas Vendor2 Export NextPandas and AD

Last updated 10 months ago