Pandas Vendor1 Export
This Python script automates the processing of employee demographic data for a given date. It loads employee data from a CSV file, filters it to include only active employees in specific departments, enriches the data by adding additional columns such as patrontype
, status
, and accesslevel
, and assigns short names based on department descriptions. The final processed data is formatted and saved into a new CSV file. Additionally, the script appends the new data to an existing staff file, ensuring that duplicate entries are removed. The script includes error handling to manage any exceptions during execution.
import pandas as pd
from datetime import datetime, timedelta
def load_data(file_path, encoding='latin1'):
"""Helper function to load a CSV file into a DataFrame."""
return pd.read_csv(file_path, encoding=encoding, keep_default_na=False)
def preprocess_employee_data(today_str):
"""Preprocess the employee demographics data by filtering active employees."""
staff_demographics_path = f"//PATH_TO_FILE/{today_str}-FILE.csv"
# Load the employee demographics data
employee_df = load_data(staff_demographics_path)
# Convert date columns and filter active employees
employee_df['TERMINATION_DATE'] = pd.to_datetime(employee_df['TERMINATION_DATE'], errors='coerce')
employee_df['HIRE_DATE'] = pd.to_datetime(employee_df['HIRE_DATE'], errors='coerce')
today = pd.to_datetime(today_str)
active_employees = employee_df[employee_df['HIRE_DATE'] == today]
return active_employees
def filter_and_enrich_data(active_employees):
"""Filter employees by department and add additional columns."""
# Define the departments to filter
filter_departments = [
"Group1", "Group2", "Group3", "Group4"
]
# Filter employees by departments
active_employees = active_employees[active_employees['DepartmentDescription'].isin(filter_departments)]
# Add additional columns
active_employees['patrontype'] = 'Faculty'
active_employees['status'] = 'A'
active_employees['accesslevel'] = 'Faculty'
active_employees['shortName'] = ' '
active_employees['primarySite'] = ' '
active_employees['PortalUserName'] = ' '
active_employees['password'] = ' '
active_employees['homeroom'] = ' '
return active_employees
def assign_short_names(active_employees):
"""Assign short names based on department descriptions."""
# Define a dictionary for mapping short names
# Needed to Redact this info, supplemented with place holder data
short_name_mapping = {
"school1": "scho1", "school2": "scho2", "school3": "scho3"
}
# Assign short names using mapping
active_employees['shortName'] = active_employees['DepartmentDescription'].map(short_name_mapping).fillna(active_employees['shortName'])
return active_employees
def create_final_dataframe(active_employees):
"""Create the final DataFrame with the necessary columns and format data."""
# Select and rename relevant columns
final_df = active_employees[[
"DEMOGRAPHIC_ID", "Last_Name", "First_Name", "GENDER", "patrontype",
"BUSINESS_EMAIL", "shortName", "primarySite", "PortalUserName",
"status", "password", "accesslevel", "homeroom"
]].rename(columns={
"DEMOGRAPHIC_ID": "DistrictID",
"Last_Name": "LastName",
"First_Name": "FirstName",
"GENDER": "Gender",
"BUSINESS_EMAIL": "EmployeeEmail",
"PortalUserName": "username"
})
# Format name columns and usernames
final_df['FirstName'] = final_df['FirstName'].str.upper()
final_df['LastName'] = final_df['LastName'].str.upper()
final_df['username'] = final_df['EmployeeEmail'].str.split('@').str[0]
return final_df
def save_to_csv(final_df, file_path):
"""Save the DataFrame to a CSV file."""
final_df.to_csv(file_path, index=False)
def append_to_existing_staff(final_df, staff_file_path):
"""Append the new staff data to the existing staff CSV file."""
staff_df = load_data(staff_file_path)
# Append and drop duplicates
updated_staff = pd.concat([staff_df, final_df]).drop_duplicates().astype(str)
# Save updated staff data
save_to_csv(updated_staff, staff_file_path)
def main():
"""Main function to orchestrate the staff data processing."""
try:
# Get today's date in Y/M/D format
today_str = datetime.today().strftime(r"%Y%m%d")
# Process employee data and enrich it
active_employees = preprocess_employee_data(today_str)
active_employees = filter_and_enrich_data(active_employees)
active_employees = assign_short_names(active_employees)
# Create the final DataFrame and save it
final_df = create_final_dataframe(active_employees)
save_to_csv(final_df, 'New_Users.csv')
# Append the data to the existing staff file
append_to_existing_staff(final_df, '//PATH_TO_EXISTING_FILE')
print("Staff data processed successfully.")
except Exception as e:
print(f"An error occurred: {e}")
if __name__ == "__main__":
main()
Last updated