Pandas Vendor1 Export
import pandas as pd
from datetime import datetime, timedelta
def load_data(file_path, encoding='latin1'):
"""Helper function to load a CSV file into a DataFrame."""
return pd.read_csv(file_path, encoding=encoding, keep_default_na=False)
def preprocess_employee_data(today_str):
"""Preprocess the employee demographics data by filtering active employees."""
staff_demographics_path = f"//PATH_TO_FILE/{today_str}-FILE.csv"
# Load the employee demographics data
employee_df = load_data(staff_demographics_path)
# Convert date columns and filter active employees
employee_df['TERMINATION_DATE'] = pd.to_datetime(employee_df['TERMINATION_DATE'], errors='coerce')
employee_df['HIRE_DATE'] = pd.to_datetime(employee_df['HIRE_DATE'], errors='coerce')
today = pd.to_datetime(today_str)
active_employees = employee_df[employee_df['HIRE_DATE'] == today]
return active_employees
def filter_and_enrich_data(active_employees):
"""Filter employees by department and add additional columns."""
# Define the departments to filter
filter_departments = [
"Group1", "Group2", "Group3", "Group4"
]
# Filter employees by departments
active_employees = active_employees[active_employees['DepartmentDescription'].isin(filter_departments)]
# Add additional columns
active_employees['patrontype'] = 'Faculty'
active_employees['status'] = 'A'
active_employees['accesslevel'] = 'Faculty'
active_employees['shortName'] = ' '
active_employees['primarySite'] = ' '
active_employees['PortalUserName'] = ' '
active_employees['password'] = ' '
active_employees['homeroom'] = ' '
return active_employees
def assign_short_names(active_employees):
"""Assign short names based on department descriptions."""
# Define a dictionary for mapping short names
# Needed to Redact this info, supplemented with place holder data
short_name_mapping = {
"school1": "scho1", "school2": "scho2", "school3": "scho3"
}
# Assign short names using mapping
active_employees['shortName'] = active_employees['DepartmentDescription'].map(short_name_mapping).fillna(active_employees['shortName'])
return active_employees
def create_final_dataframe(active_employees):
"""Create the final DataFrame with the necessary columns and format data."""
# Select and rename relevant columns
final_df = active_employees[[
"DEMOGRAPHIC_ID", "Last_Name", "First_Name", "GENDER", "patrontype",
"BUSINESS_EMAIL", "shortName", "primarySite", "PortalUserName",
"status", "password", "accesslevel", "homeroom"
]].rename(columns={
"DEMOGRAPHIC_ID": "DistrictID",
"Last_Name": "LastName",
"First_Name": "FirstName",
"GENDER": "Gender",
"BUSINESS_EMAIL": "EmployeeEmail",
"PortalUserName": "username"
})
# Format name columns and usernames
final_df['FirstName'] = final_df['FirstName'].str.upper()
final_df['LastName'] = final_df['LastName'].str.upper()
final_df['username'] = final_df['EmployeeEmail'].str.split('@').str[0]
return final_df
def save_to_csv(final_df, file_path):
"""Save the DataFrame to a CSV file."""
final_df.to_csv(file_path, index=False)
def append_to_existing_staff(final_df, staff_file_path):
"""Append the new staff data to the existing staff CSV file."""
staff_df = load_data(staff_file_path)
# Append and drop duplicates
updated_staff = pd.concat([staff_df, final_df]).drop_duplicates().astype(str)
# Save updated staff data
save_to_csv(updated_staff, staff_file_path)
def main():
"""Main function to orchestrate the staff data processing."""
try:
# Get today's date in Y/M/D format
today_str = datetime.today().strftime(r"%Y%m%d")
# Process employee data and enrich it
active_employees = preprocess_employee_data(today_str)
active_employees = filter_and_enrich_data(active_employees)
active_employees = assign_short_names(active_employees)
# Create the final DataFrame and save it
final_df = create_final_dataframe(active_employees)
save_to_csv(final_df, 'New_Users.csv')
# Append the data to the existing staff file
append_to_existing_staff(final_df, '//PATH_TO_EXISTING_FILE')
print("Staff data processed successfully.")
except Exception as e:
print(f"An error occurred: {e}")
if __name__ == "__main__":
main()
Last updated