Pandas Vendor2 Export
This Python script automates the process of generating and transferring CSV files containing parent and staff data for the Scottsdale Unified School District (SUSD) to a vendor using SFTP. It extracts, transforms, and merges data from various CSV files to produce formatted lists of parents and staff, filters out invalid records, and exports them as CSV files. These files are then uploaded to the vendor's SFTP server. Upon completion, the files are archived, and if any errors occur during the process, an alert is sent using a PowerShell script.
import pandas as pd
import pysftp
from datetime import date
import os
import shutil
import subprocess
import sys
def get_file_date(time_format=r"%m%d%Y"):
"""Returns today's date formatted."""
return date.today().strftime(time_format)
def load_csv(file_path, encoding='latin1'):
"""Load a CSV file into a DataFrame."""
return pd.read_csv(file_path, encoding=encoding, keep_default_na=False)
def gen_parents():
"""Generates a CSV file with parent demographic data for PeachJar."""
# Define file paths
base_path = f"//PATH_TO_FILE_DIR/{get_file_date()}/"
parent_demographics = load_csv(f"{base_path}iam.sis.demographics.csv")
parent_reference = load_csv(f"{base_path}iam.sis.parentreference.csv")
schools = load_csv(f"{base_path}iam.sis.schools.csv")
# Merge dataframes to get the final data
stage1 = pd.merge(parent_demographics, parent_reference, left_on='DEMOGRAPHICS_ID', right_on='PARENT_ID')
stage2 = pd.merge(stage1, parent_demographics, left_on='STUDENT_ID', right_on='DEMOGRAPHICS_ID')
final = pd.merge(stage2, schools, left_on='LOCATION_y', right_on='SCHOOL_ID')
# Select relevant columns, drop rows without emails, and remove duplicates
final = final[['NAME', 'GRADE_LEVEL_y', 'PERSONAL_EMAIL_x']].rename(
columns={"NAME": "school_sis_id", "GRADE_LEVEL_y": "grade", "PERSONAL_EMAIL_x": "email"}
)
final = final[final['email'].str.strip() != ''].drop_duplicates()
# Export final CSV
final.to_csv('ScottsdaleUSDParents.csv', index=False)
def gen_staff():
"""Generates a CSV file with staff data for PeachJar."""
base_path = f"//PATH_TO_FILE_DIR/{get_file_date()}/"
staff_demographics = load_csv(f"{base_path}iam.hrms.demographics.csv")
# Convert date columns and filter rows based on hire and termination dates
today = pd.to_datetime(get_file_date(r"%Y%m%d"))
staff_demographics["TERMINATION_DATE"] = pd.to_datetime(staff_demographics["TERMINATION_DATE"], errors='coerce')
staff_demographics["HIRE_DATE"] = pd.to_datetime(staff_demographics["HIRE_DATE"], errors='coerce')
active_staff = staff_demographics[
(staff_demographics['HIRE_DATE'] <= today) &
(staff_demographics['TERMINATION_DATE'].isnull() | (staff_demographics['TERMINATION_DATE'] > today))
]
# Add PeachJar-specific ID field and clean the sis_id column
active_staff['sis_id'] = active_staff["DepartmentDescription"]
# Prepare the final data with extra columns for names
final = active_staff[['sis_id', 'BUSINESS_EMAIL']].rename(columns={"BUSINESS_EMAIL": "email"})
final.insert(2, "first_name", " ")
final.insert(3, "last_name", " ")
school_mapping = {
"SCHOOL1": "SCHO1", "SCHOOL2": "SCHO2"
}
final['sis_id'] = final['sis_id'].replace(school_mapping)
# Remove rows with missing emails and export to CSV
final = final[final['email'].str.strip() != '']
final.to_csv('ScottsdaleUSDStaff.csv', index=False)
def ftp_files():
"""Transfer files via SFTP to the VENDOR server."""
host = 'SFTP_HOST'
port = 22
with open('Config.txt') as config:
username = config.readline().strip()
password = config.readline().strip()
# Define paths for local and remote files
local_files = {
'staff': './ScottsdaleUSDStaff.csv',
'parents': './ScottsdaleUSDParents.csv'
}
remote_paths = {
'staff': '/REMOTE_PATH/ScottsdaleUSDStaff.csv',
'parents': '/REMOTE_PATH/ScottsdaleUSDParents.csv'
}
# Establish SFTP connection and transfer files
cnopts = pysftp.CnOpts()
cnopts.hostkeys = None
with pysftp.Connection(host=host, port=port, username=username, password=password, cnopts=cnopts) as sftp:
print("SFTP connection established.")
for key, local_file in local_files.items():
sftp.put(local_file, remote_paths[key])
print(f"Uploaded {local_file} to {remote_paths[key]}")
def clean_up():
"""Move generated files to the archive folder."""
archive_dir = f"//ARCHIVE_PATH/{get_file_date()}"
os.makedirs(archive_dir, exist_ok=True)
shutil.move('./ScottsdaleUSDStaff.csv', archive_dir)
shutil.move('./ScottsdaleUSDParents.csv', archive_dir)
print(f"Files archived to {archive_dir}")
def send_alert():
"""Send an alert if SFTP fails."""
subprocess.Popen(['powershell.exe', './send_alert.ps1'], stdout=sys.stdout)
# Main Execution
if __name__ == "__main__":
try:
gen_staff()
gen_parents()
ftp_files()
clean_up()
except Exception as e:
print(f"An error occurred: {e}")
send_alert()
sys.exit()
Last updated