Pandas Vendor2 Export

This Python script automates the process of generating and transferring CSV files containing parent and staff data for the Scottsdale Unified School District (SUSD) to a vendor using SFTP. It extracts, transforms, and merges data from various CSV files to produce formatted lists of parents and staff, filters out invalid records, and exports them as CSV files. These files are then uploaded to the vendor's SFTP server. Upon completion, the files are archived, and if any errors occur during the process, an alert is sent using a PowerShell script.

import pandas as pd
import pysftp
from datetime import date
import os
import shutil
import subprocess
import sys


def get_file_date(time_format=r"%m%d%Y"):
    """Returns today's date formatted."""
    return date.today().strftime(time_format)


def load_csv(file_path, encoding='latin1'):
    """Load a CSV file into a DataFrame."""
    return pd.read_csv(file_path, encoding=encoding, keep_default_na=False)


def gen_parents():
    """Generates a CSV file with parent demographic data for PeachJar."""
    # Define file paths
    base_path = f"//PATH_TO_FILE_DIR/{get_file_date()}/"
    parent_demographics = load_csv(f"{base_path}iam.sis.demographics.csv")
    parent_reference = load_csv(f"{base_path}iam.sis.parentreference.csv")
    schools = load_csv(f"{base_path}iam.sis.schools.csv")

    # Merge dataframes to get the final data
    stage1 = pd.merge(parent_demographics, parent_reference, left_on='DEMOGRAPHICS_ID', right_on='PARENT_ID')
    stage2 = pd.merge(stage1, parent_demographics, left_on='STUDENT_ID', right_on='DEMOGRAPHICS_ID')
    final = pd.merge(stage2, schools, left_on='LOCATION_y', right_on='SCHOOL_ID')

    # Select relevant columns, drop rows without emails, and remove duplicates
    final = final[['NAME', 'GRADE_LEVEL_y', 'PERSONAL_EMAIL_x']].rename(
        columns={"NAME": "school_sis_id", "GRADE_LEVEL_y": "grade", "PERSONAL_EMAIL_x": "email"}
    )
    final = final[final['email'].str.strip() != ''].drop_duplicates()

    # Export final CSV
    final.to_csv('ScottsdaleUSDParents.csv', index=False)


def gen_staff():
    """Generates a CSV file with staff data for PeachJar."""
    base_path = f"//PATH_TO_FILE_DIR/{get_file_date()}/"
    staff_demographics = load_csv(f"{base_path}iam.hrms.demographics.csv")

    # Convert date columns and filter rows based on hire and termination dates
    today = pd.to_datetime(get_file_date(r"%Y%m%d"))
    staff_demographics["TERMINATION_DATE"] = pd.to_datetime(staff_demographics["TERMINATION_DATE"], errors='coerce')
    staff_demographics["HIRE_DATE"] = pd.to_datetime(staff_demographics["HIRE_DATE"], errors='coerce')

    active_staff = staff_demographics[
        (staff_demographics['HIRE_DATE'] <= today) & 
        (staff_demographics['TERMINATION_DATE'].isnull() | (staff_demographics['TERMINATION_DATE'] > today))
    ]

    # Add PeachJar-specific ID field and clean the sis_id column
    active_staff['sis_id'] = active_staff["DepartmentDescription"]

    # Prepare the final data with extra columns for names
    final = active_staff[['sis_id', 'BUSINESS_EMAIL']].rename(columns={"BUSINESS_EMAIL": "email"})
    final.insert(2, "first_name", " ")
    final.insert(3, "last_name", " ")

    
    school_mapping = {
        "SCHOOL1": "SCHO1", "SCHOOL2": "SCHO2"
    }
    final['sis_id'] = final['sis_id'].replace(school_mapping)

    # Remove rows with missing emails and export to CSV
    final = final[final['email'].str.strip() != '']
    final.to_csv('ScottsdaleUSDStaff.csv', index=False)


def ftp_files():
    """Transfer files via SFTP to the VENDOR server."""
    host = 'SFTP_HOST'
    port = 22

    with open('Config.txt') as config:
        username = config.readline().strip()
        password = config.readline().strip()

    # Define paths for local and remote files
    local_files = {
        'staff': './ScottsdaleUSDStaff.csv',
        'parents': './ScottsdaleUSDParents.csv'
    }
    remote_paths = {
        'staff': '/REMOTE_PATH/ScottsdaleUSDStaff.csv',
        'parents': '/REMOTE_PATH/ScottsdaleUSDParents.csv'
    }

    # Establish SFTP connection and transfer files
    cnopts = pysftp.CnOpts()
    cnopts.hostkeys = None
    with pysftp.Connection(host=host, port=port, username=username, password=password, cnopts=cnopts) as sftp:
        print("SFTP connection established.")
        for key, local_file in local_files.items():
            sftp.put(local_file, remote_paths[key])
            print(f"Uploaded {local_file} to {remote_paths[key]}")


def clean_up():
    """Move generated files to the archive folder."""
    archive_dir = f"//ARCHIVE_PATH/{get_file_date()}"

    os.makedirs(archive_dir, exist_ok=True)
    shutil.move('./ScottsdaleUSDStaff.csv', archive_dir)
    shutil.move('./ScottsdaleUSDParents.csv', archive_dir)
    print(f"Files archived to {archive_dir}")


def send_alert():
    """Send an alert if SFTP fails."""
    subprocess.Popen(['powershell.exe', './send_alert.ps1'], stdout=sys.stdout)


# Main Execution
if __name__ == "__main__":
    try:
        gen_staff()
        gen_parents()

        ftp_files()
        clean_up()
    except Exception as e:
        print(f"An error occurred: {e}")
        send_alert()

    sys.exit()

PreviousResponse Headers NextPandas Vendor1 Export

Last updated 10 months ago