{ "cells": [ { "cell_type": "markdown", "id": "91cfd67f-5c25-496d-a0b7-ae6bcbc89c9b", "metadata": {}, "source": [ "# PSEO API - Earnings\n", "\n", "This notebook is an example of how to retrieve earnings data from the PSEO Explorer using an API in Python. Before you begin, you will need to [request an API key](https://api.census.gov/data/key_signup.html) from the U.S. Census Bureau. Once you have a key, paste it into the `key` variable in the cell below; it is included in the list of parameters for each request." ] }, { "cell_type": "markdown", "id": "7e26e917-fb6d-4778-b720-b765ff384dc1", "metadata": {}, "source": [ "### Define the U.S. Census API key for use in this session." ] }, { "cell_type": "code", "execution_count": 10, "id": "5ab52620-89a6-4b9b-85e4-65668ebd59ad", "metadata": {}, "outputs": [], "source": [ "key = \"INSERT YOUR API KEY HERE\" # Replace the text in quotation marks with your API key." ] }, { "cell_type": "markdown", "id": "9ab615bb-3d46-41a8-92e4-cd596e092320", "metadata": {}, "source": [ "### Define the base URL for use in this session." ] }, { "cell_type": "code", "execution_count": null, "id": "662aaeea-2edb-4d5f-bce2-8b9bcb35e35a", "metadata": {}, "outputs": [], "source": [ "url = \"https://api.census.gov/data/timeseries/pseo/earnings\"" ] }, { "cell_type": "markdown", "id": "b86cd7d9-a64e-476f-9198-f32b9348e6cc", "metadata": {}, "source": [ "### Import required libraries." ] }, { "cell_type": "code", "execution_count": 12, "id": "e54b635c-dbda-42e7-9e1b-6907600c2fcd", "metadata": {}, "outputs": [], "source": [ "import requests\n", "import pandas as pd" ] }, { "cell_type": "markdown", "id": "bdb26af7-65d9-40c1-9443-749f457ce5dd", "metadata": {}, "source": [ "## Demo 1: Get oriented to the data." ] }, { "cell_type": "markdown", "id": "8f9916d9-10d3-44a4-84f9-1e7fb20901fd", "metadata": {}, "source": [ "### 1. Get a list of all institutions." 
] }, { "cell_type": "code", "execution_count": 31, "id": "5f606ec2-5570-444c-9882-51cbfbdc0ac6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " INSTITUTION LABEL_INSTITUTION us\n", "0 01 Institutions in Alabama 1\n", "1 04 Institutions in Arizona 1\n", "2 08 Institutions in Colorado 1\n", "3 09 Institutions in Connecticut 1\n", "4 11 Institutions in District of Columbia 1\n" ] } ], "source": [ "# Create a list to store the fields to retrieve.\n", "fields = [\n", " \"INSTITUTION\",\n", " \"LABEL_INSTITUTION\"\n", "]\n", "\n", "# List the parameters for the GET request.\n", "params = {\n", " \"get\": \",\".join(fields), # Concatenate the list of variables into a single string.\n", " \"for\": \"us:1\", # Required geography for the API.\n", " \"key\": key\n", "}\n", "\n", "# Send the GET request.\n", "response = requests.get(url, params=params)\n", "\n", "# Check the response for errors. If none, write the data to a CSV file and display the first few records.\n", "if response.status_code == 200:\n", " data = response.json()\n", " # Convert to pandas DataFrame\n", " df = pd.DataFrame(data[1:], columns=data[0])\n", " df.to_csv(\"pseoe_institutions.csv\", index=False)\n", " print(df.head())\n", "else:\n", " print(f\"Error {response.status_code}: {response.text}\")" ] }, { "cell_type": "markdown", "id": "5cd10ea6-0f2c-4f3c-939f-537583380ee7", "metadata": {}, "source": [ "### 2. Get a list of institutions in Colorado." 
] }, { "cell_type": "code", "execution_count": 102, "id": "c64fd623-0718-4c1c-ae22-649a720348e0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " INSTITUTION LABEL_INSTITUTION INST_STATE us\n", "0 08 Institutions in Colorado 08 1\n", "1 00134500 Adams State University 08 1\n", "2 00134600 Arapahoe Community College 08 1\n", "3 00134800 Colorado School of Mines 08 1\n", "4 00134900 University of Northern Colorado 08 1\n", "5 00135000 Colorado State University 08 1\n", "6 00135300 Fort Lewis College 08 1\n", "7 00135800 Colorado Mesa University 08 1\n", "8 00136000 Metropolitan State University of Denver 08 1\n", "9 00136500 Colorado State University-Pueblo 08 1\n", "10 00136800 Trinidad State Junior College 08 1\n", "11 00137000 University of Colorado Boulder 08 1\n", "12 00137200 Western Colorado University 08 1\n", "13 00450600 Colorado Mountain College 08 1\n", "14 00450800 University of Colorado Denver 08 1\n", "15 00450900 University of Colorado Colorado Springs 08 1\n", "16 00793300 Front Range Community College-Westminster Campus 08 1\n", "17 00889600 Pikes Peak Community College 08 1\n", "18 00954200 Community College of Denver 08 1\n", "19 00954300 Red Rocks Community College 08 1\n", "20 00998100 Morgan Community College 08 1\n", "21 02116300 Pueblo Community College 08 1\n", "22 04208700 Colorado State University - Global Campus 08 1\n" ] } ], "source": [ "# Create a list to store the fields to retrieve.\n", "fields = [\n", " \"INSTITUTION\",\n", " \"LABEL_INSTITUTION\"\n", "]\n", "\n", "# List the parameters for the GET request.\n", "params = {\n", " \"get\": \",\".join(fields), # Concatenate the list of fields into a single string.\n", " \"INST_STATE\":\"08\", # Filter records to Colorado only.\n", " \"for\": \"us:1\", # Required geography for the API.\n", " \"key\": key\n", "}\n", "\n", "# Send the GET request.\n", "response = requests.get(url, params=params)\n", "\n", "# Check the response for errors. 
If none, write the data to a CSV file and display the data.\n", "if response.status_code == 200:\n", " data = response.json()\n", " # Convert to pandas DataFrame\n", " df = pd.DataFrame(data[1:], columns=data[0])\n", " df.to_csv(\"pseoe_institutions_co.csv\", index=False)\n", " print(df)\n", "else:\n", " print(f\"Error {response.status_code}: {response.text}\")" ] }, { "cell_type": "markdown", "id": "f78bbb45-ac42-4c93-9402-88277a39b74c", "metadata": {}, "source": [ "### 3. Get directory data for Arapahoe Community College." ] }, { "cell_type": "code", "execution_count": 98, "id": "0bb44da1-a7f2-4a93-bf1c-99d54221fa6e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " INSTITUTION LABEL_INSTITUTION INST_STATE LABEL_INST_STATE \\\n", "0 00134600 Arapahoe Community College 08 Colorado \n", "\n", " INSTITUTION us \n", "0 00134600 1 \n" ] } ], "source": [ "# Create a list to store the fields to retrieve.\n", "fields = [\n", " \"INSTITUTION\",\n", " \"LABEL_INSTITUTION\",\n", " \"INST_STATE\",\n", " \"LABEL_INST_STATE\"\n", "]\n", "\n", "# List the parameters for the GET request.\n", "params = {\n", " \"get\": \",\".join(fields), # Concatenate the list of fields into a single string.\n", " \"INSTITUTION\":\"00134600\", # Filter records to Arapahoe Community College only.\n", " \"for\": \"us:1\", # Required geography for the API.\n", " \"key\": key\n", "}\n", "\n", "# Send the GET request.\n", "response = requests.get(url, params=params)\n", "\n", "# Check the response for errors. 
If none, write the data to a CSV file and display the first few records.\n", "if response.status_code == 200:\n", " data = response.json()\n", " # Convert to pandas DataFrame\n", " df = pd.DataFrame(data[1:], columns=data[0])\n", " df.to_csv(\"pseoe_institutions_acc.csv\", index=False)\n", " print(df)\n", "else:\n", " print(f\"Error {response.status_code}: {response.text}\")" ] }, { "cell_type": "markdown", "id": "581d4df4-de17-42eb-8e9e-95d29e19eecc", "metadata": {}, "source": [ "## Demo 2: Get the median earnings at one year for Arapahoe Community College." ] }, { "cell_type": "markdown", "id": "4395460d-3c57-47fe-a4eb-8a53b9451ce1", "metadata": {}, "source": [ "### 1. Add percentile earnings to the list of fields." ] }, { "cell_type": "code", "execution_count": 116, "id": "a8648303-09f6-45ae-a3bc-f5e7e1a69982", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " INSTITUTION LABEL_INSTITUTION Y1_P50_EARNINGS INSTITUTION us\n", "0 00134600 Arapahoe Community College None 00134600 1\n" ] } ], "source": [ "# Create a list to store the fields to retrieve.\n", "fields = [\n", " \"INSTITUTION\",\n", " \"LABEL_INSTITUTION\",\n", " \"Y1_P50_EARNINGS\"\n", "]\n", "\n", "# List the parameters for the GET request.\n", "params = {\n", " \"get\": \",\".join(fields), # Concatenate the list of fields into a single string.\n", " \"INSTITUTION\":\"00134600\", # Filter records to Arapahoe Community College only.\n", " \"for\": \"us:1\", # Required geography for the API.\n", " \"key\": key\n", "}\n", "\n", "# Send the GET request.\n", "response = requests.get(url, params=params)\n", "\n", "# Check the response for errors. 
If none, write the data to a CSV file and display the first few records.\n", "if response.status_code == 200:\n", " data = response.json()\n", " # Convert to pandas DataFrame\n", " df = pd.DataFrame(data[1:], columns=data[0])\n", " df.to_csv(\"pseoe_acc.csv\", index=False)\n", " print(df)\n", "else:\n", " print(f\"Error {response.status_code}: {response.text}\")" ] }, { "cell_type": "markdown", "id": "2130807d-cc51-4cc9-af44-3d940f3d54ce", "metadata": {}, "source": [ "### 2. Add degree level to the list of fields." ] }, { "cell_type": "code", "execution_count": 119, "id": "6eafe9b0-8150-4339-8cf1-6a955a2d2a3a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " INSTITUTION LABEL_INSTITUTION DEGREE_LEVEL Y1_P50_EARNINGS \\\n", "0 00134600 Arapahoe Community College 01 36431 \n", "1 00134600 Arapahoe Community College 02 47466 \n", "2 00134600 Arapahoe Community College 03 44725 \n", "3 00134600 Arapahoe Community College 05 None \n", "\n", " INSTITUTION us \n", "0 00134600 1 \n", "1 00134600 1 \n", "2 00134600 1 \n", "3 00134600 1 \n" ] } ], "source": [ "# Create a list to store the fields to retrieve.\n", "fields = [\n", " \"INSTITUTION\",\n", " \"LABEL_INSTITUTION\",\n", " \"DEGREE_LEVEL\",\n", " \"Y1_P50_EARNINGS\"\n", "]\n", "\n", "# List the parameters for the GET request.\n", "params = {\n", " \"get\": \",\".join(fields), # Concatenate the list of fields into a single string.\n", " \"INSTITUTION\":\"00134600\", # Filter records to Arapahoe Community College only.\n", " \"for\": \"us:1\", # Required geography for the API.\n", " \"key\": key\n", "}\n", "\n", "# Send the GET request.\n", "response = requests.get(url, params=params)\n", "\n", "# Check the response for errors. 
If none, write the data to a CSV file and display the first few records.\n", "if response.status_code == 200:\n", " data = response.json()\n", " # Convert to pandas DataFrame\n", " df = pd.DataFrame(data[1:], columns=data[0])\n", " df.to_csv(\"pseoe_acc.csv\", index=False)\n", " print(df)\n", "else:\n", " print(f\"Error {response.status_code}: {response.text}\")" ] }, { "cell_type": "markdown", "id": "8cd5932b-0397-46e2-b6d8-544d5a4cb2d3", "metadata": {}, "source": [ "### 3. Drop duplicate columns." ] }, { "cell_type": "code", "execution_count": 121, "id": "ec3667b2-67ec-4534-a901-745df56120d3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " INSTITUTION LABEL_INSTITUTION DEGREE_LEVEL Y1_P50_EARNINGS us\n", "0 00134600 Arapahoe Community College 01 36431 1\n", "1 00134600 Arapahoe Community College 02 47466 1\n", "2 00134600 Arapahoe Community College 03 44725 1\n", "3 00134600 Arapahoe Community College 05 None 1\n" ] } ], "source": [ "# Create a list to store the fields to retrieve.\n", "fields = [\n", " \"INSTITUTION\",\n", " \"LABEL_INSTITUTION\",\n", " \"DEGREE_LEVEL\",\n", " \"Y1_P50_EARNINGS\"\n", "]\n", "\n", "# List the parameters for the GET request.\n", "params = {\n", " \"get\": \",\".join(fields), # Concatenate the list of fields into a single string.\n", " \"INSTITUTION\":\"00134600\", # Filter records to Arapahoe Community College only.\n", " \"for\": \"us:1\", # Required geography for the API.\n", " \"key\": key\n", "}\n", "\n", "# Send the GET request.\n", "response = requests.get(url, params=params)\n", "\n", "# Check the response for errors. 
If none, write the data to a CSV file and display the first few records.\n", "if response.status_code == 200:\n", " data = response.json()\n", " # Convert to pandas DataFrame\n", " df = pd.DataFrame(data[1:], columns=data[0])\n", " \n", " df = df.loc[:, ~df.columns.duplicated()] # Drop duplicate columns.\n", " \n", " df.to_csv(\"pseoe_acc.csv\", index=False)\n", " print(df)\n", "else:\n", " print(f\"Error {response.status_code}: {response.text}\")" ] }, { "cell_type": "markdown", "id": "18fbf546-f037-4e5c-9085-910175b5690c", "metadata": {}, "source": [ "### 4. Filter the data for Associate's degrees only." ] }, { "cell_type": "code", "execution_count": 125, "id": "6d36352d-6feb-4dbc-a062-0143931bb5ca", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " INSTITUTION LABEL_INSTITUTION DEGREE_LEVEL Y1_P50_EARNINGS us\n", "0 00134600 Arapahoe Community College 03 44725 1\n" ] } ], "source": [ "# Create a list to store the fields to retrieve.\n", "fields = [\n", " \"INSTITUTION\",\n", " \"LABEL_INSTITUTION\",\n", " \"DEGREE_LEVEL\",\n", " \"Y1_P50_EARNINGS\"\n", "]\n", "\n", "# List the parameters for the GET request.\n", "params = {\n", " \"get\": \",\".join(fields), # Concatenate the list of fields into a single string.\n", " \"INSTITUTION\":\"00134600\", # Filter records to Arapahoe Community College only.\n", " \"DEGREE_LEVEL\":\"03\", # Filter records to Associate's degrees only.\n", " \"for\": \"us:1\", # Required geography for the API.\n", " \"key\": key\n", "}\n", "\n", "# Send the GET request.\n", "response = requests.get(url, params=params)\n", "\n", "# Check the response for errors. 
If none, write the data to a CSV file and display the first few records.\n", "if response.status_code == 200:\n", " data = response.json()\n", " # Convert to pandas DataFrame\n", " df = pd.DataFrame(data[1:], columns=data[0])\n", " df = df.loc[:, ~df.columns.duplicated()] # Drop duplicate columns.\n", " df.to_csv(\"pseoe_acc.csv\", index=False)\n", " print(df)\n", "else:\n", " print(f\"Error {response.status_code}: {response.text}\")" ] }, { "cell_type": "markdown", "id": "ee37e560-e445-4d3e-8392-2ab9608e5bbe", "metadata": {}, "source": [ "### 5. Add the graduation cohort." ] }, { "cell_type": "code", "execution_count": 131, "id": "5e33292e-e970-401f-a035-e3a9caedc2e2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " INSTITUTION LABEL_INSTITUTION DEGREE_LEVEL GRAD_COHORT \\\n", "0 00134600 Arapahoe Community College 03 0000 \n", "1 00134600 Arapahoe Community College 03 2001 \n", "2 00134600 Arapahoe Community College 03 2006 \n", "3 00134600 Arapahoe Community College 03 2011 \n", "4 00134600 Arapahoe Community College 03 2016 \n", "\n", " Y1_P50_EARNINGS us \n", "0 44725 1 \n", "1 47108 1 \n", "2 44268 1 \n", "3 43898 1 \n", "4 44287 1 \n" ] } ], "source": [ "# Create a list to store the fields to retrieve.\n", "fields = [\n", " \"INSTITUTION\",\n", " \"LABEL_INSTITUTION\",\n", " \"DEGREE_LEVEL\",\n", " \"GRAD_COHORT\",\n", " \"Y1_P50_EARNINGS\"\n", "]\n", "\n", "# List the parameters for the GET request.\n", "params = {\n", " \"get\": \",\".join(fields), # Concatenate the list of fields into a single string.\n", " \"INSTITUTION\":\"00134600\", # Filter records to Arapahoe Community College only.\n", " \"DEGREE_LEVEL\":\"03\", # Filter records to Associate's degrees only.\n", " \"for\": \"us:1\", # Required geography for the API.\n", " \"key\": key\n", "}\n", "\n", "# Send the GET request.\n", "response = requests.get(url, params=params)\n", "\n", "# Check the response for errors. 
If none, write the data to a CSV file and display the first few records.\n", "if response.status_code == 200:\n", " data = response.json()\n", " # Convert to pandas DataFrame\n", " df = pd.DataFrame(data[1:], columns=data[0])\n", " df = df.loc[:, ~df.columns.duplicated()] # Drop duplicate columns.\n", " df.to_csv(\"pseoe_acc.csv\", index=False)\n", " print(df)\n", "else:\n", " print(f\"Error {response.status_code}: {response.text}\")" ] }, { "cell_type": "markdown", "id": "a48e120b-2afe-4f9c-a8a0-ba58623d2c99", "metadata": {}, "source": [ "### 6. Add the CIP code columns." ] }, { "cell_type": "code", "execution_count": 138, "id": "383db77f-4598-4d82-8a34-8f1c2451bbc7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " INSTITUTION LABEL_INSTITUTION DEGREE_LEVEL GRAD_COHORT \\\n", "0 00134600 Arapahoe Community College 03 0000 \n", "1 00134600 Arapahoe Community College 03 0000 \n", "2 00134600 Arapahoe Community College 03 0000 \n", "3 00134600 Arapahoe Community College 03 0000 \n", "4 00134600 Arapahoe Community College 03 0000 \n", ".. ... ... ... ... \n", "232 00134600 Arapahoe Community College 03 2016 \n", "233 00134600 Arapahoe Community College 03 2016 \n", "234 00134600 Arapahoe Community College 03 2016 \n", "235 00134600 Arapahoe Community College 03 2016 \n", "236 00134600 Arapahoe Community College 03 2016 \n", "\n", " CIP_LEVEL LABEL_CIP_LEVEL CIPCODE \\\n", "0 A All Degree Fields 00 \n", "1 2 2-Digit CIP Family 01 \n", "2 2 2-Digit CIP Family 09 \n", "3 2 2-Digit CIP Family 11 \n", "4 2 2-Digit CIP Family 12 \n", ".. ... ... ... \n", "232 4 4-Digit CIP Codes 51.10 \n", "233 4 4-Digit CIP Codes 51.38 \n", "234 4 4-Digit CIP Codes 52.02 \n", "235 4 4-Digit CIP Codes 52.03 \n", "236 4 4-Digit CIP Codes 52.18 \n", "\n", " LABEL_CIPCODE Y1_P50_EARNINGS us \n", "0 All Instructional Programs 44725 1 \n", "1 Agricultural/Animal/Plant/Veterinary Science a... 
None 1 \n", "2 Communication, Journalism, and Related Programs None 1 \n", "3 Computer and Information Sciences and Support ... 52026 1 \n", "4 Culinary, Entertainment, and Personal Services 45120 1 \n", ".. ... ... .. \n", "232 Clinical/Medical Laboratory Science/Research a... 50672 1 \n", "233 Registered Nursing, Nursing Administration, Nu... 62783 1 \n", "234 Business Administration, Management and Operat... 39953 1 \n", "235 Accounting and Related Services 47550 1 \n", "236 General Sales, Merchandising and Related Marke... None 1 \n", "\n", "[237 rows x 10 columns]\n" ] } ], "source": [ "# Create a list to store the fields to retrieve.\n", "fields = [\n", " \"INSTITUTION\",\n", " \"LABEL_INSTITUTION\",\n", " \"DEGREE_LEVEL\",\n", " \"GRAD_COHORT\",\n", " \"CIP_LEVEL\",\n", " \"LABEL_CIP_LEVEL\",\n", " \"CIPCODE\",\n", " \"LABEL_CIPCODE\",\n", " \"Y1_P50_EARNINGS\"\n", "]\n", "\n", "# List the parameters for the GET request.\n", "params = {\n", " \"get\": \",\".join(fields), # Concatenate the list of fields into a single string.\n", " \"INSTITUTION\":\"00134600\", # Filter records to Arapahoe Community College only.\n", " \"DEGREE_LEVEL\":\"03\", # Filter records to Associate's degrees only.\n", " \"for\": \"us:1\", # Required geography for the API.\n", " \"key\": key\n", "}\n", "\n", "# Send the GET request.\n", "response = requests.get(url, params=params)\n", "\n", "# Check the response for errors. If none, write the data to a CSV file and display the first few records.\n", "if response.status_code == 200:\n", " data = response.json()\n", " # Convert to pandas DataFrame\n", " df = pd.DataFrame(data[1:], columns=data[0])\n", " df = df.loc[:, ~df.columns.duplicated()] # Drop duplicate columns.\n", " df.to_csv(\"pseoe_acc.csv\", index=False)\n", " print(df)\n", "else:\n", " print(f\"Error {response.status_code}: {response.text}\")" ] }, { "cell_type": "markdown", "id": "20f0a33b-7c89-43f7-83ea-b7bd00efb41f", "metadata": {}, "source": [ "### 7. 
Filter the data for all instructional programs for all graduation cohorts." ] }, { "cell_type": "code", "execution_count": 158, "id": "2a41ddc9-2c8a-450d-bb96-e3b1102da375", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " INSTITUTION LABEL_INSTITUTION DEGREE_LEVEL GRAD_COHORT CIP_LEVEL \\\n", "0 00134600 Arapahoe Community College 03 0000 A \n", "\n", " LABEL_CIP_LEVEL CIPCODE LABEL_CIPCODE Y1_P50_EARNINGS us \n", "0 All Degree Fields 00 All Instructional Programs 44725 1 \n" ] } ], "source": [ "# Create a list to store the fields to retrieve.\n", "fields = [\n", " \"INSTITUTION\",\n", " \"LABEL_INSTITUTION\",\n", " \"DEGREE_LEVEL\",\n", " \"GRAD_COHORT\",\n", " \"CIP_LEVEL\",\n", " \"LABEL_CIP_LEVEL\",\n", " \"CIPCODE\",\n", " \"LABEL_CIPCODE\",\n", " \"Y1_P50_EARNINGS\"\n", "]\n", "\n", "# List the parameters for the GET request.\n", "params = {\n", " \"get\": \",\".join(fields), # Concatenate the list of fields into a single string.\n", " \"INSTITUTION\":\"00134600\", # Filter records to Arapahoe Community College only.\n", " \"DEGREE_LEVEL\":\"03\", # Filter records to Associate's degrees only.\n", " \"CIPCODE\":\"00\", # Filter for all instructional programs.\n", " \"GRAD_COHORT\":\"0000\", # Filter for all graduation cohorts.\n", " \"for\": \"us:1\", # Required geography for the API.\n", " \"key\": key\n", "}\n", "\n", "# Send the GET request.\n", "response = requests.get(url, params=params)\n", "\n", "# Check the response for errors. 
If none, write the data to a CSV file and display the first few records.\n", "if response.status_code == 200:\n", " data = response.json()\n", " # Convert to pandas DataFrame\n", " df = pd.DataFrame(data[1:], columns=data[0])\n", " df = df.loc[:, ~df.columns.duplicated()] # Drop duplicate columns.\n", " df.to_csv(\"pseoe_acc.csv\", index=False)\n", " print(df)\n", "else:\n", " print(f\"Error {response.status_code}: {response.text}\")" ] }, { "cell_type": "markdown", "id": "ac017454-c41e-4660-9492-7abe593359fc", "metadata": {}, "source": [ "## Demo 3: Add Adams State University to the call above." ] }, { "cell_type": "markdown", "id": "a8e0ee45-670c-4578-9f5f-460457016fad", "metadata": {}, "source": [ "### 1. Try creating a list of institutions and joining them into a single string." ] }, { "cell_type": "code", "execution_count": 205, "id": "8c53151e-7fa5-494d-bf35-ab23d504d7d7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Error 204: \n" ] } ], "source": [ "# Create a list to store the fields to retrieve.\n", "fields = [\n", " \"INSTITUTION\",\n", " \"LABEL_INSTITUTION\",\n", " \"DEGREE_LEVEL\",\n", " \"GRAD_COHORT\",\n", " \"CIP_LEVEL\",\n", " \"LABEL_CIP_LEVEL\",\n", " \"CIPCODE\",\n", " \"LABEL_CIPCODE\",\n", " \"Y1_P50_EARNINGS\"\n", "]\n", "\n", "# Declare the institutions to retrieve data for.\n", "institutions = [\"00134600\", \"00134500\"]\n", "\n", "# List the parameters for the GET request.\n", "params = {\n", " \"get\": \",\".join(fields), # Concatenate the list of fields into a single string.\n", " \"INSTITUTION\":\",\".join(institutions), # Filter for selected institutions.\n", " \"DEGREE_LEVEL\":\"03\", # Filter records to Associate's degrees only.\n", " \"CIPCODE\":\"00\", # Filter for all instructional programs.\n", " \"GRAD_COHORT\":\"0000\", # Filter for all graduation cohorts.\n", " \"for\": \"us:1\", # Required geography for the API.\n", " \"key\": key\n", "}\n", "\n", "# Send the GET request.\n", 
"response = requests.get(url, params=params)\n", "\n", "# Check the response for errors. If none, write the data to a CSV file and display the first few records.\n", "if response.status_code == 200:\n", " data = response.json()\n", " # Convert to pandas DataFrame\n", " df = pd.DataFrame(data[1:], columns=data[0])\n", " df = df.loc[:, ~df.columns.duplicated()] # Drop duplicate columns.\n", " df.to_csv(\"pseoe_co.csv\", index=False)\n", " print(df)\n", "else:\n", " print(f\"Error {response.status_code}: {response.text}\")" ] }, { "cell_type": "markdown", "id": "3a04775b-00e8-43a8-99e0-67fc5d9beaf3", "metadata": {}, "source": [ "### 2. Try using a for loop instead." ] }, { "cell_type": "code", "execution_count": 207, "id": "b3900485-048d-4bcc-b889-8a05e6ccfb20", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " INSTITUTION LABEL_INSTITUTION DEGREE_LEVEL GRAD_COHORT CIP_LEVEL \\\n", "0 00134600 Arapahoe Community College 03 0000 A \n", "1 00134500 Adams State University 03 0000 A \n", "\n", " LABEL_CIP_LEVEL CIPCODE LABEL_CIPCODE Y1_P50_EARNINGS us \n", "0 All Degree Fields 00 All Instructional Programs 44725 1 \n", "1 All Degree Fields 00 All Instructional Programs 25452 1 \n" ] } ], "source": [ "# Create a list to store the fields to retrieve.\n", "fields = [\n", " \"INSTITUTION\",\n", " \"LABEL_INSTITUTION\",\n", " \"DEGREE_LEVEL\",\n", " \"GRAD_COHORT\",\n", " \"CIP_LEVEL\",\n", " \"LABEL_CIP_LEVEL\",\n", " \"CIPCODE\",\n", " \"LABEL_CIPCODE\",\n", " \"Y1_P50_EARNINGS\"\n", "]\n", "\n", "# Declare the institutions to retrieve data for.\n", "institutions = [\"00134600\", \"00134500\"]\n", "\n", "# Create a placeholder for the data.\n", "all_data = [] \n", "\n", "for inst in institutions:\n", " params = {\n", " \"get\": \",\".join(fields),\n", " \"INSTITUTION\": inst, # One institution at a time.\n", " \"DEGREE_LEVEL\": \"03\",\n", " \"CIPCODE\": \"00\",\n", " \"GRAD_COHORT\": \"0000\",\n", " \"for\": \"us:1\",\n", " \"key\": 
key\n", " }\n", " \n", " response = requests.get(url, params=params)\n", " \n", " if response.status_code == 200:\n", " data = response.json()\n", " df = pd.DataFrame(data[1:], columns=data[0])\n", " all_data.append(df)\n", " else:\n", " print(f\"Error {response.status_code}: {response.text}\")\n", "\n", "# Combine all results into one DataFrame\n", "final_df = pd.concat(all_data, ignore_index=True)\n", "final_df = final_df.loc[:, ~final_df.columns.duplicated()]\n", "final_df.to_csv(\"pseoe_co.csv\", index=False)\n", "print(final_df)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "f58e647f-fb2c-4ff6-a588-032650ab71f3", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:base] *", "language": "python", "name": "conda-base-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 5 }