# Get MIT Living Wage by Metropolitan Statistical Area (MSA)

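This script retrieves the living wage by family type for metropolitan statistical areas (MSAs) in the United States and saves the results to a CSV file. Hourly living wages are converted to annual figures by multiplying by 2,080 hours. The living wage is based on the methodology established for the MIT Living Wage calculator, which is available for free at: https://livingwage.mit.edu/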

### Step 1: Import required libraries.

In [2]:
import csv
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

### Step 2: Define FIPS codes and state names.

In [4]:
# Two-digit FIPS code -> state name for the 50 states, in ascending FIPS order.
# The codes are kept as zero-padded strings because they are interpolated
# directly into the per-state page URL.
state_fips = {
    "01": "Alabama",
    "02": "Alaska",
    "04": "Arizona",
    "05": "Arkansas",
    "06": "California",
    "08": "Colorado",
    "09": "Connecticut",
    "10": "Delaware",
    "12": "Florida",
    "13": "Georgia",
    "15": "Hawaii",
    "16": "Idaho",
    "17": "Illinois",
    "18": "Indiana",
    "19": "Iowa",
    "20": "Kansas",
    "21": "Kentucky",
    "22": "Louisiana",
    "23": "Maine",
    "24": "Maryland",
    "25": "Massachusetts",
    "26": "Michigan",
    "27": "Minnesota",
    "28": "Mississippi",
    "29": "Missouri",
    "30": "Montana",
    "31": "Nebraska",
    "32": "Nevada",
    "33": "New Hampshire",
    "34": "New Jersey",
    "35": "New Mexico",
    "36": "New York",
    "37": "North Carolina",
    "38": "North Dakota",
    "39": "Ohio",
    "40": "Oklahoma",
    "41": "Oregon",
    "42": "Pennsylvania",
    "44": "Rhode Island",
    "45": "South Carolina",
    "46": "South Dakota",
    "47": "Tennessee",
    "48": "Texas",
    "49": "Utah",
    "50": "Vermont",
    "51": "Virginia",
    "53": "Washington",
    "54": "West Virginia",
    "55": "Wisconsin",
    "56": "Wyoming",
}

### Step 3: Define base URL for retrieving the data.

In [6]:
# Root of the MIT Living Wage site; the state and metro page URLs are built from it.
base_url = "https://livingwage.mit.edu/"

### Step 4: Set headers to simulate a browser request.

In [8]:
# HTTP headers sent with every request; the User-Agent imitates a desktop
# Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
}

### Step 5: Define function to parse currency values and remove dollar signs.

In [10]:
def parse_currency(value):
    """Convert a currency string such as ``"$1,234.56"`` to a float.

    Dollar signs, thousands separators and surrounding whitespace are
    removed before conversion.

    Args:
        value: The text to parse. Numbers are accepted and returned as
            floats; ``None`` and other non-string objects are treated as
            unparseable.

    Returns:
        The numeric value as a float, or ``None`` when the input cannot be
        interpreted as a number.
    """
    if value is None:
        return None
    if isinstance(value, (int, float)):
        return float(value)
    if not isinstance(value, str):
        # Previously a non-string raised AttributeError from .replace(),
        # which the ValueError handler did not catch.
        return None
    try:
        return float(value.replace('$', '').replace(',', '').strip())
    except ValueError:
        return None

### Step 6: Define function to retrieve living wage data.

In [12]:
def get_all_annual_wages(url, timeout=30):
    """Fetch a living-wage page and return the annualized wage per family type.

    The first ``<table>`` on the page is used. Its third row is taken to be
    the hourly living-wage row: one label cell followed by 12 wage cells in
    the order listed in ``keys`` below.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the scrape indefinitely.

    Returns:
        A dict mapping each family-type key (e.g. ``"1adult_0children"``) to
        the hourly wage multiplied by 2080 and rounded to two decimals, or
        to ``"N/A"`` when that cell cannot be parsed. An empty dict is
        returned when the page cannot be fetched or does not have the
        expected table layout; the reason is printed.
    """
    # Hours used to annualize an hourly wage (2080 = 40 h/week * 52 weeks).
    hours_per_year = 2080

    # Keys in the exact order the wage columns appear in the table.
    keys = [
        "1adult_0children", "1adult_1child", "1adult_2children", "1adult_3children",
        "2adults1working_0children", "2adults1working_1child",
        "2adults1working_2children", "2adults1working_3children",
        "2adults2working_0children", "2adults2working_1child",
        "2adults2working_2children", "2adults2working_3children",
    ]

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        tables = soup.find_all('table')

        if not tables:
            print(f"No tables found at {url}")
            return {}

        # The wage data is in the third row (index 2) of the first table.
        rows = tables[0].find_all('tr')
        if len(rows) < 3:
            print(f"Expected row not found in table at {url}")
            return {}

        cols = rows[2].find_all(['td', 'th'])

        # One label column plus one column per key.
        if len(cols) < len(keys) + 1:
            print(f"Not enough columns in wage row at {url}")
            return {}

        wages = {}
        # cols[1:] skips the label column; zip stops after the last key.
        for key, cell in zip(keys, cols[1:]):
            value = parse_currency(cell.get_text(strip=True))
            wages[key] = round(value * hours_per_year, 2) if value is not None else "N/A"

        return wages

    except Exception as e:
        # Deliberately broad: this is a best-effort scraper, so any network
        # or parsing failure for one page is reported and skipped rather
        # than aborting the whole run.
        print(f"Error processing URL {url}: {e}")
        return {}

### Step 7: Define expected wage keys.

In [14]:
# Column keys for the wage data: every family type crossed with every
# child count, family type varying slowest.
family_types = ['1adult', '2adults1working', '2adults2working']
children = ['0children', '1child', '2children', '3children']
expected_wage_keys = [
    f"{family}_{kids}"
    for family in family_types
    for kids in children
]

### Step 8: Initialize data storage.

In [16]:
# Accumulates one row (a list of CSV cell values) per metro area; it is
# filled by the state loop and later written out to the CSV file.
metro_data = []

### Step 9: Loop through states to retrieve living wage data.

In [None]:
def _msa_label(msa_name):
    """Return the MSA name without its trailing state suffix.

    Everything after the last comma is dropped, so both ``"Abilene, TX"``
    and multi-state names such as ``"Cincinnati, OH-KY-IN"`` lose the whole
    suffix. A name without a comma is returned unchanged.
    """
    label, comma, _suffix = msa_name.rpartition(',')
    return label.strip() if comma else msa_name


def _scrape_state(state_code, state_name, timeout=30):
    """Return one output row per metro area linked from a state's locations page.

    Each row is ``[state_code, state_name, msa_name, msa_label, msa_url]``
    followed by one value per entry of ``expected_wage_keys`` (``"N/A"``
    where no wage was obtained). Failures are printed and yield an empty
    list so that one bad state does not abort the whole run.
    """
    state_url = f"{base_url}states/{state_code}/locations"
    try:
        state_response = requests.get(state_url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve state page: {state_url} ({exc})")
        return []

    if state_response.status_code != 200:
        print(f"Failed to retrieve state page: {state_url}")
        return []

    state_soup = BeautifulSoup(state_response.content, 'html.parser')
    msa_section = state_soup.find('div', class_="container")
    if msa_section is None:
        print(f"No MSA section found for {state_name}")
        return []

    rows = []
    for msa_link in msa_section.find_all('a', href=True):
        href = msa_link['href']
        if 'metros' not in href:
            continue

        # urljoin copes with root-relative ("/metros/...") and absolute
        # hrefs; plain concatenation yields a double slash or a broken URL.
        full_msa_url = urljoin(base_url, href)
        msa_name = msa_link.text.strip()
        wage_data = get_all_annual_wages(full_msa_url)

        row = [state_code, state_name, msa_name, _msa_label(msa_name), full_msa_url]
        row.extend(wage_data.get(key, "N/A") for key in expected_wage_keys)
        rows.append(row)

    return rows


for state_code, state_name in state_fips.items():
    print(f"Processing state: {state_name} ({state_code})...")
    metro_data.extend(_scrape_state(state_code, state_name))
    print(f"Finished processing state: {state_name}")


Processing state: Alabama (01)...
Finished processing state: Alabama
Processing state: Alaska (02)...
Finished processing state: Alaska
Processing state: Arizona (04)...
Finished processing state: Arizona
Processing state: Arkansas (05)...
Finished processing state: Arkansas
Processing state: California (06)...
Finished processing state: California
Processing state: Colorado (08)...
Finished processing state: Colorado
Processing state: Connecticut (09)...
Finished processing state: Connecticut
Processing state: Delaware (10)...
Finished processing state: Delaware
Processing state: Florida (12)...
Finished processing state: Florida
Processing state: Georgia (13)...
Finished processing state: Georgia
Processing state: Hawaii (15)...
Finished processing state: Hawaii
Processing state: Idaho (16)...
Finished processing state: Idaho
Processing state: Illinois (17)...
Finished processing state: Illinois
Processing state: Indiana (18)...
Finished processing state: Indiana
Processing state: Io

### Step 10: Write data to a CSV file.

In [23]:
# Column titles: five identifying columns followed by one column per wage key.
header = ["State Code", "State Name", "MSA Name", "MSA Name Label", "MSA URL"]
header = header + expected_wage_keys

# newline='' lets the csv module control line endings itself.
with open('mit_living_wage_by_msa.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(header)
    csv_writer.writerows(metro_data)

print("CSV file 'mit_living_wage_by_msa.csv' has been created.")

CSV file 'mit_living_wage_by_msa.csv' has been created.
