1 | import pandas as pd |
1 | #州名的简写 |
1 | pop.columns[0] |
'state/region'
1 | pop = pop.rename(columns={pop.columns[0]:'state_region'}) |
1 | abb_pop = pd.merge(abb,pop,left_on='abbreviation',right_on='state_region',how='outer') |
1 | abb_pop |
| | state | abbreviation | state_region | ages | year | population |
|---|---|---|---|---|---|---|
| 0 | Alabama | AL | AL | under18 | 2012 | 1117489.0 |
| 1 | Alabama | AL | AL | total | 2012 | 4817528.0 |
| 2 | Alabama | AL | AL | under18 | 2010 | 1130966.0 |
| 3 | Alabama | AL | AL | total | 2010 | 4785570.0 |
| 4 | Alabama | AL | AL | under18 | 2011 | 1125763.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 2539 | NaN | NaN | USA | total | 2010 | 309326295.0 |
| 2540 | NaN | NaN | USA | under18 | 2011 | 73902222.0 |
| 2541 | NaN | NaN | USA | total | 2011 | 311582564.0 |
| 2542 | NaN | NaN | USA | under18 | 2012 | 73708179.0 |
| 2543 | NaN | NaN | USA | total | 2012 | 313873685.0 |
2544 rows × 6 columns
1 | abb_pop.isnull().sum() |
state 96
abbreviation 96
state_region 0
ages 0
year 0
population 20
dtype: int64
1 | abb_pop.drop(labels='abbreviation',axis=1,inplace=True) |
1 | abb_pop.isnull().sum() |
state 96
state_region 0
ages 0
year 0
population 20
dtype: int64
1 | #查找州名为空的行号 |
1 | filter_data = abb_pop.loc[inds] |
1 | #提取州名简写 |
array(['PR', 'USA'], dtype=object)
1 | #先抽取简写为PR的数据行号 |
1 | usa_inds = abb_pop.query("state_region=='USA'").index |
1 | indexs = abb_pop.population.isnull() |
1 | #PR2000年的数据 |
1 | un18_data = filter_data.query("ages=='under18'") |
D:\Anaconda3\lib\site-packages\pandas\core\generic.py:5208: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
self[name] = value
1 | abb_pop_are = pd.merge(abb_pop,are,how='outer') |
1 | abb_pop_are = abb_pop_are.rename(columns={abb_pop_are.columns[5]:'area'}) |
1 | are_inds = abb_pop_are.area.isnull() |
D:\Anaconda3\lib\site-packages\pandas\core\generic.py:5208: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
self[name] = value
1 | fill_are |
| | state | state_region | ages | year | population | area |
|---|---|---|---|---|---|---|
| 2496 | USA | USA | under18 | 1990 | 64218512.0 | 3790399.0 |
| 2497 | USA | USA | total | 1990 | 249622814.0 | 3790399.0 |
| 2498 | USA | USA | total | 1991 | 252980942.0 | 3790399.0 |
| 2499 | USA | USA | under18 | 1991 | 65313018.0 | 3790399.0 |
| 2500 | USA | USA | under18 | 1992 | 66509177.0 | 3790399.0 |
| 2501 | USA | USA | total | 1992 | 256514231.0 | 3790399.0 |
| 2502 | USA | USA | total | 1993 | 259918595.0 | 3790399.0 |
| 2503 | USA | USA | under18 | 1993 | 67594938.0 | 3790399.0 |
| 2504 | USA | USA | under18 | 1994 | 68640936.0 | 3790399.0 |
| 2505 | USA | USA | total | 1994 | 263125826.0 | 3790399.0 |
| 2506 | USA | USA | under18 | 1995 | 69473140.0 | 3790399.0 |
| 2507 | USA | USA | under18 | 1996 | 70233512.0 | 3790399.0 |
| 2508 | USA | USA | total | 1995 | 266278403.0 | 3790399.0 |
| 2509 | USA | USA | total | 1996 | 269394291.0 | 3790399.0 |
| 2510 | USA | USA | total | 1997 | 272646932.0 | 3790399.0 |
| 2511 | USA | USA | under18 | 1997 | 70920738.0 | 3790399.0 |
| 2512 | USA | USA | under18 | 1998 | 71431406.0 | 3790399.0 |
| 2513 | USA | USA | total | 1998 | 275854116.0 | 3790399.0 |
| 2514 | USA | USA | under18 | 1999 | 71946051.0 | 3790399.0 |
| 2515 | USA | USA | total | 2000 | 282162411.0 | 3790399.0 |
| 2516 | USA | USA | under18 | 2000 | 72376189.0 | 3790399.0 |
| 2517 | USA | USA | total | 1999 | 279040181.0 | 3790399.0 |
| 2518 | USA | USA | total | 2001 | 284968955.0 | 3790399.0 |
| 2519 | USA | USA | under18 | 2001 | 72671175.0 | 3790399.0 |
| 2520 | USA | USA | total | 2002 | 287625193.0 | 3790399.0 |
| 2521 | USA | USA | under18 | 2002 | 72936457.0 | 3790399.0 |
| 2522 | USA | USA | total | 2003 | 290107933.0 | 3790399.0 |
| 2523 | USA | USA | under18 | 2003 | 73100758.0 | 3790399.0 |
| 2524 | USA | USA | total | 2004 | 292805298.0 | 3790399.0 |
| 2525 | USA | USA | under18 | 2004 | 73297735.0 | 3790399.0 |
| 2526 | USA | USA | total | 2005 | 295516599.0 | 3790399.0 |
| 2527 | USA | USA | under18 | 2005 | 73523669.0 | 3790399.0 |
| 2528 | USA | USA | total | 2006 | 298379912.0 | 3790399.0 |
| 2529 | USA | USA | under18 | 2006 | 73757714.0 | 3790399.0 |
| 2530 | USA | USA | total | 2007 | 301231207.0 | 3790399.0 |
| 2531 | USA | USA | under18 | 2007 | 74019405.0 | 3790399.0 |
| 2532 | USA | USA | total | 2008 | 304093966.0 | 3790399.0 |
| 2533 | USA | USA | under18 | 2008 | 74104602.0 | 3790399.0 |
| 2534 | USA | USA | under18 | 2013 | 73585872.0 | 3790399.0 |
| 2535 | USA | USA | total | 2013 | 316128839.0 | 3790399.0 |
| 2536 | USA | USA | total | 2009 | 306771529.0 | 3790399.0 |
| 2537 | USA | USA | under18 | 2009 | 74134167.0 | 3790399.0 |
| 2538 | USA | USA | under18 | 2010 | 74119556.0 | 3790399.0 |
| 2539 | USA | USA | total | 2010 | 309326295.0 | 3790399.0 |
| 2540 | USA | USA | under18 | 2011 | 73902222.0 | 3790399.0 |
| 2541 | USA | USA | total | 2011 | 311582564.0 | 3790399.0 |
| 2542 | USA | USA | under18 | 2012 | 73708179.0 | 3790399.0 |
| 2543 | USA | USA | total | 2012 | 313873685.0 | 3790399.0 |
1 | #2012 年各州成年人口和未成年人口比例 |
| state | lt18 | gt18 |
|---|---|---|
| Alabama | 0.231963 | 0.768037 |
| Alaska | 0.257648 | 0.742352 |
| Arizona | 0.246850 | 0.753150 |
| Arkansas | 0.240852 | 0.759148 |
| California | 0.242343 | 0.757657 |
| Colorado | 0.237571 | 0.762429 |
| Connecticut | 0.221328 | 0.778672 |
| Delaware | 0.223091 | 0.776909 |
| District of Columbia | 0.169936 | 0.830064 |
| Florida | 0.207674 | 0.792326 |
| Georgia | 0.250900 | 0.749100 |
| Hawaii | 0.220116 | 0.779884 |
| Idaho | 0.267724 | 0.732276 |
| Illinois | 0.237566 | 0.762434 |
| Indiana | 0.243149 | 0.756851 |
| Iowa | 0.235417 | 0.764583 |
| Kansas | 0.251843 | 0.748157 |
| Kentucky | 0.232286 | 0.767714 |
| Louisiana | 0.242196 | 0.757804 |
| Maine | 0.199357 | 0.800643 |
| Montana | 0.221687 | 0.778313 |
| Nebraska | 0.249372 | 0.750628 |
| Nevada | 0.239495 | 0.760505 |
| New Hampshire | 0.208697 | 0.791303 |
| New Jersey | 0.229495 | 0.770505 |
| New Mexico | 0.245886 | 0.754114 |
| New York | 0.217852 | 0.782148 |
| North Carolina | 0.234308 | 0.765692 |
| North Dakota | 0.223521 | 0.776479 |
| Ohio | 0.230946 | 0.769054 |
| Oklahoma | 0.246322 | 0.753678 |
| Oregon | 0.220501 | 0.779499 |
| Maryland | 0.228762 | 0.771238 |
| Massachusetts | 0.210587 | 0.789413 |
| Michigan | 0.229634 | 0.770366 |
| Minnesota | 0.237571 | 0.762429 |
| Mississippi | 0.248771 | 0.751229 |
| Missouri | 0.233216 | 0.766784 |
| Pennsylvania | 0.214494 | 0.785506 |
| Rhode Island | 0.206217 | 0.793783 |
| South Carolina | 0.228109 | 0.771891 |
| South Dakota | 0.246147 | 0.753853 |
| Tennessee | 0.231248 | 0.768752 |
| Texas | 0.268058 | 0.731942 |
| Utah | 0.311250 | 0.688750 |
| Vermont | 0.198985 | 0.801015 |
| Virginia | 0.227361 | 0.772639 |
| Washington | 0.230367 | 0.769633 |
| West Virginia | 0.206837 | 0.793163 |
| Wisconsin | 0.229907 | 0.770093 |
| Wyoming | 0.236767 | 0.763233 |
| Puerto Rico | 0.230516 | 0.769484 |
| USA | 0.234834 | 0.765166 |
1 | import matplotlib.pyplot as plt |
1 | #堆叠图 |
<matplotlib.axes._subplots.AxesSubplot at 0x2a1fae11688>
