1 | import pandas as pd |
1 | #州名的简写 |
1 | pop.columns[0] |
'state/region'
1 | pop = pop.rename(columns={pop.columns[0]:'state_region'}) |
1 | abb_pop = pd.merge(abb,pop,left_on='abbreviation',right_on='state_region',how='outer') |
1 | abb_pop |
| | state | abbreviation | state_region | ages | year | population |
---|---|---|---|---|---|---|
0 | Alabama | AL | AL | under18 | 2012 | 1117489.0 |
1 | Alabama | AL | AL | total | 2012 | 4817528.0 |
2 | Alabama | AL | AL | under18 | 2010 | 1130966.0 |
3 | Alabama | AL | AL | total | 2010 | 4785570.0 |
4 | Alabama | AL | AL | under18 | 2011 | 1125763.0 |
... | ... | ... | ... | ... | ... | ... |
2539 | NaN | NaN | USA | total | 2010 | 309326295.0 |
2540 | NaN | NaN | USA | under18 | 2011 | 73902222.0 |
2541 | NaN | NaN | USA | total | 2011 | 311582564.0 |
2542 | NaN | NaN | USA | under18 | 2012 | 73708179.0 |
2543 | NaN | NaN | USA | total | 2012 | 313873685.0 |
2544 rows × 6 columns
1 | abb_pop.isnull().sum() |
state 96
abbreviation 96
state_region 0
ages 0
year 0
population 20
dtype: int64
1 | abb_pop.drop(labels='abbreviation',axis=1,inplace=True) |
1 | abb_pop.isnull().sum() |
state 96
state_region 0
ages 0
year 0
population 20
dtype: int64
1 | #查找州名为空的行号 |
1 | filter_data = abb_pop.loc[inds] |
1 | #提取州名简写 |
array(['PR', 'USA'], dtype=object)
1 | #先抽取简写为PR的数据行号 |
1 | usa_inds = abb_pop.query("state_region=='USA'").index |
1 | indexs = abb_pop.population.isnull() |
1 | #PR2000年的数据 |
1 | un18_data = filter_data.query("ages=='under18'") |
D:\Anaconda3\lib\site-packages\pandas\core\generic.py:5208: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
self[name] = value
1 | abb_pop_are = pd.merge(abb_pop,are,how='outer') |
1 | abb_pop_are = abb_pop_are.rename(columns={abb_pop_are.columns[5]:'area'}) |
1 | are_inds = abb_pop_are.area.isnull() |
D:\Anaconda3\lib\site-packages\pandas\core\generic.py:5208: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
self[name] = value
1 | fill_are |
| | state | state_region | ages | year | population | area |
---|---|---|---|---|---|---|
2496 | USA | USA | under18 | 1990 | 64218512.0 | 3790399.0 |
2497 | USA | USA | total | 1990 | 249622814.0 | 3790399.0 |
2498 | USA | USA | total | 1991 | 252980942.0 | 3790399.0 |
2499 | USA | USA | under18 | 1991 | 65313018.0 | 3790399.0 |
2500 | USA | USA | under18 | 1992 | 66509177.0 | 3790399.0 |
2501 | USA | USA | total | 1992 | 256514231.0 | 3790399.0 |
2502 | USA | USA | total | 1993 | 259918595.0 | 3790399.0 |
2503 | USA | USA | under18 | 1993 | 67594938.0 | 3790399.0 |
2504 | USA | USA | under18 | 1994 | 68640936.0 | 3790399.0 |
2505 | USA | USA | total | 1994 | 263125826.0 | 3790399.0 |
2506 | USA | USA | under18 | 1995 | 69473140.0 | 3790399.0 |
2507 | USA | USA | under18 | 1996 | 70233512.0 | 3790399.0 |
2508 | USA | USA | total | 1995 | 266278403.0 | 3790399.0 |
2509 | USA | USA | total | 1996 | 269394291.0 | 3790399.0 |
2510 | USA | USA | total | 1997 | 272646932.0 | 3790399.0 |
2511 | USA | USA | under18 | 1997 | 70920738.0 | 3790399.0 |
2512 | USA | USA | under18 | 1998 | 71431406.0 | 3790399.0 |
2513 | USA | USA | total | 1998 | 275854116.0 | 3790399.0 |
2514 | USA | USA | under18 | 1999 | 71946051.0 | 3790399.0 |
2515 | USA | USA | total | 2000 | 282162411.0 | 3790399.0 |
2516 | USA | USA | under18 | 2000 | 72376189.0 | 3790399.0 |
2517 | USA | USA | total | 1999 | 279040181.0 | 3790399.0 |
2518 | USA | USA | total | 2001 | 284968955.0 | 3790399.0 |
2519 | USA | USA | under18 | 2001 | 72671175.0 | 3790399.0 |
2520 | USA | USA | total | 2002 | 287625193.0 | 3790399.0 |
2521 | USA | USA | under18 | 2002 | 72936457.0 | 3790399.0 |
2522 | USA | USA | total | 2003 | 290107933.0 | 3790399.0 |
2523 | USA | USA | under18 | 2003 | 73100758.0 | 3790399.0 |
2524 | USA | USA | total | 2004 | 292805298.0 | 3790399.0 |
2525 | USA | USA | under18 | 2004 | 73297735.0 | 3790399.0 |
2526 | USA | USA | total | 2005 | 295516599.0 | 3790399.0 |
2527 | USA | USA | under18 | 2005 | 73523669.0 | 3790399.0 |
2528 | USA | USA | total | 2006 | 298379912.0 | 3790399.0 |
2529 | USA | USA | under18 | 2006 | 73757714.0 | 3790399.0 |
2530 | USA | USA | total | 2007 | 301231207.0 | 3790399.0 |
2531 | USA | USA | under18 | 2007 | 74019405.0 | 3790399.0 |
2532 | USA | USA | total | 2008 | 304093966.0 | 3790399.0 |
2533 | USA | USA | under18 | 2008 | 74104602.0 | 3790399.0 |
2534 | USA | USA | under18 | 2013 | 73585872.0 | 3790399.0 |
2535 | USA | USA | total | 2013 | 316128839.0 | 3790399.0 |
2536 | USA | USA | total | 2009 | 306771529.0 | 3790399.0 |
2537 | USA | USA | under18 | 2009 | 74134167.0 | 3790399.0 |
2538 | USA | USA | under18 | 2010 | 74119556.0 | 3790399.0 |
2539 | USA | USA | total | 2010 | 309326295.0 | 3790399.0 |
2540 | USA | USA | under18 | 2011 | 73902222.0 | 3790399.0 |
2541 | USA | USA | total | 2011 | 311582564.0 | 3790399.0 |
2542 | USA | USA | under18 | 2012 | 73708179.0 | 3790399.0 |
2543 | USA | USA | total | 2012 | 313873685.0 | 3790399.0 |
1 | #2012 年各州成年人口和未成年人口比例 |
state | lt18 | gt18 |
---|---|---|
Alabama | 0.231963 | 0.768037 |
Alaska | 0.257648 | 0.742352 |
Arizona | 0.246850 | 0.753150 |
Arkansas | 0.240852 | 0.759148 |
California | 0.242343 | 0.757657 |
Colorado | 0.237571 | 0.762429 |
Connecticut | 0.221328 | 0.778672 |
Delaware | 0.223091 | 0.776909 |
District of Columbia | 0.169936 | 0.830064 |
Florida | 0.207674 | 0.792326 |
Georgia | 0.250900 | 0.749100 |
Hawaii | 0.220116 | 0.779884 |
Idaho | 0.267724 | 0.732276 |
Illinois | 0.237566 | 0.762434 |
Indiana | 0.243149 | 0.756851 |
Iowa | 0.235417 | 0.764583 |
Kansas | 0.251843 | 0.748157 |
Kentucky | 0.232286 | 0.767714 |
Louisiana | 0.242196 | 0.757804 |
Maine | 0.199357 | 0.800643 |
Montana | 0.221687 | 0.778313 |
Nebraska | 0.249372 | 0.750628 |
Nevada | 0.239495 | 0.760505 |
New Hampshire | 0.208697 | 0.791303 |
New Jersey | 0.229495 | 0.770505 |
New Mexico | 0.245886 | 0.754114 |
New York | 0.217852 | 0.782148 |
North Carolina | 0.234308 | 0.765692 |
North Dakota | 0.223521 | 0.776479 |
Ohio | 0.230946 | 0.769054 |
Oklahoma | 0.246322 | 0.753678 |
Oregon | 0.220501 | 0.779499 |
Maryland | 0.228762 | 0.771238 |
Massachusetts | 0.210587 | 0.789413 |
Michigan | 0.229634 | 0.770366 |
Minnesota | 0.237571 | 0.762429 |
Mississippi | 0.248771 | 0.751229 |
Missouri | 0.233216 | 0.766784 |
Pennsylvania | 0.214494 | 0.785506 |
Rhode Island | 0.206217 | 0.793783 |
South Carolina | 0.228109 | 0.771891 |
South Dakota | 0.246147 | 0.753853 |
Tennessee | 0.231248 | 0.768752 |
Texas | 0.268058 | 0.731942 |
Utah | 0.311250 | 0.688750 |
Vermont | 0.198985 | 0.801015 |
Virginia | 0.227361 | 0.772639 |
Washington | 0.230367 | 0.769633 |
West Virginia | 0.206837 | 0.793163 |
Wisconsin | 0.229907 | 0.770093 |
Wyoming | 0.236767 | 0.763233 |
Puerto Rico | 0.230516 | 0.769484 |
USA | 0.234834 | 0.765166 |
1 | import matplotlib.pyplot as plt |
1 | #堆叠图 |
<matplotlib.axes._subplots.AxesSubplot at 0x2a1fae11688>