Skip to content

NPACore/nih-grants

Repository files navigation

NIH Funding 2024

Notes

# Overall summary of `proj`: number of projects, number of distinct
# organizations, mean PIs per project, and the total and median (NAs ignored)
# of the direct, indirect and awarded dollar columns. The one-row result is
# passed through fmt() (defined elsewhere) and transposed so that each
# statistic prints on its own row.
proj |>
  ungroup() |>
  summarise(
    n_projects = n(),
    # column name spelling kept as-is so it matches the printed output
    n_organizatoins = length(unique(org)),
    PIperProj_mean = mean(n_pi),
    # the named selection renames the columns, so the results come out as
    # direct_total, direct_median, indirect_total, ...
    across(
      c(direct = direct_cost_amt,
        indirect = indirect_cost_amt,
        awarded = award_amount),
      list(total = \(x) sum(x, na.rm = TRUE),
           median = \(x) median(x, na.rm = TRUE))
    )
  ) |>
  fmt() |>
  t()
n_projects         79,374
n_organizatoins    2,888
PIperProj_mean     2.6
direct_total       29,847,146,202
direct_median      256,779
indirect_total     10,805,710,856
indirect_median    112,671
awarded_total      42,169,253,525
awarded_median     388,750
# Histogram of direct cost on a log10 x-axis for R, K and F grants, with the
# bars of each grant letter drawn side by side.
proj |>
  filter(letter %in% c("R", "K", "F")) |>
  ggplot() +
  aes(x = direct_cost_amt, fill = letter) +
  # bins = 30 is the geom_histogram() default; stating it silences the
  # "pick a better binwidth" message
  geom_histogram(alpha = .7, position = "dodge", bins = 30) +
  # geom_density(alpha = .7) +
  # scale_x_log10() replaces scale_x_continuous(trans = "log10"), whose
  # `trans` argument is deprecated in recent ggplot2; values below 5000 fall
  # outside the limits and are dropped from the plot
  scale_x_log10(limits = c(5000, NA)) +
  cowplot::theme_cowplot() +
  # facet_wrap(letter ~ .) +
  labs(fill = "grant", x = "direct cost (log)",
       title = "distribution of award by type")

img/proj_hist.png

Top

Institution

# Per-organization summary: one row per `org` with the total award amount,
# project counts (all, R grants, K grants), mean PIs per project, median
# award, and direct / indirect cost totals.
proj_org_smry <- proj |>
  group_by(org) |>
  summarise(
    # NOTE(review): no na.rm here, so amount (and median_amount below) is NA
    # for any org with a missing award_amount, unlike the cost sums --
    # confirm that is intended.
    amount = sum(award_amount),
    n_proj = n(),
    # rows whose letter is NA are not counted
    n_R = sum(letter == "R", na.rm = TRUE),
    n_K = sum(letter == "K", na.rm = TRUE),
    mean_n_pi = mean(n_pi),
    median_amount = median(award_amount),
    direct_cost = sum(direct_cost_amt, na.rm = TRUE),
    indirect_cost = sum(indirect_cost_amt, na.rm = TRUE)
  )

# First six organizations when ordered by total award amount, largest first.
proj_org_smry |>
  arrange(desc(amount)) |>
  head() |>
  fmt()
orgamountn_projn_Rn_Kmean_n_pimedian_amountdirect_costindirect_cost
1JOHNS HOPKINS UNIVERSITY967,554,6201,8269221912.8404,828716,140,229267,819,162
2UNIVERSITY OF CALIFORNIA, SAN FRANCISCO923,404,3911,7418892322.5395,536680,359,737245,279,242
3WASHINGTON UNIVERSITY901,899,9061,4558051233.1393,750679,451,125223,127,797
4UNIVERSITY OF MICHIGAN AT ANN ARBOR840,742,0851,6639651552.7388,052613,767,433228,042,377
5UNIVERSITY OF PENNSYLVANIA790,934,5801,5578241322.6406,043560,715,831235,792,132
6UNIVERSITY OF PITTSBURGH AT PITTSBURGH747,102,1721,4358581142.7400,107542,554,963209,616,818

Contact PI

# Per-contact-PI summary: one row per normalized `contact_pi` label with
# award totals, project count, mean PIs per project, median award, and a
# shortened list of the organizations the PI appears under.
proj_pi_smry <- proj |>
  # Normalize the label BEFORE grouping so the groups are built on the final
  # key: upper-case it and drop everything between the colon and the last
  # space (first and middle name), leaving "<id>:<LAST>".
  mutate(contact_pi = gsub(":.* ", ":", toupper(contact_pi))) |>
  group_by(contact_pi) |>
  summarise(
    # NOTE(review): no na.rm, so amount / median_amount are NA for any PI
    # with a missing award_amount -- confirm that is intended.
    amount = sum(award_amount),
    direct = sum(direct_cost_amt, na.rm = TRUE),
    n_proj = n(),
    mean_n_pi = mean(n_pi),
    median_amount = median(award_amount),
    # Distinct orgs joined with ";", with filler words removed and the result
    # capped at 100 characters. Word boundaries keep "OF" etc. from being
    # stripped out of the middle of longer words.
    org = substr(
      paste(
        gsub("\\b(UNIVERSITY|OF|SCHOOL|INSTITUTE)\\b ?", "", unique(org),
             perl = TRUE),
        collapse = ";"
      ),
      1, 100
    )
  )
# Histogram of total award amount per contact PI on a log10 x-axis, filled by
# the binned mean number of PIs on that person's projects. Rows with
# amount <= 100 (or NA) are excluded.
proj_pi_smry |>
  filter(amount > 100) |>
  # only the `pis` bin is used by the plot; the unused `projects` bin that was
  # computed here has been dropped
  mutate(pis = cut(mean_n_pi, breaks = c(0, 1, 2, 5, 10, 50))) |>
  ggplot() +
  aes(x = amount, fill = pis) +
  # bins = 30 is the default; stating it silences the binwidth message
  geom_histogram(bins = 30) +
  # replaces scale_x_continuous(trans = "log10"); `trans` is deprecated in
  # recent ggplot2
  scale_x_log10() +
  cowplot::theme_cowplot() +
  labs(fill = "mean N co-pi", title = "Amount per contact-PI")

img/pi.png

# Five-number summary, mean and NA count of the per-PI award totals.
summary(proj_pi_smry[["amount"]])
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's
        1    211712    462500    851443    858892 341743406      1620

By total amount

# First six contact PIs when ordered by total award amount, largest first.
proj_pi_smry |>
  arrange(desc(amount)) |>
  head() |>
  fmt()
contact_piamountn_projmean_n_pimedian_amountorg
179478801:BRISCOE341,743,406431.01,117,108LEIDOS BIOMEDICAL RESEARCH, INC.
210753426:NOLEN289,804,00021.5144,902,000RESEARCH TRIANGLE
310829359:GROSS110,114,21724.055,057,108NEW YORK MEDICINE
478492086:MONTALVAN66,600,00021.033,300,000WESTAT, INC.
51882258:BOXER61,341,88778.3984,055CALIFORNIA, SAN FRANCISCO;MAYO CLINIC ROCHESTER
66190835:DIAMOND60,794,967125.5765,900STANFORD ;MARYLAND BALTIMORE;WASHINGTON ;PITTSBURGH AT PITTSBURGH

By number of projects

# First six contact PIs when ordered by number of projects, most first.
# The "NONE:" label is excluded; the comparison also drops rows whose
# contact_pi is NA.
proj_pi_smry |>
  filter(contact_pi != "NONE:") |>
  arrange(desc(n_proj)) |>
  head() |>
  fmt()
contact_piamountn_projmean_n_pimedian_amountorg
179478801:BRISCOE341,743,4064311,117,108LEIDOS BIOMEDICAL RESEARCH, INC.
21891624:EBERLEIN13,217,1713023258,998WASHINGTON
38497898:SHEPPARD359,30028114,250KEYSTONE SYMPOSIA
47039607:STEWART2,027,36025140,000COLD SPRING HARBOR LABORATORY
579112606:FREEDMAN46,535,946251225,042LEIDOS BIOMEDICAL RESEARCH, INC.
66774622:PASCHE4,805,560232150,162WAYNE STATE ;WAKE FOREST HEALTH SCIENCES

Code

See Makefile.

get_2024.py uses the NIH RePORTER API, querying one state at a time (plus DC and PR) to avoid the API's return limits. It saves all output (>700 MB!) to a pickle file.

grants_to_csv.py parses the pickle file and writes it out as CSV.

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published