Objectives

This notebook summarizes results from the 2020 S34D field surveys.

The objective of this activity is to understand the operational efficiency and profitability of seed producer groups across different environments (multiple geographies, policy and regulatory systems). The farmers in the surveyed groups produce certified seeds as well as high-quality informal seeds that go through internal seed quality assurance mechanisms.

We have chosen several countries that are at different stages of agricultural transformation and have varying degrees of seed system maturity. Comparing and contrasting these seed producer clubs within and across their respective contexts can help guide development partners in designing interventions and inform policy making and seed system regulations.

The analysis is split by country. R code chunks are shown for reproducibility.

library(thematic)
library(showtext)
library(viridisLite)
library(lubridate)
library(kableExtra)
library(scales)
library(ggpubr)
library(data.table)
library(summarytools)
library(ggalluvial)
library(GGally)
library(ttt)

# Start from an empty workspace.
# NOTE(review): this removes every visible object in the current environment,
# including anything defined before this chunk runs -- confirm it is intended.
rm(list = ls())

# Global package options ----
# ttt table theme
options(ttt.theme = "booktabs")

# summarytools output defaults
st_options(
  plain.ascii = FALSE,
  bootstrap.css = FALSE,
  headings = FALSE,
  footnote = NA,
  dfSummary.graph.magnif = 0.8,
  dfSummary.varnumbers = FALSE,
  dfSummary.graph.col = FALSE
)

# knitr chunk defaults: figure geometry/location first, then what gets shown
knitr::opts_chunk$set(
  fig.height = 4,
  fig.width = 6,
  dpi = 220,
  fig.path = "fig/",
  fig.showtext = TRUE,
  include = TRUE,
  error = FALSE,
  warning = FALSE,
  message = FALSE
)

# Helper - Default USAID FtF colors for plots
pal <- c("#4799b5", "#94a545", "#d37d28", "#8B1E3F", "#403b33")

# Turn on thematic's global plot theming, using `pal` as the qualitative scale
thematic_on(
  bg = "transparent",
  fg = "#2f2f29",
  accent = c("#4799b5", "#e6e7e8"),
  font = font_spec(families = c("Roboto Condensed", "Roboto")),
  qualitative = pal
)

# Helper - Default ggplot2 theme for plots
#
# Starts from theme_minimal(base_size, base_family), applies the notebook's
# overrides, then applies any extra theme() settings passed through `...`
# (these come last, so they win over the defaults below).
#
# base_size    Base font size; title is one point larger.
# base_family  Base font family.
# base_fill    Fill used for the panel, plot and legend-box backgrounds.
# ...          Further arguments forwarded to theme().
theme_def <- function(
  base_size = 7,
  base_family = "Roboto Condensed",
  base_fill = "transparent",
  ...) {
  # All three backgrounds share the same borderless rectangle
  background <- element_rect(fill = base_fill, color = NA)
  key_size <- unit(.8, "line")

  base_theme <- theme_minimal(base_size, base_family)
  overrides <- theme(
    panel.background = background,
    plot.background = background,
    legend.box.background = background,
    text = element_text(lineheight = .8),
    plot.title = element_text(size = base_size + 1),
    # Subtitle is drawn in the first palette color
    plot.subtitle = element_text(color = pal[1], size = base_size),
    strip.text = element_text(face = "bold", hjust = 0, size = base_size),
    axis.text = element_text(size = base_size),
    axis.title = element_text(hjust = 0),
    legend.key.width = key_size,
    legend.key.height = key_size
  )

  base_theme + overrides + theme(...)
}

# Helper - Default theme for tables
# Forwards every argument to kableExtra::kbl() and then applies the notebook's
# standard bootstrap styling. Note that this masks kableExtra's own kbl().
kbl <- function(...) {
  tbl <- kableExtra::kbl(...)
  kable_styling(
    tbl,
    bootstrap_options = c("condensed", "responsive", "striped", "hover"),
    font_size = 14
  )
}

# Helper - TTT Formatters
# Summarise a numeric vector as a named character vector with its mean,
# median and standard deviation, each formatted with comma().
# Missing values are dropped before computing each statistic; `TRUE` is
# spelled out because `T` is an ordinary variable that can be reassigned.
fmt <- function(x) {
  c(
    mean = comma(mean(x, na.rm = TRUE)),
    median = comma(median(x, na.rm = TRUE)),
    sd = comma(sd(x, na.rm = TRUE))
  )
}

# Summarise a numeric vector of shares as a named character vector with its
# mean, median and standard deviation, each formatted with percent().
# Missing values are dropped before computing each statistic; `TRUE` is
# spelled out because `T` is an ordinary variable that can be reassigned.
fmt_pct <- function(x) {
  c(
    mean = percent(mean(x, na.rm = TRUE)),
    median = percent(median(x, na.rm = TRUE)),
    sd = percent(sd(x, na.rm = TRUE))
  )
}

# Helper - Sample-size label, shaped for use as a stat_summary() `fun.data`.
# Returns a one-row data frame with `label` = "N = <length of x>" and
# `y` = Inf. The length includes any NA values present in `x`.
count <- function(x) {
  n_obs <- length(x)
  data.frame(y = Inf, label = paste("N =", n_obs))
}
# Helper - Boxplot annotated with group sizes and mean-comparison tests
#
# ...     Forwarded to ggplot() (typically the data and an aes() mapping).
# jitter  If TRUE, draw the jittered raw observations beneath the boxes.
# grp.c   Optional mapping passed to stat_summary() for the "N = ..." labels.
# grp.s   Optional mapping passed to stat_compare_means() for the global test.
# cp      Optional `comparisons` argument for stat_compare_means().
# ref     Optional `ref.group` argument for stat_compare_means().
#
# When neither `cp` nor `ref` is given, a single overall test is shown twice:
# once as significance stars and once as a formatted p-value. Otherwise the
# requested comparisons are shown as significance stars.
#
# Returns the ggplot object. It is returned visibly, so calling the helper on
# its own renders the plot (ending on an assignment would return it invisibly).
ggBoxTest <- function(..., jitter=TRUE, grp.c=NULL, grp.s=NULL, cp=NULL, ref=NULL) {
  p <- ggplot(...)

  if (jitter) {
    p <- p + geom_jitter(alpha = .5, size = .2)
  }

  # Outliers are hidden on the boxes (outlier.shape = NA)
  p <- p +
    geom_boxplot(alpha = .7, outlier.shape = NA) +
    stat_summary(grp.c, fun.data = count, geom = "text", size = 2.5, color = 1, vjust = 1)

  if (is.null(cp) && is.null(ref)) {
    p <- p +
      stat_compare_means(grp.s, label = "p.signif", size = 2.5, vjust = 1, fontface = "bold") +
      stat_compare_means(grp.s, label = "p.format", size = 2.5, vjust = 3)
  } else {
    p <- p +
      stat_compare_means(comparisons = cp, ref.group = ref, label = "p.signif", vjust = 0, size = 2.5)
  }

  p
}

# Helper - Contingency table
#
# Cross-tabulates three columns of a data.table, runs a Mantel-Haenszel test
# on the resulting 3-way table, and renders the cell shares (in percent, one
# decimal) as a flat table via ttt().
#
# data  A data.table (columns are selected with `.SDcols`).
# vars  Exactly three column names; the first two become the row variables.
# ...   Forwarded to addmargins().
#
# Returns the value of ttt(), with the sample size, test statistic and p-value
# in the footnote.
ttt_ftable <- function(data, vars, ...) {
  stopifnot(length(vars) == 3)
  dt <- data[, .SD, .SDcols = vars]
  # N is the number of rows selected; table() drops NAs by default, so N can
  # exceed the number of observations actually tabulated.
  N <- nrow(dt)
  dt <- table(dt)
  test <- mantelhaen.test(dt)
  dt <- dt %>%
    prop.table() %>%
    addmargins(...) %>%
    ftable()
  names(attr(dt, "row.vars")) <- vars[1:2]
  # Shares -> percentages
  dt <- round(100 * dt, digits = 1)
  ttt(dt,
    caption = "Contingency Table (% of respondents)",
    footnote = paste(
      "N =", N,
      # Access htest components by name rather than by position
      "| Mantel-Haenszel chi-squared =", comma(test$statistic, accuracy = .01),
      "| p-value =", comma(test$p.value, accuracy = .0001))
  )
}

Codebook

All survey datasets have been consolidated into a single data file. A unified codebook is shown below for reference.

# Load unified codebook
# Read the codebook CSV into `lbl` and print it as a styled table.
lbl <- fread(input = "../data/codebook.csv")

# NOTE(review): `align` has 5 letters but the rendered codebook shows 6
# columns -- confirm the last column is aligned as intended.
kbl(
  lbl,
  align = "llccc",
  caption = "Consolidated Survey Codebook"
)
Tab. 1: Consolidated Survey Codebook
code label type unit constructed notes
hhid Unique farmer/crop ID character uid TRUE
iso3 ISO3 country code character class TRUE
adm1_nm Province character class FALSE
adm1_nm Region character class FALSE
adm2_nm District character class FALSE
adm3_nm Location character class FALSE
group Group character class FALSE
code Code character class FALSE
age Age character class FALSE < 30, ≥ 30
age_num Age (continuous) numeric years FALSE
gender Sex character class FALSE Male, Female
crop Crop character class FALSE
member_years How long have you been a member of this group? numeric years FALSE
seed_ha_lcu Cost seed per ha (LCU) numeric lcu/ha FALSE
fert_ha_lcu Cost of fertilizer per ha (LCU) numeric lcu/ha FALSE
pest_ha_lcu Cost of pesticide per ha (LCU) numeric lcu/ha FALSE
tran_ha_lcu Cost of transport per ha (LCU) numeric lcu/ha FALSE
labor_ha_lcu Labor cost (LCU) numeric lcu/ha FALSE
cert_lcu Inspection / certification Fees (LCU) numeric lcu/farm/season FALSE
labl_kg_lcu Labelling costs per kg (LCU) numeric lcu/kg FALSE
pckg_kg_lcu Packaging costs per kg (LCU) numeric lcu/kg FALSE
mark_kg_lcu Other marketing costs? (LCU) numeric lcu/kg FALSE
yield_ha_kg Estimated Yield (kg/ha) numeric kg/ha FALSE
sales_kg_lcu Selling price of seed per kg (LCU) numeric lcu/kg FALSE
sales_grain_sowing_kg_lcu Selling price of grain per kg (LCU) at sowing numeric lcu/kg FALSE
sales_grain_harvest_kg_lcu Selling price of grain per kg (LCU) at harvest numeric lcu/kg FALSE
sales_ha_kg How many kg were sold in the season? numeric kg/ha FALSE
margin_exp_ha_sh What was your expected gross margin? numeric lcu/ha FALSE
area_ha Area (ha) numeric ha FALSE
costs_exp_ha_lcu Total production cost numeric lcu/ha TRUE
costs_real_ha_lcu Total production costs per ha (realized yield) numeric lcu/ha TRUE
sales_exp_ha_lcu Gross sales numeric lcu/ha TRUE
sales_real_ha_lcu Gross sales per ha (realized yield) numeric lcu/ha TRUE
costs_ha_ppp Total production costs per ha in PPP equivalent numeric ppp/ha TRUE
sales_ha_sh Gross sales as share of tpc numeric share TRUE
sales_ha_ppp Gross sales in PPP equivalent numeric ppp/ha TRUE
margin_ha_lcu Gross margin per ha numeric lcu/ha TRUE
margin_ha_sh Margin as share of tpc numeric share TRUE
margin_ha_ppp Margin in PPP equivalent numeric ppp/ha TRUE
tfp Total factor productivity numeric kg/lcu TRUE
ssd Farmer participates in formal seed system distribution logical true/false TRUE Informal, Formal
group_year Year the group was established integer year TRUE
group_size Members in the group integer members TRUE
soil_type Primary soil type character class TRUE
seasons Number of seasons per year integer seasons/year TRUE
irrigated Level of irrigation character class TRUE
market_access Market access character class TRUE
ttrade Presence of transboundary trade logical true/false TRUE
date Date date date FALSE