diff --git a/src/domain.rs b/src/domain.rs
index c636855b7..ddba4f430 100644
--- a/src/domain.rs
+++ b/src/domain.rs
@@ -1,14 +1,15 @@
-//! This module contains an `EvaluationDomain` abstraction for
-//! performing various kinds of polynomial arithmetic on top of
-//! the scalar field.
+//! This module contains an [`EvaluationDomain`] abstraction for performing
+//! various kinds of polynomial arithmetic on top of the scalar field.
//!
-//! In pairing-based SNARKs like Groth16, we need to calculate
-//! a quotient polynomial over a target polynomial with roots
-//! at distinct points associated with each constraint of the
-//! constraint system. In order to be efficient, we choose these
-//! roots to be the powers of a 2^n root of unity in the field.
-//! This allows us to perform polynomial operations in O(n)
-//! by performing an O(n log n) FFT over such a domain.
+//! In pairing-based SNARKs like [Groth16], we need to calculate a quotient
+//! polynomial over a target polynomial with roots at distinct points associated
+//! with each constraint of the constraint system. In order to be efficient, we
+//! choose these roots to be the powers of a 2<sup>n</sup> root of unity in the
+//! field. This allows us to perform polynomial operations in O(n) by performing
+//! an O(n log n) FFT over such a domain.
+//!
+//! [`EvaluationDomain`]: crate::domain::EvaluationDomain
+//! [Groth16]: https://eprint.iacr.org/2016/260
use ff::{Field, PrimeField, ScalarEngine};
use group::CurveProjective;
diff --git a/src/gadgets.rs b/src/gadgets.rs
index cf366df37..b0ce73472 100644
--- a/src/gadgets.rs
+++ b/src/gadgets.rs
@@ -1,3 +1,5 @@
+//! Self-contained sub-circuit implementations for various primitives.
+
pub mod test;
pub mod blake2s;
diff --git a/src/gadgets/blake2s.rs b/src/gadgets/blake2s.rs
index 96e554bf7..e98fd55f6 100644
--- a/src/gadgets/blake2s.rs
+++ b/src/gadgets/blake2s.rs
@@ -1,3 +1,7 @@
+//! The [BLAKE2s] hash function with personalization support.
+//!
+//! [BLAKE2s]: https://tools.ietf.org/html/rfc7693
+
use super::{boolean::Boolean, multieq::MultiEq, uint32::UInt32};
use crate::{ConstraintSystem, SynthesisError};
use ff::ScalarEngine;
diff --git a/src/gadgets/boolean.rs b/src/gadgets/boolean.rs
index bbc0d302a..e08974d2a 100644
--- a/src/gadgets/boolean.rs
+++ b/src/gadgets/boolean.rs
@@ -1,3 +1,5 @@
+//! Gadgets for allocating bits in the circuit and performing boolean logic.
+
use ff::{BitIterator, Field, PrimeField, ScalarEngine};
use crate::{ConstraintSystem, LinearCombination, SynthesisError, Variable};
diff --git a/src/gadgets/lookup.rs b/src/gadgets/lookup.rs
index 8124b22a3..b83844de4 100644
--- a/src/gadgets/lookup.rs
+++ b/src/gadgets/lookup.rs
@@ -1,3 +1,5 @@
+//! Window table lookup gadgets.
+
use ff::{Field, ScalarEngine};
use super::boolean::Boolean;
diff --git a/src/gadgets/multipack.rs b/src/gadgets/multipack.rs
index 1fa1967a1..c0dc50e1f 100644
--- a/src/gadgets/multipack.rs
+++ b/src/gadgets/multipack.rs
@@ -1,3 +1,5 @@
+//! Helpers for packing vectors of bits into scalar field elements.
+
use super::boolean::Boolean;
use super::num::Num;
use super::Assignment;
diff --git a/src/gadgets/num.rs b/src/gadgets/num.rs
index b7caf6df8..8be54480e 100644
--- a/src/gadgets/num.rs
+++ b/src/gadgets/num.rs
@@ -1,3 +1,5 @@
+//! Gadgets representing numbers in the scalar field of the underlying curve.
+
use ff::{BitIterator, Field, PrimeField, PrimeFieldRepr, ScalarEngine};
use crate::{ConstraintSystem, LinearCombination, SynthesisError, Variable};
diff --git a/src/gadgets/sha256.rs b/src/gadgets/sha256.rs
index a875513fb..0c8efea70 100644
--- a/src/gadgets/sha256.rs
+++ b/src/gadgets/sha256.rs
@@ -1,3 +1,8 @@
+//! Circuits for the [SHA-256] hash function and its internal compression
+//! function.
+//!
+//! [SHA-256]: https://tools.ietf.org/html/rfc6234
+
use super::boolean::Boolean;
use super::multieq::MultiEq;
use super::uint32::UInt32;
diff --git a/src/gadgets/test/mod.rs b/src/gadgets/test/mod.rs
index fedfe9492..47392f147 100644
--- a/src/gadgets/test/mod.rs
+++ b/src/gadgets/test/mod.rs
@@ -1,3 +1,5 @@
+//! Helpers for testing circuit implementations.
+
use ff::{Field, PrimeField, PrimeFieldRepr, ScalarEngine};
use crate::{ConstraintSystem, Index, LinearCombination, SynthesisError, Variable};
diff --git a/src/gadgets/uint32.rs b/src/gadgets/uint32.rs
index cf8e3906a..a10be6c03 100644
--- a/src/gadgets/uint32.rs
+++ b/src/gadgets/uint32.rs
@@ -1,3 +1,6 @@
+//! Circuit representation of a [`u32`], with helpers for the [`sha256`]
+//! gadgets.
+
use ff::{Field, PrimeField, ScalarEngine};
use crate::{ConstraintSystem, LinearCombination, SynthesisError};
diff --git a/src/groth16/mod.rs b/src/groth16/mod.rs
index 44c6f22f2..1ff152d60 100644
--- a/src/groth16/mod.rs
+++ b/src/groth16/mod.rs
@@ -1,3 +1,7 @@
+//! The [Groth16] proving system.
+//!
+//! [Groth16]: https://eprint.iacr.org/2016/260
+
use group::{CurveAffine, EncodedPoint};
use pairing::{Engine, PairingCurveAffine};
diff --git a/src/lib.rs b/src/lib.rs
index 5152b0f10..a3b577b99 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,137 @@
+//! `bellman` is a crate for building zk-SNARK circuits. It provides circuit
+//! traits and primitive structures, as well as basic gadget implementations
+//! such as booleans and number abstractions.
+//!
+//! # Example circuit
+//!
+//! Say we want to write a circuit that proves we know the preimage to some hash
+//! computed using SHA-256d (calling SHA-256 twice). The preimage must have a
+//! fixed length known in advance (because the circuit parameters will depend on
+//! it), but can otherwise have any value. We take the following strategy:
+//!
+//! - Witness each bit of the preimage.
+//! - Compute `hash = SHA-256d(preimage)` inside the circuit.
+//! - Expose `hash` as a public input using multiscalar packing.
+//!
+//! ```
+//! use bellman::{
+//! gadgets::{
+//! boolean::{AllocatedBit, Boolean},
+//! multipack,
+//! sha256::sha256,
+//! },
+//! groth16, Circuit, ConstraintSystem, SynthesisError,
+//! };
+//! use pairing::{bls12_381::Bls12, Engine};
+//! use rand::rngs::OsRng;
+//! use sha2::{Digest, Sha256};
+//!
+//! /// Our own SHA-256d gadget. Input and output are in little-endian bit order.
+//! fn sha256d<E: Engine, CS: ConstraintSystem<E>>(
+//! mut cs: CS,
+//! data: &[Boolean],
+//! ) -> Result<Vec<Boolean>, SynthesisError> {
+//! // Flip endianness of each input byte
+//! let input: Vec<_> = data
+//! .chunks(8)
+//! .map(|c| c.iter().rev())
+//! .flatten()
+//! .cloned()
+//! .collect();
+//!
+//! let mid = sha256(cs.namespace(|| "SHA-256(input)"), &input)?;
+//! let res = sha256(cs.namespace(|| "SHA-256(mid)"), &mid)?;
+//!
+//! // Flip endianness of each output byte
+//! Ok(res
+//! .chunks(8)
+//! .map(|c| c.iter().rev())
+//! .flatten()
+//! .cloned()
+//! .collect())
+//! }
+//!
+//! struct MyCircuit {
+//! /// The input to SHA-256d we are proving that we know. Set to `None` when we
+//! /// are verifying a proof (and do not have the witness data).
+//! preimage: Option<[u8; 80]>,
+//! }
+//!
+//! impl<E: Engine> Circuit<E> for MyCircuit {
+//! fn synthesize<CS: ConstraintSystem<E>>(self, cs: &mut CS) -> Result<(), SynthesisError> {
+//! // Compute the values for the bits of the preimage. If we are verifying a proof,
+//! // we still need to create the same constraints, so we return an equivalent-size
+//! // Vec of None (indicating that the value of each bit is unknown).
+//! let bit_values = if let Some(preimage) = self.preimage {
+//! preimage
+//! .into_iter()
+//! .map(|byte| (0..8).map(move |i| (byte >> i) & 1u8 == 1u8))
+//! .flatten()
+//! .map(|b| Some(b))
+//! .collect()
+//! } else {
+//! vec![None; 80 * 8]
+//! };
+//! assert_eq!(bit_values.len(), 80 * 8);
+//!
+//! // Witness the bits of the preimage.
+//! let preimage_bits = bit_values
+//! .into_iter()
+//! .enumerate()
+//! // Allocate each bit.
+//! .map(|(i, b)| {
+//! AllocatedBit::alloc(cs.namespace(|| format!("preimage bit {}", i)), b)
+//! })
+//! // Convert the AllocatedBits into Booleans (required for the sha256 gadget).
+//! .map(|b| b.map(Boolean::from))
+//! .collect::<Result<Vec<_>, _>>()?;
+//!
+//! // Compute hash = SHA-256d(preimage).
+//! let hash = sha256d(cs.namespace(|| "SHA-256d(preimage)"), &preimage_bits)?;
+//!
+//! // Expose the vector of 256 boolean variables as compact public inputs.
+//! multipack::pack_into_inputs(cs.namespace(|| "pack hash"), &hash)
+//! }
+//! }
+//!
+//! // Create parameters for our circuit. In a production deployment these would
+//! // be generated securely using a multiparty computation.
+//! let params = {
+//! let c = MyCircuit { preimage: None };
+//! groth16::generate_random_parameters::<Bls12, _, _>(c, &mut OsRng).unwrap()
+//! };
+//!
+//! // Prepare the verification key (for proof verification).
+//! let pvk = groth16::prepare_verifying_key(&params.vk);
+//!
+//! // Pick a preimage and compute its hash.
+//! let preimage = [42; 80];
+//! let hash = Sha256::digest(&Sha256::digest(&preimage));
+//!
+//! // Create an instance of our circuit (with the preimage as a witness).
+//! let c = MyCircuit {
+//! preimage: Some(preimage),
+//! };
+//!
+//! // Create a Groth16 proof with our parameters.
+//! let proof = groth16::create_random_proof(c, &params, &mut OsRng).unwrap();
+//!
+//! // Pack the hash as inputs for proof verification.
+//! let hash_bits = multipack::bytes_to_bits_le(&hash);
+//! let inputs = multipack::compute_multipacking::<Bls12>(&hash_bits);
+//!
+//! // Check the proof!
+//! assert!(groth16::verify_proof(&pvk, &proof, &inputs).unwrap());
+//! ```
+//!
+//! # Roadmap
+//!
+//! `bellman` is being refactored into a generic proving library. Currently it
+//! is pairing-specific, and different types of proving systems need to be
+//! implemented as sub-modules. After the refactor, `bellman` will be generic
+//! using the [`ff`] and [`group`] crates, while specific proving systems will
+//! be separate crates that pull in the dependencies they require.
+
// Catch documentation errors caused by code changes.
#![deny(intra_doc_link_resolution_failure)]
diff --git a/src/multicore.rs b/src/multicore.rs
index ff97e06ba..ba69b5f33 100644
--- a/src/multicore.rs
+++ b/src/multicore.rs
@@ -1,8 +1,9 @@
-//! This is an interface for dealing with the kinds of
-//! parallel computations involved in bellman. It's
-//! currently just a thin wrapper around CpuPool and
-//! crossbeam but may be extended in the future to
-//! allow for various parallelism strategies.
+//! An interface for dealing with the kinds of parallel computations involved in
+//! `bellman`. It's currently just a thin wrapper around [`CpuPool`] and
+//! [`crossbeam`] but may be extended in the future to allow for various
+//! parallelism strategies.
+//!
+//! [`CpuPool`]: futures_cpupool::CpuPool
#[cfg(feature = "multicore")]
mod implementation {