Product: Segmentation Engine

What it is

A dynamic query builder that translates a structured JSON definition into a SQL query against the students table. Used everywhere: cycle eligibility, job eligibility, student intel views, bulk enrollment, analytics, export.

Build it once as a module. Call it from everywhere.


Definition schema

interface SegmentationDefinition {
  groups: SegmentationGroup[];
  groupOperator: 'AND' | 'OR';       // how groups combine with each other
  includeIndividuals: string[];        // studentIds always included regardless of groups
  excludeIndividuals: string[];        // studentIds always excluded regardless of groups
}
 
interface SegmentationGroup {
  id: string;                          // client-side ID for UI tracking
  operator: 'AND' | 'OR';             // how conditions within this group combine
  conditions: SegmentationCondition[];
}
 
interface SegmentationCondition {
  id: string;                          // client-side ID for UI tracking
  field: SegmentationField;
  operator: ConditionOperator;
  value: string | number | boolean | string[] | null;
}
 
type ConditionOperator =
  | 'eq'          // equals
  | 'neq'         // not equals
  | 'gt'          // greater than
  | 'gte'         // greater than or equal
  | 'lt'          // less than
  | 'lte'         // less than or equal
  | 'in'          // value is in array
  | 'not_in'      // value is not in array
  | 'contains'    // string contains (ILIKE %value%)
  | 'not_contains'
  | 'is_null'     // field is null
  | 'is_not_null'
  | 'array_contains'      // for array fields: array contains value
  | 'array_not_contains';

Available segmentation fields

type SegmentationField =
  // Direct student fields
  | 'batch_year'
  | 'major'
  | 'minor'
  | 'cgpa'
  | 'cgpa_scale'
  | 'category'
  | 'profile_verified'
  | 'is_active'
  | 'is_debarred'
  | 'roll_number'
  | 'c1' | 'c2' | 'c3' | 'c4' | 'c5'
 
  // Computed / joined
  | 'applications_count'          // total applications submitted
  | 'offers_count'                // total offers received
  | 'enrolled_cycle_ids'          // array of cycle UUIDs student is enrolled in
  | 'attended_event_ids'          // array of event UUIDs student attended
  | 'attended_company_ppt_ids'    // array of companyIds where student attended PPT
  | 'has_verified_resume'         // bool: at least one fully verified resume
  | 'verification_status'         // overall profile verification status

Field metadata (for UI dropdown):

interface FieldConfig {
  field: SegmentationField;
  label: string;              // "Batch year"
  type: 'number' | 'string' | 'boolean' | 'array' | 'enum';
  enumOptions?: string[];     // for enum fields like category, major
  operators: ConditionOperator[];
}
 
const FIELDS: FieldConfig[] = [
  { field: 'batch_year',       label: 'Batch year',         type: 'number',  operators: ['eq','neq','gt','gte','lt','lte','in'] },
  { field: 'major',            label: 'Major',              type: 'string',  operators: ['eq','neq','in','not_in','contains'] },
  { field: 'cgpa',             label: 'CGPA',               type: 'number',  operators: ['gt','gte','lt','lte','eq','is_null','is_not_null'] },
  { field: 'category',         label: 'Category',           type: 'enum',    enumOptions: ['general','obc','sc','st','ews'], operators: ['eq','neq','in','not_in'] },
  { field: 'c1',               label: 'CPP attended',       type: 'boolean', operators: ['eq'] },
  { field: 'c2',               label: 'IPP attended',       type: 'boolean', operators: ['eq'] },
  { field: 'profile_verified', label: 'Profile verified',   type: 'boolean', operators: ['eq'] },
  { field: 'offers_count',     label: 'Offers received',    type: 'number',  operators: ['eq','gt','gte','lt','lte'] },
  { field: 'attended_event_ids', label: 'Attended event',   type: 'array',   operators: ['array_contains','array_not_contains'] },
  // ... etc
];

SQL engine implementation

// modules/segmentation/engine.ts
 
export function buildSegmentationSQL(def: SegmentationDefinition, tenantId: string): {
  sql: string;
  params: unknown[];
} {
  const params: unknown[] = [tenantId];
  let paramIdx = 2;  // $1 = tenantId
 
  function addParam(val: unknown): string {
    params.push(val);
    return `$${paramIdx++}`;
  }
 
  function buildCondition(cond: SegmentationCondition): string {
    const col = fieldToColumn(cond.field);  // e.g. "batch_year" → "s.batch_year"
 
    switch (cond.operator) {
      case 'eq':          return `${col} = ${addParam(cond.value)}`;
      case 'neq':         return `${col} != ${addParam(cond.value)}`;
      case 'gt':          return `${col} > ${addParam(cond.value)}`;
      case 'gte':         return `${col} >= ${addParam(cond.value)}`;
      case 'lt':          return `${col} < ${addParam(cond.value)}`;
      case 'lte':         return `${col} <= ${addParam(cond.value)}`;
      case 'in':          return `${col} = ANY(${addParam(cond.value as string[])})`;
      case 'not_in':      return `NOT (${col} = ANY(${addParam(cond.value as string[])}))`;
      case 'contains':    return `${col} ILIKE ${addParam(`%${cond.value}%`)}`;
      case 'not_contains':return `${col} NOT ILIKE ${addParam(`%${cond.value}%`)}`;
      case 'is_null':     return `${col} IS NULL`;
      case 'is_not_null': return `${col} IS NOT NULL`;
      case 'array_contains':     return `${addParam(cond.value)} = ANY(${col})`;
      case 'array_not_contains': return `NOT (${addParam(cond.value)} = ANY(${col}))`;
    }
  }
 
  function buildGroup(group: SegmentationGroup): string {
    if (group.conditions.length === 0) return 'TRUE';
    const parts = group.conditions.map(buildCondition);
    return `(${parts.join(` ${group.operator} `)})`;
  }
 
  // Base query with computed columns
  const baseQuery = `
    WITH student_stats AS (
      SELECT
        s.*,
        COUNT(DISTINCT a.id) AS applications_count,
        COUNT(DISTINCT CASE WHEN a.status = 'offered' THEN a.id END) AS offers_count,
        ARRAY_AGG(DISTINCT ce.cycle_id) FILTER (WHERE ce.cycle_id IS NOT NULL) AS enrolled_cycle_ids,
        ARRAY_AGG(DISTINCT ea.event_id) FILTER (WHERE ea.event_id IS NOT NULL) AS attended_event_ids
      FROM students s
      LEFT JOIN applications a ON a.student_id = s.id AND a.tenant_id = s.tenant_id
      LEFT JOIN cycle_enrollments ce ON ce.student_id = s.id AND ce.tenant_id = s.tenant_id
      LEFT JOIN event_attendances ea ON ea.student_id = s.id AND ea.tenant_id = s.tenant_id
      WHERE s.tenant_id = $1
      GROUP BY s.id
    )
  `;
 
  // Build main WHERE from groups
  const groupClauses = def.groups.map(buildGroup);
  const groupsSQL = groupClauses.length > 0
    ? groupClauses.join(` ${def.groupOperator} `)
    : 'TRUE';
 
  // Include/exclude individuals
  const includeSQL = def.includeIndividuals.length > 0
    ? `OR id = ANY(${addParam(def.includeIndividuals)})`
    : '';
  const excludeSQL = def.excludeIndividuals.length > 0
    ? `AND id != ALL(${addParam(def.excludeIndividuals)})`
    : '';
 
  const sql = `
    ${baseQuery}
    SELECT * FROM student_stats
    WHERE (${groupsSQL} ${includeSQL})
    ${excludeSQL}
    ORDER BY name ASC
  `;
 
  return { sql, params };
}

Summary aggregation

The summary endpoint runs the same query but wraps it in aggregation functions:

export async function evaluateSegmentationWithSummary(def: SegmentationDefinition, tenantId: string) {
  const { sql, params } = buildSegmentationSQL(def, tenantId);
 
  // Get matching student IDs + summary stats in one query
  const summarySQL = `
    WITH matched AS (${sql})
    SELECT
      COUNT(*) AS total,
      AVG(cgpa) AS avg_cgpa,
      MAX(cgpa) AS max_cgpa,
      MIN(cgpa) AS min_cgpa,
      COUNT(CASE WHEN c1 THEN 1 END) AS c1_count,
      COUNT(CASE WHEN c2 THEN 1 END) AS c2_count,
      JSONB_OBJECT_AGG(major, major_count) AS major_breakdown
    FROM (
      SELECT *, COUNT(*) OVER (PARTITION BY major) AS major_count
      FROM matched
    ) t
  `;
 
  const result = await prisma.$queryRawUnsafe(summarySQL, ...params);
  return result[0];
}

Usage across the product

Where What it does
Cycle creation / edit Set eligibility: "which students can enroll in this cycle"
Job creation / edit Set additional eligibility on top of cycle eligibility
Bulk enrollment modal "Enroll all students matching this segmentation"
Student intel main view Filter the student table in real time
Student intel export Export only the filtered subset
Analytics Segment analytics by cohort (e.g. "CS students from batch 2025 only")
Admin student list Save named filters as quick chips

UI component behaviour

The SegmentationBuilder component (see tenant-web.md) sends the definition to the backend on every change (debounced 500ms) to show a live count:

User changes a condition
  → debounce 500ms
  → POST /api/tenants/:slug/segmentations/evaluate { definition: {...} }
  → Response: { count: 234, summary: { avgCgpa: 7.8, ... } }
  → Show "234 students match" below the builder

This is a read-only evaluate — it does NOT save the segmentation. The save button explicitly calls POST /api/tenants/:slug/segmentations to persist.


Saved segmentations

Saved segmentations are stored in the segmentations table and can be:

  • Referenced by a placement cycle (eligibilitySegmentationId)
  • Referenced by a job (eligibilitySegmentationId)
  • Used as quick-filter chips in the student table UI
  • Re-evaluated at any time (they snapshot the definition, not the result)

Named segmentations show in a "Saved filters" dropdown in the student intel view.


Example definitions

Batch 2025 CS or Econ students with CGPA above 7.5

{
  "groups": [
    {
      "operator": "AND",
      "conditions": [
        { "field": "batch_year", "operator": "eq", "value": 2025 },
        { "field": "major", "operator": "in", "value": ["CS", "Economics"] },
        { "field": "cgpa", "operator": "gte", "value": 7.5 }
      ]
    }
  ],
  "groupOperator": "AND",
  "includeIndividuals": [],
  "excludeIndividuals": []
}

Students eligible for category 1 jobs (attended CPP, no current offer)

{
  "groups": [
    {
      "operator": "AND",
      "conditions": [
        { "field": "c1", "operator": "eq", "value": true },
        { "field": "offers_count", "operator": "eq", "value": 0 },
        { "field": "is_debarred", "operator": "eq", "value": false }
      ]
    }
  ],
  "groupOperator": "AND",
  "includeIndividuals": [],
  "excludeIndividuals": ["student-uuid-manually-excluded"]
}

Students who attended McKinsey PPT OR have finance major

{
  "groups": [
    {
      "operator": "AND",
      "conditions": [
        { "field": "attended_event_ids", "operator": "array_contains", "value": "event-uuid-mckinsey-ppt" }
      ]
    },
    {
      "operator": "AND",
      "conditions": [
        { "field": "major", "operator": "eq", "value": "Finance" }
      ]
    }
  ],
  "groupOperator": "OR",
  "includeIndividuals": [],
  "excludeIndividuals": []
}