for (port = 0; port < ds->num_ports; port++) { conststruct tc_taprio_qopt_offload *offload;
offload = tas_data->offload[port]; if (!offload) continue;
tas_data->enabled = true;
if (max_cycle_time < offload->cycle_time)
max_cycle_time = offload->cycle_time; if (latest_base_time < offload->base_time)
latest_base_time = offload->base_time; if (earliest_base_time > offload->base_time) {
earliest_base_time = offload->base_time;
its_cycle_time = offload->cycle_time;
}
}
if (!list_empty(&gating_cfg->entries)) {
tas_data->enabled = true;
if (max_cycle_time < gating_cfg->cycle_time)
max_cycle_time = gating_cfg->cycle_time; if (latest_base_time < gating_cfg->base_time)
latest_base_time = gating_cfg->base_time; if (earliest_base_time > gating_cfg->base_time) {
earliest_base_time = gating_cfg->base_time;
its_cycle_time = gating_cfg->cycle_time;
}
}
if (!tas_data->enabled) return 0;
/* Roll the earliest base time over until it is in a comparable * time base with the latest, then compare their deltas. * We want to enforce that all ports' base times are within * SJA1105_TAS_MAX_DELTA 200ns cycles of one another.
*/
earliest_base_time = future_base_time(earliest_base_time,
its_cycle_time,
latest_base_time); while (earliest_base_time > latest_base_time)
earliest_base_time -= its_cycle_time; if (latest_base_time - earliest_base_time >
sja1105_delta_to_ns(SJA1105_TAS_MAX_DELTA)) {
dev_err(ds->dev, "Base times too far apart: min %llu max %llu\n",
earliest_base_time, latest_base_time); return -ERANGE;
}
dev_dbg(ds->dev, "earliest base time %lld ns\n", earliest_base_time);
dev_dbg(ds->dev, "latest base time %lld ns\n", latest_base_time);
dev_dbg(ds->dev, "longest cycle time %lld ns\n", max_cycle_time);
return 0;
}
/* Lo and behold: the egress scheduler from hell.
 *
 * At the hardware level, the Time-Aware Shaper holds a global linear array of
 * all schedule entries for all ports. These are the Gate Control List (GCL)
 * entries, let's call them "timeslots" for short. This linear array of
 * timeslots is held in BLK_IDX_SCHEDULE.
 *
 * Then there are a maximum of 8 "execution threads" inside the switch, which
 * iterate cyclically through the "schedule". Each "cycle" has an entry point
 * and an exit point, both being timeslot indices in the schedule table. The
 * hardware calls each cycle a "subschedule".
 *
 * Subschedule (cycle) i starts when
 *   ptpclkval >= ptpschtm + BLK_IDX_SCHEDULE_ENTRY_POINTS[i].delta.
 *
 * The hardware scheduler iterates BLK_IDX_SCHEDULE with a k ranging from
 *   k = BLK_IDX_SCHEDULE_ENTRY_POINTS[i].address to
 *   k = BLK_IDX_SCHEDULE_PARAMS.subscheind[i]
 *
 * For each schedule entry (timeslot) k, the engine executes the gate control
 * list entry for the duration of BLK_IDX_SCHEDULE[k].delta.
* * +---------+ * | | BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS * +---------+ * | * +-----------------+ * | .actsubsch * BLK_IDX_SCHEDULE_ENTRY_POINTS v * +-------+-------+ * |cycle 0|cycle 1| * +-------+-------+ * | | | | * +----------------+ | | +-------------------------------------+ * | .subschindx | | .subschindx | * | | +---------------+ | * | .address | .address | | * | | | | * | | | | * | BLK_IDX_SCHEDULE v v | * | +-------+-------+-------+-------+-------+------+ | * | |entry 0|entry 1|entry 2|entry 3|entry 4|entry5| | * | +-------+-------+-------+-------+-------+------+ | * | ^ ^ ^ ^ | * | | | | | | * | +-------------------------+ | | | | * | | +-------------------------------+ | | | * | | | +-------------------+ | | * | | | | | | * | +---------------------------------------------------------------+ | * | |subscheind[0]<=subscheind[1]<=subscheind[2]<=...<=subscheind[7]| | * | +---------------------------------------------------------------+ | * | ^ ^ BLK_IDX_SCHEDULE_PARAMS | * | | | | * +--------+ +-------------------------------------------+ * * In the above picture there are two subschedules (cycles): * * - cycle 0: iterates the schedule table from 0 to 2 (and back) * - cycle 1: iterates the schedule table from 3 to 5 (and back) * * All other possible execution threads must be marked as unused by making * their "subschedule end index" (subscheind) equal to the last valid * subschedule's end index (in this case 5).
*/ int sja1105_init_scheduling(struct sja1105_private *priv)
{ struct sja1105_schedule_entry_points_entry *schedule_entry_points; struct sja1105_schedule_entry_points_params_entry
*schedule_entry_points_params; struct sja1105_schedule_params_entry *schedule_params; struct sja1105_tas_data *tas_data = &priv->tas_data; struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg; struct sja1105_schedule_entry *schedule; struct dsa_switch *ds = priv->ds; struct sja1105_table *table; int schedule_start_idx;
s64 entry_point_delta; int schedule_end_idx; int num_entries = 0; int num_cycles = 0; int cycle = 0; int i, k = 0; int port, rc;
rc = sja1105_tas_set_runtime_params(priv); if (rc < 0) return rc;
/* Figure out the dimensioning of the problem */ for (port = 0; port < ds->num_ports; port++) { if (tas_data->offload[port]) {
num_entries += tas_data->offload[port]->num_entries;
num_cycles++;
}
}
if (!list_empty(&gating_cfg->entries)) {
num_entries += gating_cfg->num_entries;
num_cycles++;
}
/* Nothing to do */ if (!num_cycles) return 0;
/* Pre-allocate space in the static config tables */
/* Schedule Points Parameters Table */
table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
table->entries = kcalloc(SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
table->ops->unpacked_entry_size, GFP_KERNEL); if (!table->entries) /* Previously allocated memory will be freed automatically in * sja1105_static_config_free. This is true for all early * returns below.
*/ return -ENOMEM;
table->entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT;
schedule_entry_points_params = table->entries;
for (port = 0; port < ds->num_ports; port++) { conststruct tc_taprio_qopt_offload *offload; /* Relative base time */
s64 rbt;
offload = tas_data->offload[port]; if (!offload) continue;
schedule_start_idx = k;
schedule_end_idx = k + offload->num_entries - 1; /* This is the base time expressed as a number of TAS ticks * relative to PTPSCHTM, which we'll (perhaps improperly) call * the operational base time.
*/
rbt = future_base_time(offload->base_time,
offload->cycle_time,
tas_data->earliest_base_time);
rbt -= tas_data->earliest_base_time; /* UM10944.pdf 4.2.2. Schedule Entry Points table says that * delta cannot be zero, which is shitty. Advance all relative * base times by 1 TAS delta, so that even the earliest base * time becomes 1 in relative terms. Then start the operational * base time (PTPSCHTM) one TAS delta earlier than planned.
*/
entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
/* The subschedule end indices need to be * monotonically increasing.
*/ for (i = cycle; i < 8; i++)
schedule_params->subscheind[i] = schedule_end_idx;
for (i = 0; i < offload->num_entries; i++, k++) {
s64 delta_ns = offload->entries[i].interval;
/* Be there 2 port subschedules, each executing an arbitrary number of gate * open/close events cyclically. * None of those gate events must ever occur at the exact same time, otherwise * the switch is known to act in exotically strange ways. * However the hardware doesn't bother performing these integrity checks. * So here we are with the task of validating whether the new @admin offload * has any conflict with the already established TAS configuration in * tas_data->offload. We already know the other ports are in harmony with one * another, otherwise we wouldn't have saved them. * Each gate event executes periodically, with a period of @cycle_time and a * phase given by its cycle's @base_time plus its offset within the cycle * (which in turn is given by the length of the events prior to it). * There are two aspects to possible collisions: * - Collisions within one cycle's (actually the longest cycle's) time frame. * For that, we need to compare the cartesian product of each possible * occurrence of each event within one cycle time. * - Collisions in the future. Events may not collide within one cycle time, * but if two port schedules don't have the same periodicity (aka the cycle * times aren't multiples of one another), they surely will some time in the * future (actually they will collide an infinite amount of times).
*/ staticbool
sja1105_tas_check_conflicts(struct sja1105_private *priv, int port, conststruct tc_taprio_qopt_offload *admin)
{ struct sja1105_tas_data *tas_data = &priv->tas_data; conststruct tc_taprio_qopt_offload *offload;
s64 max_cycle_time, min_cycle_time;
s64 delta1, delta2;
s64 rbt1, rbt2;
s64 stop_time;
s64 t1, t2; int i, j;
s32 rem;
offload = tas_data->offload[port]; if (!offload) returnfalse;
/* Check if the two cycle times are multiples of one another. * If they aren't, then they will surely collide.
*/
max_cycle_time = max(offload->cycle_time, admin->cycle_time);
min_cycle_time = min(offload->cycle_time, admin->cycle_time);
div_s64_rem(max_cycle_time, min_cycle_time, &rem); if (rem) returntrue;
/* Calculate the "reduced" base time of each of the two cycles * (transposed back as close to 0 as possible) by dividing to * the cycle time.
*/
div_s64_rem(offload->base_time, offload->cycle_time, &rem);
rbt1 = rem;
/* delta1 is the relative base time of each GCL entry within * the established ports' TAS config.
*/ for (i = 0, delta1 = 0;
i < offload->num_entries;
delta1 += offload->entries[i].interval, i++) { /* delta2 is the relative base time of each GCL entry * within the newly added TAS config.
*/ for (j = 0, delta2 = 0;
j < admin->num_entries;
delta2 += admin->entries[j].interval, j++) { /* t1 follows all possible occurrences of the * established ports' GCL entry i within the * first cycle time.
*/ for (t1 = rbt1 + delta1;
t1 <= stop_time;
t1 += offload->cycle_time) { /* t2 follows all possible occurrences * of the newly added GCL entry j * within the first cycle time.
*/ for (t2 = rbt2 + delta2;
t2 <= stop_time;
t2 += admin->cycle_time) { if (t1 == t2) {
dev_warn(priv->ds->dev, "GCL entry %d collides with entry %d of port %d\n",
j, i, port); returntrue;
}
}
}
}
}
returnfalse;
}
/* Check the tc-taprio configuration on @port for conflicts with the tc-gate * global subschedule. If @port is -1, check it against all ports. * To reuse the sja1105_tas_check_conflicts logic without refactoring it, * convert the gating configuration to a dummy tc-taprio offload structure.
*/ bool sja1105_gating_check_conflicts(struct sja1105_private *priv, int port, struct netlink_ext_ack *extack)
{ struct sja1105_gating_config *gating_cfg = &priv->tas_data.gating_cfg;
size_t num_entries = gating_cfg->num_entries; struct tc_taprio_qopt_offload *dummy; struct dsa_switch *ds = priv->ds; struct sja1105_gate_entry *e; bool conflict; int i = 0;
if (list_empty(&gating_cfg->entries)) returnfalse;
dummy = kzalloc(struct_size(dummy, entries, num_entries), GFP_KERNEL); if (!dummy) {
NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory"); returntrue;
}
/* The cycle time extension is the amount of time the last cycle from * the old OPER needs to be extended in order to phase-align with the * base time of the ADMIN when that becomes the new OPER. * But of course our switch needs to be reset to switch-over between * the ADMIN and the OPER configs - so much for a seamless transition. * So don't add insult over injury and just say we don't support cycle * time extension.
*/ if (admin->cycle_time_extension) return -ENOTSUPP;
for (i = 0; i < admin->num_entries; i++) {
s64 delta_ns = admin->entries[i].interval;
s64 delta_cycles = ns_to_sja1105_delta(delta_ns); bool too_long, too_short;
/* The schedule engine and the PTP clock are driven by the same oscillator, and * they run in parallel. But whilst the PTP clock can keep an absolute * time-of-day, the schedule engine is only running in 'ticks' (25 ticks make * up a delta, which is 200ns), and wrapping around at the end of each cycle. * The schedule engine is started when the PTP clock reaches the PTPSCHTM time * (in PTP domain). * Because the PTP clock can be rate-corrected (accelerated or slowed down) by * a software servo, and the schedule engine clock runs in parallel to the PTP * clock, there is logic internal to the switch that periodically keeps the * schedule engine from drifting away. The frequency with which this internal * syntonization happens is the PTP clock correction period (PTPCLKCORP). It is * a value also in the PTP clock domain, and is also rate-corrected. * To be precise, during a correction period, there is logic to determine by * how many scheduler clock ticks has the PTP clock drifted. At the end of each * correction period/beginning of new one, the length of a delta is shrunk or * expanded with an integer number of ticks, compared with the typical 25. * So a delta lasts for 200ns (or 25 ticks) only on average. * Sometimes it is longer, sometimes it is shorter. The internal syntonization * logic can adjust for at most 5 ticks each 20 ticks. * * The first implication is that you should choose your schedule correction * period to be an integer multiple of the schedule length. Preferably one. * In case there are schedules of multiple ports active, then the correction * period needs to be a multiple of them all. Given the restriction that the * cycle times have to be multiples of one another anyway, this means the * correction period can simply be the largest cycle time, hence the current * choice. This way, the updates are always synchronous to the transmission * cycle, and therefore predictable. 
* * The second implication is that at the beginning of a correction period, the * first few deltas will be modulated in time, until the schedule engine is * properly phase-aligned with the PTP clock. For this reason, you should place * your best-effort traffic at the beginning of a cycle, and your * time-triggered traffic afterwards. * * The third implication is that once the schedule engine is started, it can * only adjust for so much drift within a correction period. In the servo you * can only change the PTPCLKRATE, but not step the clock (PTPCLKADD). If you * want to do the latter, you need to stop and restart the schedule engine, * which is what the state machine handles.
*/ staticvoid sja1105_tas_state_machine(struct work_struct *work)
{ struct sja1105_tas_data *tas_data = work_to_sja1105_tas(work); struct sja1105_private *priv = tas_to_sja1105(tas_data); struct sja1105_ptp_data *ptp_data = &priv->ptp_data; struct timespec64 base_time_ts, now_ts; struct dsa_switch *ds = priv->ds; struct timespec64 diff;
s64 base_time, now; int rc = 0;
mutex_lock(&ptp_data->lock);
switch (tas_data->state) { case SJA1105_TAS_STATE_DISABLED: /* Can't do anything at all if clock is still being stepped */ if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) break;
rc = sja1105_tas_adjust_drift(priv, tas_data->max_cycle_time); if (rc < 0) break;
rc = __sja1105_ptp_gettimex(ds, &now, NULL); if (rc < 0) break;
/* Plan to start the earliest schedule first. The others * will be started in hardware, by way of their respective * entry points delta. * Try our best to avoid fringe cases (race condition between * ptpschtm and ptpstrtsch) by pushing the oper_base_time at * least one second in the future from now. This is not ideal, * but this only needs to buy us time until the * sja1105_tas_start command below gets executed.
*/
base_time = future_base_time(tas_data->earliest_base_time,
tas_data->max_cycle_time,
now + 1ull * NSEC_PER_SEC);
base_time -= sja1105_delta_to_ns(1);
rc = sja1105_tas_set_base_time(priv, base_time); if (rc < 0) break;
dev_dbg(ds->dev, "OPER base time %lld.%09ld (now %lld.%09ld)\n",
base_time_ts.tv_sec, base_time_ts.tv_nsec,
now_ts.tv_sec, now_ts.tv_nsec);
break;
case SJA1105_TAS_STATE_ENABLED_NOT_RUNNING: if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) { /* Clock was stepped.. bad news for TAS */
sja1105_tas_stop(priv); break;
}
/* Check if TAS has actually started, by comparing the * scheduled start time with the SJA1105 PTP clock
*/
rc = __sja1105_ptp_gettimex(ds, &now, NULL); if (rc < 0) break;
if (now < tas_data->oper_base_time) { /* TAS has not started yet */
diff = ns_to_timespec64(tas_data->oper_base_time - now);
dev_dbg(ds->dev, "time to start: [%lld.%09ld]",
diff.tv_sec, diff.tv_nsec); break;
}
/* Time elapsed, what happened? */
rc = sja1105_tas_check_running(priv); if (rc < 0) break;
if (tas_data->state != SJA1105_TAS_STATE_RUNNING) /* TAS has started */
dev_err(ds->dev, "TAS not started despite time elapsed\n");
break;
case SJA1105_TAS_STATE_RUNNING: /* Clock was stepped.. bad news for TAS */ if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
sja1105_tas_stop(priv); break;
}
rc = sja1105_tas_check_running(priv); if (rc < 0) break;
if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
dev_err(ds->dev, "TAS surprisingly stopped\n");
break;
default: if (net_ratelimit())
dev_err(ds->dev, "TAS in an invalid state (incorrect use of API)!\n");
}
if (rc && net_ratelimit())
dev_err(ds->dev, "An operation returned %d\n", rc);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.