group-call.proto 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394
  1. // # Group Call Protocol
  2. //
  3. // Note that group calls are not necessarily bound to a Threema group. _Group_
  4. // refers to a group of call participants and is a way to distinguish from 1:1
  5. // Threema calls.
  6. //
  7. // There are two primary variants which use the same technology underneath:
  8. //
  9. // - A group call scoped to a (Threema) group is simple and easy to use. It does
  10. // not have any advanced functionality such as administration or external
  11. // guests. Only one group call is intended to run within a group.
  12. // - A conference call is a more advanced type of group call and delivers more
  13. // advanced functionality such as administration. Concrete specification
  14. // pending.
  15. //
  16. // The theoretical maximum amount of participants is 790 (due to the way we
  17. // derive WebRTC MIDs) but the practical limit is way below that.
  18. //
  19. // ## Terminology
  20. //
  21. // - `GCK`: Group Call Key, only used for key derivation
  22. // - `GCKH`: Group Call Key Hash
  23. // - `GCNHAK`: Group Call Normal Handshake Authentication Key
  24. // - `GCHK`: Group Call Handshake Key
  25. // - `GCSK`: Group Call State Key
  26. // - `GCAK`: Group Call Administrator Key, only used for key derivation
  27. // - `GCAMK`: Group Call Administrator Message Key
  28. // - `PCK`: Participant Call Key
  29. // - `PCMK`: Participant Call Media Key, only used for key derivation
  30. // - `PCMK'`: Ratchet iteration of PCMK
  31. // - `PCMFK`: Participant Call Media Frame Key
  32. // - `PCCK`: Participant Call Cookie
  33. // - `PCSN`: Participant Call Sequence Number
  34. // - `MFSN`: Media Frame Sequence Number
  35. //
  36. // ## General Information
  37. //
  38. // **Endianness**: All integers use little-endian encoding.
  39. //
  40. // **Encryption cipher**: XSalsa20-Poly1305, unless otherwise specified.
  41. //
  42. // **Nonce format**:
  43. //
  44. // - a 16 byte cookie (PCCK), followed by
  45. // - a monotonically increasing sequence number (PCSN, u64-le).
  46. //
  47. // **Sequence number**: The sequence number starts with `1` and is counted
  48. // separately for each direction (i.e. there is one sequence number counter for
  49. // the sender and one for the receiver). We will use `PCSN+` in this document to
  50. // denote that the counter should be increased **after** the value has been
  51. // inserted (i.e. semantically equivalent to `x++` in many languages).
  52. //
  53. // Note: This format is equivalent to the CSP transport encryption.
  54. //
  55. // ## Key Derivation
  56. //
  57. // Note: All keys that are not derived from `GCK` directly will be derived using
  58. // `GCKH` as input. This ensures that exchanged secret keys are useless if the
  59. // Group Call ID has been exposed (unless `GCK` is also known to the attacker).
  60. //
  61. // GCKH = BLAKE2b(key=GCK, salt='#', personal='3ma-call')
  62. //
  63. // GCHK = BLAKE2b(key=GCK, salt='h', personal='3ma-call')
  64. // GCSK = BLAKE2b(key=GCK, salt='s', personal='3ma-call')
  65. //
  66. // GCAMK = BLAKE2b(key=GCAK, salt='am', personal='3ma-call', input=GCKH)
  67. //
  68. // PCMK' = BLAKE2b(key=PCMK, salt="m'", personal='3ma-call')
  69. // PCMFK = BLAKE2b(key=PCMK, salt='mf', personal='3ma-call', input=GCKH)
  70. //
  71. // ## Group Call ID Derivation
  72. //
  73. // For group calls scoped to groups, the Group Call ID is derived by running
  74. // BLAKE2b on specific data provided by the `GroupCallStart`:
  75. //
  76. // group-call-id = BLAKE2b(
  77. // out-length=32,
  78. // salt='i',
  79. // personal='3ma-call',
  80. // input=
  81. // group-creator-identity
  82. // || group-id
  83. // || u8(GroupCallStart.protocol_version)
  84. // || GroupCallStart.gck
  85. // || utf8-encode(GroupCallStart.sfu_base_url),
  86. // )
  87. //
  88. // ## Protocol Flow
  89. //
  90. // ### Obtain SFU Information
  91. //
  92. // Before a call can be joined or created, SFU information and an authentication
  93. // token need to be obtained via the Directory Server API. The obtained
  94. // information includes the following items referenced in subsequent sections:
  95. //
  96. // - _SFU Base URL_: Base URL used to create and distribute new calls.
  97. // - _Allowed SFU Hostname Suffixes_: A set of allowed hostname suffixes to be
  98. // applied against the _SFU Base URL_ when joining calls.
  99. // - _SFU Token_: An opaque token used to authenticate against the SFU.
  100. //
  101. // When receiving the SFU information, ensure the _SFU Base URL_ uses the scheme
  102. // `https` and the included hostname ends with one of the _Allowed SFU Hostname
  103. // Suffixes_.
  104. //
  105. // ### Scoped to Group
  106. //
  107. // #### Periodic Refresh
  108. //
  109. // The following steps are defined as the _Group Call Refresh Steps_ and will be
  110. // applied to update the group calls that are currently considered running
  111. // within a group, determining which one of them is the chosen call and
  112. // potentially join the chosen call:
  113. //
  114. // 1. Let `running` be the list of group calls that are currently considered
  115. // running within the group.
  116. // 2. Let `calls` be a copy of `running`. Reset the _token-refreshed_ mark of
  117. // each `call` of `calls` (or simply scope it to the execution of these
  118. // steps).
  119. // 3. For each `call` of `calls`, run the following steps (labelled _peek-call_)
  120. // concurrently and wait for them to return:
  121. // 1. If the user is currently participating in `call`, abort the _peek-call_
  122. // sub-steps.
  123. // 2. _Peek_ the `call` via a `SfuHttpRequest.Peek` request. If this does not
  124. // result in a response within 5s, remove `call` from `calls` and abort
  125. // the _peek-call_ sub-steps.
  126. // 3. If the received status code for `call` is `401` and `call` is not
  127. // marked with _token-refreshed_:
  128. // 1. Refresh the _SFU Token_. If the _SFU Token_ refresh fails or does
  129. // not yield an _SFU Token_ within 10s, remove `call` from `calls` and
  130. // abort the _peek-call_ sub-steps.
  131. // 2. Mark the `call` as _token-refreshed_.
  132. // 3. Restart the _peek-call_ sub-steps for this `call`.
  133. // 4. If the server could not be reached or the received status code is not
  134. // `200` or if the _Peek_ response could not be decoded:
  135. // 1. Remove `call` from `calls`.
  136. // 2. If the received status code is `404`, remove `call` from `running`
  137. // and abort the _peek-call_ sub-steps.
  138. // 3. If the `call`'s _failed_ counter is `>= 3` and the `call` was
  139. // received more than 10h ago, remove `call` from `running` and abort
  140. // the _peek-call_ sub-steps.
  141. // 4. Increase the _failed_ counter for `call` by `1` and abort the
  142. // _peek-call_ sub-steps.
  143. // 5. Reset the `call`'s _failed_ counter to `0`.
  144. // 6. If the protocol version of the `call` is not supported, remove `call`
  145. // from `calls`, log a warning that a group call with an unsupported
  146. // version is currently running and abort the _peek-call_ sub-steps.
  147. // 7. (`call` is kept in `calls` and in `running`.)
  148. // 4. If `running` is empty, cancel the timer to periodically re-run the _Group
  149. // Call Refresh Steps_ of this group. Otherwise, restart or schedule the
  150. // timer to re-run the _Group Call Refresh Steps_ of this group in 10s.
  151. // 5. Let `chosen-call` be any call of `calls` with the highest `started_at`
  152. // value (i.e. the most recently created call) as provided by the _peek_
  153. // result.
  154. // 6. If `chosen-call` is not defined, signal that no group call is currently
  155. // running within the group, abort these steps and return `chosen-call`.
  156. // 7. Signal `chosen-call` as the currently running group call within the group.
  157. // 8. If the _Group Call Join Steps_ are currently running with a different (or
  158. // new) group call than `chosen-call`, cancel and restart the _Group Call
  159. // Join Steps_ asynchronously with the same `intent` but with the
  160. // `chosen-call`.
  161. // 9. If the user is currently participating in a group call of this group that
  162. // is different to `chosen-call`, exit the running group call and run the
  163. // _Group Call Join Steps_ asynchronously with the `intent` to _only join_
  164. // `chosen-call`.
  165. // 10. Return `chosen-call`.
  166. //
  167. // Note: The above steps have been carefully crafted to gracefully handle cases
  168. // where the SFU of one call cannot be reached for a short period of time.
  169. //
  170. // When the Threema app is active, run the _Group Call Refresh Steps_ for each
  171. // group. This will start a timer to refresh any group call status.
  172. //
  173. // When the user leaves a group call, run the _Group Call Refresh Steps_ for the
  174. // respective group.
  175. //
  176. // The above described timer may be cancelled when the Threema app is inactive.
  177. // The timer interval may be increased to 30s in case the group conversation is
  178. // currently not visible to the user.
  179. //
  180. // #### Create or Join
  181. //
  182. // The following steps are to be run when a user wants to join a group call of a
  183. // group where a group call is currently considered running (e.g. the user hits
  184. // _join_ in the UI) or when the user intends to create a group call for a group
  185. // where no group call is currently considered running (e.g. the user hits the
  186. // _call_ button in the UI):
  187. //
  188. // 1. Let `intent` be the user's intent, i.e. to either _only join_ or _create
  189. // or join_ a group call.
  190. // 2. Refresh the _SFU Token_ if necessary. If the _SFU Token_ refresh fails
  191. // or does not complete within 10s, abort these steps and notify the user.
  192. // 3. Run the _Group Call Refresh Steps_ for the respective group and let `call`
  193. // be the result.
  194. // 4. If `call` is undefined and `intent` is to _only join_, abort these steps
  195. // and notify the user that no group call is running / the group call is no
  196. // longer running.
  197. // 5. If `call` is undefined, create (but don't send) a `GroupCallStart`
  198. // message, apply it to `call` and mark `call` as _new_.
  199. // 6. Run the _Group Call Join Steps_ with the `intent` and `call`.
  200. //
  201. // The following steps are defined as the _Group Call Join Steps_ (also applied
  202. // for creating a group call):
  203. //
  204. // 1. Let `intent` be either _only join_ or _create or join_. Let `call` be the
  205. // given group call to be joined (or created).
  206. // 2. _Join_ (or implicitly create) the group call via a `SfuHttpRequest.Join`
  207. // request. If this does not result in a response within 10s, abort these
  208. // steps and notify the user.
  209. // 3. If the received status code is `503`, notify the user that the group call
  210. // is full and abort these steps.
  211. // 4. If the server could not be reached or the received status code is not
  212. // `200` or if the _Join_ response could not be decoded, abort these steps
  213. // and notify the user.
  214. // 5. Establish a WebRTC connection to the SFU with the information provided in
  215. // the _Join_ response. Wait until the SFU sent the initial
  216. // `SfuToParticipant.Hello` message via the associated data channel. Let
  217. // `hello` be that message.
  218. // 6. If the `hello.participants` contains less than 4 items, set the initial
  219. // capture state of the microphone to _on_.
  220. // 7. If `call` is marked as _new_:
  221. // 1. Optionally add an artificial wait period of 2s minus the time elapsed
  222. // since step 1.[^1]
  223. // 2. Let `message-id` be a random message ID.
  224. // 3. Schedule a persistent task to run the _Bundled Messages Send Steps_ with
  225. // the following properties:
  226. // - `id` set to `message-id`,
  227. // - `receivers` set to all group members that have `GROUP_CALL_SUPPORT`,
  228. // - to construct a `GroupCallStart` message from `call`.
  229. // 4. Add the created `call` to the list of group calls that are currently
  230. // considered running.
  231. // 5. Asynchronously run the _Group Call Refresh Steps_.[^2]
  232. // 8. The group call is now considered established and should asynchronously
  233. // invoke the SFU to Participant and Participant to Participant flows.
  234. //
  235. // [^1]: This prevents butter-fingered users from accidentally starting a group
  236. // call.
  237. //
  238. // [^2]: This will initiate the refresh timer for a newly created call and
  239. // signal it to the UI.
  240. //
  241. // Note: Implementations need to ensure that only one group call can be active
  242. // at the same time in the application. This means that only one invocation of
  243. // the _Create or Join_ flow and only one invocation of the _Group Call Join
  244. // Steps_ can be active. Be aware that these steps can be cancelled by the user
  245. // and by the _Group Call Refresh Steps_.
  246. //
  247. // ### SFU to Participant Flow
  248. //
  249. // Upon successful joining via `SfuHttpRequest.Join`, the SFU waits for the
  250. // client to establish a WebRTC connection and then announces all participants
  251. // to the newly joined participant in its `SfuToParticipant.Hello` message.
  252. //
  253. // When another participant joins or leaves, a `ParticipantJoined` or
  254. // `ParticipantLeft` message will be sent.
  255. //
  256. // At any time, participants may subscribe to or unsubscribe from the
  257. // microphone, camera and screen data of other participants.
  258. //
  259. // If the user is alone in a call for more than 3 minutes, the call should be
  260. // left to save resources. The SFU will automatically drop such calls after 5
  261. // minutes but this results in non-ideal UX.
  262. //
  263. // ### Participant to Participant Flow
  264. //
  265. // Unlike the other flows, this one is more complicated and needs to be done
  266. // separately for each other participant. During the handshake, ephemeral
  267. // encryption keys will be established.
  268. //
  269. // Note that multiple participants with the same Threema ID in the same call are
  270. // **explicitly allowed**. Not only can this happen in case the connection has
  271. // been lost (e.g. the client already reconnected but the SFU has not detected
  272. // connection loss yet), but it is also a feature for multi-device capable
  273. // clients.
  274. //
  275. // #### Handshake
  276. //
  277. // When a new participant (NP) joins, it must authenticate each other existing
  278. // participant (EP) and establish an ephemeral shared secret (`PCK`). The flow
  279. // depends on whether NP and EP are normal or guest participants:
  280. //
  281. // If both are normal participants:
  282. //
  283. // NP ----- Hello ---> EP
  284. // NP <---- Hello ---- EP
  285. // NP <---- Auth ----- EP
  286. // NP ----- Auth ----> EP
  287. //
  288. // If both are guest participants:
  289. //
  290. // NP -- GuestHello -> EP
  291. // NP <- GuestHello -- EP
  292. // NP <- GuestAuth --- EP
  293. // NP -- GuestAuth --> EP
  294. //
  295. // If NP is a normal participant and EP is a guest participant:
  296. //
  297. // NP ----- Hello ---> EP
  298. // NP <- GuestHello -- EP
  299. // NP <- GuestAuth --- EP
  300. // NP -- GuestAuth --> EP
  301. //
  302. // If NP is a guest participant and EP is a normal participant:
  303. //
  304. // NP -- GuestHello -> EP
  305. // NP <---- Hello ---- EP
  306. // NP <- GuestAuth --- EP
  307. // NP -- GuestAuth --> EP
  308. //
  309. // Note: This looks more intimidating than it really is. Basically, if either is
  310. // a guest, we fulfill the guest handshake but both always start with sending
  311. // their respective role's _hello_ variant.
  312. //
  313. // For group calls scoped to groups:
  314. //
  315. // - Only handshake messages from Threema IDs that are part of the group are
  316. // allowed.
  317. // - External guests are not allowed and therefore the guest handshake is not
  318. // allowed.
  319. //
  320. // #### Post-Handshake
  321. //
  322. // After the handshake, **both** sides run the following steps:
  323. //
  324. // 1. Subscribe to the other participant's microphone feed (i.e. send a
  325. // `ParticipantMicrophone` message to the SFU).
  326. // 2. If the user is an administrator, send an `Admin.ReportAsAdmin` message to
  327. // the other participant.
  328. // 3. If _hold_ is currently active, send a `Hold` message to the other
  329. // participant.
  330. // 4. If _hold_ is not currently active, send a `CaptureState` message to the
  331. // other participant for each device (camera, microphone, ...) that is
  332. // currently activated (`Mode` is `ON`).
  333. //
  334. // #### Join/Leave of Other Participants
  335. //
  336. // When a new participant joins, all other participants run the following steps:
  337. //
  338. // 1. Let `pcmk` be the currently _applied_ PCMK with the associated context.
  339. // 2. If the amount of ratchet rounds for `pcmk` is `255`, abort the call with
  340. // an error condition and abort these steps.
  341. // 3. Advance the ratchet of `pcmk` once (i.e. replace the key by deriving
  342. // PCMK') and apply for media encryption immediately. Note: Do **not** reset
  343. // the MFSN!
  344. // 4. Set the _handshake state_ of this participant to `await-np-hello`.
  345. //
  346. // Note: The announcement of the new participant is guaranteed to be sent prior
  347. // to any handshake messages of the new participant.
  348. //
  349. // When a participant leaves, all other participants run the following steps:
  350. //
  351. // 1. Let `pending-pcmk` be the currently _pending_ PCMK with the associated context.
  352. // 2. If `pending-pcmk` exists, additionally mark `pending-pcmk` as _stale_ and
  353. // abort these steps.
  354. // 3. Let `current-pcmk` be the currently _applied_ PCMK with the associated
  355. // context.
  356. // 4. Set `pending-pcmk` in the following way:
  357. // 1. Generate a new cryptographically secure random PCMK and assign it to
  358. // `pending-pcmk`.
  359. // 2. Set `pending-pcmk.epoch` to `current-pcmk.epoch + 1`, wrap back to `0`
  360. // if it would be `256`.
  361. // 3. Set `pending-pcmk.ratchet_counter` to `0`.
  362. // 4. Do **not** reset the MFSN! Continue the existing MFSN counter of the
  363. // previous PCMK.
  364. // 5. Send `pending-pcmk` to all authenticated participants via a _rekey_
  365. // message.
  366. // 6. Schedule a volatile task bound to the call to run the following steps
  367. // after 2s:
  368. // 1. Apply `pending-pcmk` for media encryption. This means that
  369. // `pending-pcmk` now replaces the _applied_ PCMK and is no longer
  370. // _pending_.
  371. // 2. If `pending-pcmk` is marked as _stale_, run the parent steps from the
  372. // beginning.
  373. //
  374. // When a participant receives a _rekey_ message from another participant:
  375. //
  376. // 1. Let `current-pcmk` be the PCMK and its associated context used for the
  377. // participant.
  378. // 2. Let `new-pcmk` be the media keys (PCMK) of the received message.
  379. // 3. Store `new-pcmk` as a successor to `current-pcmk` (and any other successor
  380. // already stored on `current-pcmk`) and follow the description of the media
  381. // frame on when to apply it.
  382. //
  383. // Note: The result of the above steps is that re-keying is throttled but always
  384. // catches up to the current participant state with a maximum delay of 4s.
  385. //
  386. // #### State Update
  387. //
  388. // One of the participants is deterministically designated to update the
  389. // peekable call state every 10s and additionally every time a participant joins
  390. // or leaves. If the call state has not been updated/refreshed for 30s, the SFU
  391. // will delete it.
  392. //
  393. // After each change to the list of participants, run the following steps to
  394. // determine whether the user is designated:
  395. //
  396. // 1. Cancel any running timer to update the call state.
  397. // 2. Let `candidates` be a list of all currently authenticated non-guest
  398. // participants.
  399. // 3. If `candidates` is empty, add all currently authenticated guest
  400. // participants to the list.
  401. // 4. If the user is not in `candidates`, abort these steps.
  402. // 5. If the user does not have the lowest participant ID in `candidates`, abort
  403. // these steps.
  404. // 6. Send a `ParticipantToSfu.UpdateCallState` message to the SFU and schedule
  405. // a repetitive timer to repeat this step every 10s.
  406. //
  407. // Note: The above algorithm is prone to races since the authentication process
  408. // is asynchronous for each participant pair. However, this should not be an
  409. // issue as they'd essentially post the same status (eventually).
  410. syntax = "proto3";
  411. package groupcall;
  412. option java_package = "ch.threema.protobuf.groupcall";
  413. option java_multiple_files = true;
  414. import "common.proto";
  415. // Current call state as announced by the designated client.
  416. //
  417. // Note: The accuracy of the `CallState` must not be relied upon as it can be
  418. // out of date and can be replayed by the SFU.
  419. message CallState {
  420. // Random amount of padding, ignored by the receiver.
  421. bytes padding = 1;
  422. // Participant ID of the designated client that created this message.
  423. uint32 state_created_by = 2;
  424. // UNIX-ish timestamp in milliseconds at which the designated client created
  425. // this message.
  426. uint64 state_created_at = 3;
  427. // Information for a single participant.
  428. message Participant {
  429. reserved 1; // Redundant participant ID (presumably already given by the `participants` map key - TODO confirm)
  430. // A _normal_ participant, i.e. a Threema client.
  431. message Normal {
  432. // Threema ID of this participant.
  433. string identity = 1;
  434. // Nickname associated with the Threema ID (without `~` prefix).
  435. string nickname = 2;
  436. }
  437. // A _guest_ participant.
  438. message Guest {
  439. // The guest's self-assigned name.
  440. string name = 1;
  441. }
  442. // Type-specific information; as a `oneof`, at most one variant is set.
  443. oneof participant {
  444. Normal threema = 2;
  445. Guest guest = 3;
  446. }
  447. }
  448. // Information for each participant of the group call (map key is presumably the participant ID - TODO confirm).
  449. map<uint32, Participant> participants = 4;
  450. }
  451. // Supported features. Each value is a flag to be combined into a bitmask.
  452. enum SupportedFeature {
  453. // Base feature support (always present); the zero value sets no bit in the mask.
  454. BASE = 0x0000;
  455. // Support for screen sharing (lowest bit of the mask).
  456. SCREEN_SHARE = 0x0001;
  457. }
  458. // Request payloads sent to the SFU as part of an HTTP request.
  459. message SfuHttpRequest {
  460. // Peeks for the current state of the group call for the given Group Call ID.
  461. //
  462. // IMPORTANT: The _peek_ process is considered stable across different
  463. // protocol versions. Therefore, the message **should** maintain backwards
  464. // compatibility!
  465. //
  466. // The URL is formed in the following way:
  467. //
  468. // <sfu_base_url>/v1/peek/<call_id-as-hex>
  469. //
  470. // When sending this request:
  471. //
  472. // 1. Use `POST` as method.
  473. // 2. Set the `Authorization` header to `ThreemaSfuToken <sfu-token>`.
  474. // 3. Set the encoded `SfuHttpRequest.Peek` message as body.
  475. //
  476. // When receiving this request:
  477. //
  478. // 1. If the `Authorization` header is missing, or the provided `sfu-token` in
  479. // the `Authorization` header is invalid or expired, respond with status
  480. // code `401` and abort these steps.
  481. // 2. If the provided data is invalid, respond with status code `400` and
  482. // abort these steps.
  483. // 3. If `call_id` does not equal the Call ID from the URL (decoded
  484. // `call_id-as-hex`), respond with status code `400` and abort these steps.
  485. // 4. If no group call for the given `call_id` is currently running, respond
  486. // with status code `404` and abort these steps.
  487. // 5. Respond with status code `200` and an encoded `SfuHttpResponse.Peek`
  488. // message as body.
  489. message Peek {
  490. // Group Call ID associated with the group call; must equal the Call ID encoded in the URL.
  491. bytes call_id = 1;
  492. }
  493. // Requests to join the group call with the given Group Call ID.
  494. //
  495. // The URL is formed in the following way:
  496. //
  497. // <sfu_base_url>/v1/join/<call_id-as-hex>
  498. //
  499. // When sending this request:
  500. //
  501. // 1. Use `POST` as method.
  502. // 2. Set the `Authorization` header to `ThreemaSfuToken <sfu-token>`.
  503. // 3. Set the encoded `SfuHttpRequest.Join` message as body.
  504. //
  505. // When receiving this request:
  506. //
  507. // 1. If the `Authorization` header is missing, or the provided `sfu-token` in
  508. // the `Authorization` header is invalid or expired, respond with status
  509. // code `401` and abort these steps.
  510. // 2. If the provided data is invalid, respond with status code `400` and
  511. // abort these steps.
  512. // 3. If `call_id` does not equal the Call ID from the URL (decoded
  513. // `call_id-as-hex`), respond with status code `400` and abort these steps.
  514. // 4. If the `protocol_version` is unsupported by the SFU, respond with status
  515. // code `419` and abort these steps.
  516. // 5. If no more participants can join the group call for the given `call_id`,
  517. // respond with status code `503` and abort these steps.
  518. // 6. Respond with status code `200` and an encoded `SfuHttpResponse.Join`
  519. // message as body.
  520. // 7. Once the WebRTC connection has been established, announce the newly
  521. // joined participant to all other participants via the corresponding data
  522. // channel. If no WebRTC connection is established within 30s, the
  523. // participant ID is no longer reserved for the client and the group call
  524. // must be torn down if no other participant started joining this group
  525. // call.
  526. message Join {
  527. // Group Call ID associated with the group call; must equal the Call ID encoded in the URL.
  528. bytes call_id = 1;
  529. // Protocol version the call was announced with.
  530. uint32 protocol_version = 2;
  531. // DTLS fingerprint of the x509 certificate that will be used by the client.
  532. //
  533. // Note: This is the authentication anchor for the WebRTC connection towards
  534. // the SFU.
  535. bytes dtls_fingerprint = 3;
  536. }
  537. }
  538. // Response payloads sent back from the SFU in response to an HTTP request.
  539. message SfuHttpResponse {
  540. // Information returned for a running group call.
  541. //
  542. // IMPORTANT: The _peek_ process is considered stable across different
  543. // protocol versions. Therefore, the message **should** maintain backwards
  544. // compatibility!
  545. //
  546. // Note: The included `CallState` information may not be accurate and should
  547. // not be relied upon.
  548. message Peek {
  549. // Unix-ish timestamp in milliseconds for when the first participant joined
  550. // the Group Call ID and therefore started the group call.
  551. uint64 started_at = 1;
  552. // Maximum number of participants allowed in the group call.
  553. uint32 max_participants = 2;
  554. // Call state (`CallState`), encrypted by `GCSK.secret` and prefixed with a
  555. // random nonce.
  556. //
  557. // Not provided in case the call is currently running but no participant has
  558. // sent a call state to the SFU, or if the call state expired.
  559. //
  560. // The content of the call state is protocol version dependent and should
  561. // therefore be ignored if a client does not support the particular protocol
  562. // version the group call is associated with.
  563. optional bytes encrypted_call_state = 3;
  564. }
  565. // Information returned when joining a group call.
  566. //
  567. // When receiving this response, initiate the WebRTC connection to the SFU and
  568. // consider the connection established when the `SfuToParticipant.Hello`
  569. // message has been received on the associated data channel.
  570. message Join {
  571. // Unix-ish timestamp in milliseconds for when the first participant joined
  572. // the Group Call ID and therefore started the group call.
  573. uint64 started_at = 1;
  574. // Maximum number of participants allowed in the group call.
  575. uint32 max_participants = 2;
  576. // Participant ID assigned to the client.
  577. //
  578. // Note: The client needs to know the participant ID early to derive MIDs
  579. // required to be present in the O/A SDP.
  580. uint32 participant_id = 3;
  581. // Address the SFU is listening on for a WebRTC connection.
  582. message Address {
  583. // Transport protocol of the address (currently only UDP is defined).
  584. enum Protocol { UDP = 0; }
  585. Protocol protocol = 1;
  586. // Port the SFU is listening on.
  587. uint32 port = 2;
  588. // IPv4 or IPv6 address.
  589. string ip = 3;
  590. }
  591. // List of addresses the SFU listens on for a WebRTC connection.
  592. //
  593. // Note: One UDP IPv4 address is mandatory! One IPv6 address is recommended.
  594. repeated Address addresses = 4;
  595. // ICE username fragment for the WebRTC connection.
  596. string ice_username_fragment = 5;
  597. // ICE password for the WebRTC connection.
  598. string ice_password = 6;
  599. // DTLS fingerprint of the X.509 certificate that will be used by the SFU.
  600. //
  601. // Note: This is the authentication anchor for the WebRTC connection towards
  602. // the SFU.
  603. bytes dtls_fingerprint = 7;
  604. // Supported features bitmask (see `SupportedFeature`).
  605. uint64 supported_features = 8;
  606. }
  607. }
  608. // Messages sent from the SFU to a participant via a data channel.
  609. //
  610. // Data Channel Parameters:
  611. //
  612. // - `ordered`: `true`
  613. // - `negotiated`: `true`
  614. // - `id`: `0`
  615. message SfuToParticipant {
  616. // The enveloped message from the SFU.
  617. //
  618. // When relaying a message from one participant to another, omit any
  619. // additional padding.
  620. //
  621. // IMPORTANT: The format of the `SfuToParticipant.Envelope` and
  622. // `ParticipantToSfu.Envelope` must be compatible for the relay case, so the
  623. // SFU can forward the data without having to re-encode.
  624. message Envelope {
  625. // Random amount of padding, ignored by the receiver.
  626. bytes padding = 1;
  627. oneof content {
  628. ParticipantToParticipant.OuterEnvelope relay = 2;
  629. Hello hello = 3;
  630. Timestamp timestamp_response = 6;
  631. ParticipantJoined participant_joined = 4;
  632. ParticipantLeft participant_left = 5;
  633. }
  634. }
  635. // Announces all other participants to a newly joined participant.
  636. //
  637. // When receiving this message:
  638. //
  639. // 1. If a `Hello` was received before (i.e. if the receiver is not a newly
  640. // joined participant), log a warning and abort these steps.
  641. // 2. Initiate the participant-to-participant handshake for each participant
  642. // listed in this message.
  643. message Hello {
  644. // All participants in the group call. This **excludes** the client's
  645. // participant ID.
  646. repeated uint32 participant_ids = 1;
  647. }
  648. // Announces to existing participants that a new participant joined.
  649. //
  650. // When receiving this message:
  651. //
  652. // 1. Look up the participant. If it already exists (i.e. never _left_), log a
  653. // warning and abort these steps.
  654. // 2. Run the corresponding steps described by the _Join/Leave_ section.
  655. message ParticipantJoined { uint32 participant_id = 1; }
  656. // Announces to existing participants that a participant left.
  657. //
  658. // When receiving this message:
  659. //
  660. // 1. Look up the participant. If it was never announced to have _joined_ by
  661. // an associated `ParticipantJoined` message, log a warning and abort these
  662. // steps.
  663. // 2. Run the corresponding steps described by the _Join/Leave_ section.
  664. message ParticipantLeft { uint32 participant_id = 1; }
  665. // Current UNIX-ish timestamp in milliseconds of the SFU.
  666. //
  667. // When receiving this message:
  668. //
  669. // 1. Resolve the first pending timestamp request with the response.
  670. message Timestamp { uint64 ms = 1; }
  671. }
  672. // Messages sent from a participant to the SFU via a data channel.
  673. //
  674. // Data Channel Parameters:
  675. //
  676. // - `ordered`: `true`
  677. // - `negotiated`: `true`
  678. // - `id`: `0`
  679. message ParticipantToSfu {
  680. // The enveloped message towards the SFU.
  681. //
  682. // When relaying a message from one participant to another, omit any
  683. // additional padding.
  684. //
  685. // IMPORTANT: The format of the `SfuToParticipant.Envelope` and
  686. // `ParticipantToSfu.Envelope` must be compatible for the relay case, so the
  687. // SFU can forward the data without having to re-encode.
  688. message Envelope {
  689. // Random amount of padding, ignored by the receiver.
  690. bytes padding = 1;
  691. oneof content {
  692. ParticipantToParticipant.OuterEnvelope relay = 2;
  693. UpdateCallState update_call_state = 3;
  694. RequestTimestamp request_timestamp = 7;
  695. ParticipantMicrophone request_participant_microphone = 6;
  696. ParticipantCamera request_participant_camera = 4;
  697. ParticipantScreen request_participant_screen = 5;
  698. }
  699. }
  700. // Request the current timestamp of the SFU.
  701. //
  702. // When receiving this message:
  703. //
  704. // 1. Respond with a `SfuToParticipant.Timestamp` message.
  705. message RequestTimestamp {}
  706. // Update the call state that can be retrieved via a _peek_.
  707. //
  708. // Note: Only the currently designated client should send this to the SFU.
  709. //
  710. // When receiving this message:
  711. //
  712. // 1. Store the encrypted call state and make it accessible via _peek_ HTTP
  713. // requests.
  714. // 2. Start a timer to purge the call state after 30s. Subsequent
  715. // `UpdateCallState` messages will update the call state and reset the
  716. // timer.
  717. message UpdateCallState {
  718. // Call state (`CallState`), encrypted by `GCSK` and prefixed with
  719. // a random nonce.
  720. bytes encrypted_call_state = 1;
  721. }
  722. // Subscribe to or unsubscribe from a participant's microphone feed.
  723. //
  724. // When receiving this message:
  725. //
  726. // 1. If the `participant_id` refers to the sender's participant ID or an
  727. // unknown participant ID, discard the message and abort these steps.
  728. // 2. If `subscribe` is set, forward the microphone feed to the client that
  729. // fits best to the provided parameters.
  730. // 3. If `unsubscribe` is set, stop forwarding the microphone feed of this
  731. // participant to the client.
  732. message ParticipantMicrophone {
  733. // Participant ID whose microphone feed should be subscribed or unsubscribed
  734. // from.
  735. uint32 participant_id = 1;
  736. // Subscribe to a participant's microphone feed.
  737. message Subscribe {}
  738. // Unsubscribe from a participant's microphone feed.
  739. message Unsubscribe {}
  740. oneof action {
  741. Subscribe subscribe = 2;
  742. Unsubscribe unsubscribe = 3;
  743. }
  744. }
  745. // Subscribe to or unsubscribe from a participant's camera feed.
  746. //
  747. // When receiving this message:
  748. //
  749. // 1. If the `participant_id` refers to the sender's participant ID or an
  750. // unknown participant ID, discard the message and abort these steps.
  751. // 2. If `subscribe` is set, forward the camera feed to the client that fits
  752. // best to the provided parameters.
  753. // 3. If `unsubscribe` is set, stop forwarding the camera feed of this
  754. // participant to the client.
  755. message ParticipantCamera {
  756. // Participant ID whose camera feed should be subscribed or unsubscribed
  757. // from.
  758. uint32 participant_id = 1;
  759. // Subscribe to a participant's camera feed.
  760. message Subscribe {
  761. // Desired resolution. The client should use the resolution of the canvas
  762. // the camera feed will be displayed in. The SFU will select the spatial
  763. // layer that fits best.
  764. common.Resolution desired_resolution = 1;
  765. // Desired frame rate. The SFU will select the temporal layer that fits
  766. // best.
  767. uint32 desired_fps = 2;
  768. }
  769. // Unsubscribe from a participant's camera feed.
  770. message Unsubscribe {}
  771. oneof action {
  772. Subscribe subscribe = 2;
  773. Unsubscribe unsubscribe = 3;
  774. }
  775. }
  776. // Subscribe to or unsubscribe from a participant's screen feed.
  777. //
  778. // Availability: If the SFU announced support for
  779. // `SupportedFeature.SCREEN_SHARE`.
  780. //
  781. // When receiving this message:
  782. //
  783. // 1. If the `participant_id` refers to the sender's participant ID or an
  784. // unknown participant ID, discard the message and abort these steps.
  785. // 2. If `subscribe` is set, forward the screen feed to the client that fits
  786. // best to the provided parameters.
  787. // 3. If `unsubscribe` is set, stop forwarding the screen feed of this
  788. // participant to the client.
  789. message ParticipantScreen {
  790. // Participant ID whose screen feed should be subscribed or unsubscribed
  791. // from.
  792. uint32 participant_id = 1;
  793. // Subscribe to a participant's screen feed.
  794. message Subscribe {
  795. // Desired resolution. The client should use the resolution of the canvas
  796. // the screen feed will be displayed in. The SFU will select the spatial
  797. // layer that fits best.
  798. common.Resolution desired_resolution = 1;
  799. // Desired frame rate. The SFU will select the temporal layer that fits
  800. // best.
  801. uint32 desired_fps = 2;
  802. }
  803. // Unsubscribe from a participant's screen feed.
  804. message Unsubscribe {}
  805. oneof action {
  806. Subscribe subscribe = 2;
  807. Unsubscribe unsubscribe = 3;
  808. }
  809. }
  810. }
  811. // Messages sent from one participant to another.
  812. //
  813. // Note that these are relayed via `SfuToParticipant.Envelope` and
  814. // `ParticipantToSfu.Envelope` in order to prevent races with
  815. // `ParticipantJoined`/`ParticipantLeft`.
  816. message ParticipantToParticipant {
  817. // Used for all messages that are relayed from one participant to another via
  818. // the SFU.
  819. //
  820. // When receiving a relayed message:
  821. //
  822. // 1. If the `receiver` is not the user's assigned participant ID, discard the
  823. // message and abort these steps.
  824. // 2. If the `sender` is unknown, discard the message and abort these steps.
  825. // 3. Decrypt `encrypted_data` according to the current _handshake state_ and
  826. // handle the inner envelope:
  827. // - `await-ep-hello` or `await-np-hello`: Expect a
  828. // `Handshake.HelloEnvelope`.
  829. // - `await-auth` or `await-guest-auth`: Expect a `Handshake.AuthEnvelope`.
  830. // - `done`: Expect a post-auth `Envelope`.
  831. message OuterEnvelope {
  832. // Participant ID of the sender. Checked by the SFU to be correct, dropped
  833. // if not.
  834. uint32 sender = 1;
  835. // Participant ID of the receiver. Checked by the SFU to exist, dropped if
  836. // not.
  837. uint32 receiver = 2;
  838. // The inner envelope. Always encrypted. Key and nonce are to be inferred
  839. // from the current _handshake state_ towards the sending participant.
  840. bytes encrypted_data = 4;
  841. }
  842. // Messages required for the initial lock-step handshake between participants.
  843. message Handshake {
  844. // The first message (`HelloEnvelope(Hello)` or `HelloEnvelope(GuestHello)`)
  845. // of both sides is always encrypted by `GCHK`, prefixed with a
  846. // random nonce.
  847. message HelloEnvelope {
  848. // Random amount of padding, ignored by the receiver.
  849. bytes padding = 1;
  850. oneof content {
  851. Hello hello = 2;
  852. GuestHello guest_hello = 3;
  853. }
  854. }
  855. // If both sides started the normal handshake, the second message is
  856. // encrypted in the following way:
  857. //
  858. // 1. Let `inner-nonce` be a random nonce.
  859. // 2. Let `inner-data` be encrypted by:
  860. //
  861. // ```text
  862. // S = X25519HSalsa20(<sender.CK>.secret, <receiver.CK>.public)
  863. // GCNHAK = Blake2b(
  864. // key=S, salt='nha', personal='3ma-call', input=GCKH)
  865. // XSalsa20-Poly1305(
  866. // key=GCNHAK,
  867. // nonce=<inner-nonce>,
  868. // data=<AuthEnvelope(Auth)>,
  869. // )
  870. // ```
  871. // 3. Let `outer-data` be encrypted by:
  872. //
  873. // ```text
  874. // XSalsa20-Poly1305(
  875. // key=X25519HSalsa20(<sender.PCK>.secret, <receiver.PCK>.public),
  876. // nonce=<sender.PCCK> || <sender.PCSN+>,
  877. // data=<inner-nonce> || <inner-data>,
  878. // )
  879. // ```
  880. // 4. Return `outer-data`.
  881. //
  882. // If either side started the guest handshake, the second message is
  883. // encrypted by:
  884. //
  885. // ```text
  886. // XSalsa20-Poly1305(
  887. // key=X25519HSalsa20(<sender.PCK>.secret, <receiver.PCK>.public),
  888. // nonce=<sender.PCCK> || <sender.PCSN+>,
  889. // data=<AuthEnvelope(GuestAuth)>,
  890. // )
  891. // ```
  892. //
  893. // When receiving this message:
  894. //
  895. // 1. If either side initiated a guest handshake via a `GuestHello`, expect
  896. // `guest_auth` to be set. If `guest_auth` is not set, log a warning and
  897. // abort these steps.
  898. // 2. If both sides initiated the (normal) handshake, expect `auth` to be
  899. // set. If `auth` is not set, log a warning and abort these steps.
  900. message AuthEnvelope {
  901. // Random amount of padding, ignored by the receiver.
  902. bytes padding = 1;
  903. oneof content {
  904. Auth auth = 2;
  905. GuestAuth guest_auth = 3;
  906. }
  907. }
  908. // Initial handshake message.
  909. //
  910. // When creating this message as a newly joined participant towards another
  911. // participant:
  912. //
  913. // 1. Set the participant's _handshake state_ to `await-ep-hello`.
  914. // 2. Send this message.
  915. //
  916. // When receiving this message as a guest participant:
  917. //
  918. // 1. Map it to a `GuestHello` in the following way:
  919. // - `name`: `Hello.nickname`
  920. // - `pck`: `Hello.pck`
  921. // - `pcck`: `Hello.pcck`
  922. // 2. Handle the mapped `GuestHello` as if it had been received directly.
  923. //
  924. // When receiving this message as a regular participant:
  925. //
  926. // 1. (Placeholder for conference call PCK != GCAMK step.)
  927. // 2. If the group call is scoped to a (Threema) group and `identity` is not
  928. // part of the associated group (including the user itself), log a
  929. // warning and abort these steps.
  930. // 3. If the sender is a newly joined participant and therefore the
  931. // _handshake state_ was set to `await-np-hello` (as described by the
  932. // _Join/Leave_ section):
  933. // 1. Respond by sending a `Hello` message, immediately followed by an
  934. // `Auth` message.
  935. // 2. Set the participant's _handshake state_ to `await-auth` and abort
  936. // these steps.
  937. // 4. If the participant's _handshake state_ is `await-ep-hello`:
  938. // 1. If the `pck` reflects the local `PCK.public` or the `pcck` reflects
  939. // the local `PCCK`, log a warning and abort these steps.
  940. // 2. Respond by sending an `Auth` message.
  941. // 3. Set the participant's _handshake state_ to `await-auth` and abort
  942. // these steps.
  943. // 5. Log a warning and abort these steps.
  944. message Hello {
  945. // Threema ID of the sender.
  946. string identity = 1;
  947. // Nickname associated with the Threema ID (without `~` prefix).
  948. string nickname = 2;
  949. // 32 byte ephemeral public key (`PCK.public`) towards the remote
  950. // participant.
  951. //
  952. // Note: It is allowed to use the same `PCK` for multiple participants.
  953. bytes pck = 3;
  954. // 16 byte random cookie used for nonces by the sender in subsequent
  955. // messages.
  956. bytes pcck = 4;
  957. }
  958. // Second and final handshake message.
  959. //
  960. // When receiving this message:
  961. //
  962. // 1. If the participant's _handshake state_ is not `await-auth`, log a
  963. // warning and abort these steps.
  964. // 2. If the repeated `pck` does not equal the local `PCK.public` used
  965. // towards this participant, log a warning and abort these steps.
  966. // 3. If the repeated `pcck` does not equal the local `PCCK` used towards
  967. // this participant, log a warning and abort these steps.
  968. // 4. Set the participant's _handshake state_ to `done`.
  969. message Auth {
  970. // 32 byte repeated ephemeral public key from the `Hello` message.
  971. //
  972. // Note: Repeating the sender's `PCK.public` prevents replay attacks.
  973. bytes pck = 1;
  974. // 16 byte repeated random cookie from the `Hello` message.
  975. //
  976. // Note: Repeating the sender's `PCCK` prevents replay attacks while
  977. // allowing the sender to use the same `PCK` for multiple
  978. // participants.
  979. bytes pcck = 2;
  980. // The currently applied PCMK and any _pending_ PCMK used for media
  981. // encryption, specifically in that order.
  982. //
  983. // Note: An implementation can expect at least one media key to be
  984. // present.
  985. repeated MediaKey media_keys = 3;
  986. }
  987. // Initial guest handshake message.
  988. //
  989. // When creating this message as a newly joined guest participant towards
  990. // another participant:
  991. //
  992. // 1. Set the participant's _handshake state_ to `await-ep-hello`.
  993. // 2. Send this message.
  994. //
  995. // When receiving this message:
  996. //
  997. // 1. If guest participants are not allowed for this call, log a warning
  998. // and abort these steps.
  999. // 2. (Placeholder for conference call PCK != GCAMK step.)
  1000. // 3. If the sender is a newly joined participant and therefore the
  1001. // _handshake state_ was set to `await-np-hello` (as described by the
  1002. // _Join/Leave_ section):
  1003. // 1. Respond by sending a `GuestHello` message, immediately followed by
  1004. // a `GuestAuth` message.
  1005. // 2. Set the participant's _handshake state_ to `await-guest-auth` and
  1006. // abort these steps.
  1007. // 4. If the participant's _handshake state_ is `await-ep-hello`:
  1008. // 1. If the `pck` reflects the local `PCK.public` or the `pcck` reflects
  1009. // the local `PCCK`, log a warning and abort these steps.
  1010. // 2. Respond by sending a `GuestAuth` message.
  1011. // 3. Set the participant's _handshake state_ to `await-guest-auth` and
  1012. // abort these steps.
  1013. // 5. Log a warning and abort these steps.
  1014. message GuestHello {
  1015. // The guest's self-assigned name.
  1016. string name = 1;
  1017. // 32 byte ephemeral public key (`PCK.public`) towards the remote
  1018. // participant.
  1019. //
  1020. // Note: It is allowed to use the same `PCK` for multiple participants.
  1021. bytes pck = 2;
  1022. // 16 byte random cookie used for nonces by the sender in subsequent
  1023. // messages.
  1024. bytes pcck = 3;
  1025. }
  1026. // Second and final handshake message triggered if either side initiated the
  1027. // guest handshake.
  1028. //
  1029. // When receiving this message:
  1030. //
  1031. // 1. If the participant's _handshake state_ is not `await-guest-auth`, log
  1032. // a warning and abort these steps.
  1033. // 2. If the repeated `pck` does not equal the local `PCK.public` used
  1034. // towards this participant, log a warning and abort these steps.
  1035. // 3. If the repeated `pcck` does not equal the local `PCCK` used towards
  1036. // this participant, log a warning and abort these steps.
  1037. // 4. Set the participant's _handshake state_ to `done`.
  1038. message GuestAuth {
  1039. // 32 byte repeated ephemeral public key from the `GuestHello` message.
  1040. //
  1041. // Note: Repeating the sender's `PCK.public` prevents replay attacks.
  1042. bytes pck = 1;
  1043. // 16 byte repeated random cookie from the `GuestHello` message.
  1044. //
  1045. // Note: Repeating the sender's `PCCK` prevents replay attacks while
  1046. // allowing the sender to use the same `PCK` for multiple
  1047. // participants.
  1048. bytes pcck = 2;
  1049. // The currently applied PCMK and any _pending_ PCMK used for media
  1050. // encryption, specifically in that order.
  1051. //
  1052. // Note: An implementation can expect at least one media key to be
  1053. // present.
  1054. repeated MediaKey media_keys = 3;
  1055. }
  1056. }
  1057. // After fulfilling either the (normal) handshake or the guest handshake, all
  1058. // following messages are encoded in `Envelope` and encrypted by:
  1059. //
  1060. // ```text
  1061. // XSalsa20-Poly1305(
  1062. // key=X25519HSalsa20(<sender.PCK>.secret, <receiver.PCK>.public),
  1063. // nonce=<sender.PCCK> || <sender.PCSN+>,
  1064. // )
  1065. // ```
  1066. //
  1067. // Note: Since the guest handshake is TOFU, an attacker who knows `GCK` and
  1068. // controls the SFU may mount a MITM attack between a guest participant
  1069. // and another participant. The attacker would be able to silently eavesdrop
  1070. // on all media traffic between the two participants. This is repeatable for
  1071. // all other participants and means the attacker is able to silently eavesdrop
  1072. // on the whole call. Therefore, if a call is not open for guests, `GuestHello`
  1073. // (and `GuestAuth`) **must not** be accepted.
  1074. //
  1075. // When receiving this message:
  1076. //
  1077. // 1. If the participant's _handshake state_ is not `done`, log a warning and
  1078. // abort these steps.
  1079. // 2. Handle the message according to the content.
  1080. message Envelope {
  1081. // Random amount of padding, ignored by the receiver.
  1082. bytes padding = 1;
  1083. oneof content {
  1084. // An `Admin.Envelope`, encrypted as described by that message.
  1085. bytes encrypted_admin_envelope = 2;
  1086. // Announces new media keys a participant will apply soon.
  1087. MediaKey rekey = 3;
  1088. // Announces capture state changes of a participant.
  1089. CaptureState capture_state = 4;
  1090. // Announces that the participant entered the _hold_ state.
  1091. HoldState hold_state = 5;
  1092. }
  1093. }
  1094. // Messages from admins towards participants (including admins).
  1095. message Admin {
  1096. // Message from an administrator, encrypted by:
  1097. //
  1098. // ```text
  1099. // XSalsa20-Poly1305(
  1100. // key=X25519HSalsa20(GCAMK.secret, <receiver.PCK>.public),
  1101. // nonce=<sender.PCCK> || <sender.PCSN+>,
  1102. // )
  1103. // ```
  1104. //
  1105. // IMPORTANT: The `ParticipantToParticipant.Envelope` that encapsulates this
  1106. // message shall be encrypted using the same `PCSN` as used for this
  1107. // `Envelope`. The only difference is that the sender uses `GCAMK` instead
  1108. // of its ephemeral `PCK`.
  1109. message Envelope {
  1110. oneof content {
  1111. ReportAsAdmin report_as_admin = 1;
  1112. PromoteToAdmin promote_to_admin = 2;
  1113. ForceLeave force_leave = 3;
  1114. ForceCaptureStateOff force_capture_state_off = 4;
  1115. ForceFocus force_focus = 5;
  1116. }
  1117. }
  1118. // Report as an administrator.
  1119. //
  1120. // When receiving this message, mark the sender as an administrator in the
  1121. // UI.
  1122. message ReportAsAdmin {}
  1123. // Promote the receiver to an administrator.
  1124. //
  1125. // Note: This is final for the scope of this Group Call. An administrator
  1126. // cannot be demoted.
  1127. //
  1128. // When receiving this message:
  1129. //
  1130. // 1. If the user already is an administrator, abort these steps.
  1131. // 2. Derive GCAMK and calculate the associated public key from the received
  1132. // `gcak`. If it does not match the known `GCAMK.public`, log a warning
  1133. // and abort these steps.
  1134. // 3. Send an `Admin.ReportAsAdmin` message to all other participants
  1135. // (including the sender who promoted the user to an admin).
  1136. // 4. Notify the user of their admin status and enable administration
  1137. // functionality in the UI.
  1138. message PromoteToAdmin { bytes gcak = 1; }
  1139. // Force the receiver to leave the call.
  1140. message ForceLeave {}
  1141. // Force the receiver's capture device to be turned off.
  1142. //
  1143. // Note: This is a momentary enforcement. A participant may immediately
  1144. // restart capturing a device (e.g. unmute itself) and the message is
  1145. // not repeated towards newly joined participants.
  1146. //
  1147. // When receiving this message:
  1148. //
  1149. // 1. Look up the corresponding device. If none could be found, abort these
  1150. // steps.
  1151. // 2. If the device's capture state is already _off_, abort these steps.
  1152. // 3. Send a `CaptureState` message for the device and follow the creation
  1153. // steps of that message (i.e. stop capturing, etc.).
  1154. message ForceCaptureStateOff {
  1155. enum Device {
  1156. // Stop capturing all devices.
  1157. ALL = 0;
  1158. // Stop capturing the microphone (i.e. mute).
  1159. MICROPHONE = 1;
  1160. // Stop capturing the camera.
  1161. CAMERA = 2;
  1162. // Stop capturing the screen.
  1163. SCREEN = 3;
  1164. }
  1165. Device device = 1;
  1166. }
  1167. // Force focus on a specific participant.
  1168. //
  1169. // Note: This is a momentary enforcement. A participant may immediately
  1170. // remove the focus and the message is not repeated towards newly
  1171. // joined participants.
  1172. //
  1173. // When receiving this message:
  1174. //
  1175. // 1. Look up the participant to be focused. If none could be found, abort
  1176. // these steps.
  1177. // 2. Focus the participant in the UI. The camera or screen feed
  1178. // subscription may need to be created (e.g. participant was not visible
  1179. // in the viewport before) or updated (e.g. display resolution changes
  1180. // due to focus) by a corresponding `Subscribe` message sent to the SFU.
  1181. message ForceFocus { uint32 participant_id = 1; }
  1182. }
  1183. // Media keys a participant will use for sending.
  1184. //
  1185. // Will be sent towards new and existing participants as described by the
  1186. // _Join/Leave_ section.
  1187. message MediaKey {
  1188. // The current epoch reflecting the PCMK state.
  1189. //
  1190. // Initially, the epoch is `0`; it increases each time a participant leaves.
  1191. // The concrete mechanism is explained in the _Join/Leave_ section.
  1192. uint32 epoch = 1;
  1193. // The current ratchet counter reflecting the PCMK state.
  1194. //
  1195. // Initially (or when a participant leaves), the ratchet counter is `0`; it
  1196. // increases each time a participant joins. The ratcheting mechanism is
  1197. // explained in the _Join/Leave_ section.
  1198. uint32 ratchet_counter = 2;
  1199. // The current state of the PCMK with the applied ratchet counter.
  1200. //
  1201. // Initially (or when a participant leaves), PCMK is a random 32 byte secret
  1202. // key. The concrete mechanism is explained in the _Join/Leave_ section.
  1203. //
  1204. // This key must be identical **towards** all participants.
  1205. bytes pcmk = 3;
  1206. }
  1207. // Signals a participant's device capturing state.
  1208. //
  1209. // When creating this message:
  1210. //
  1211. // 1. Let `device` be the device whose state is to be updated.
  1212. // 2. If `device` is to be turned _off_:
  1213. // 1. Stop capturing from the device.
  1214. // 2. Pause the corresponding media track.
  1215. // 3. If `device` is to be turned _on_:
  1216. // 1. Start capturing from the device.
  1217. // 2. Resume the corresponding media track.
  1218. // 4. If `device` is of type _screen_:
  1219. // 1. Send a `RequestTimestamp` to the SFU.
  1220. // 2. Let `context` be the corresponding `Timestamp` response.
  1221. // 5. Send the `CaptureState` message for the `device` with the provided
  1222. // `context`.
  1223. //
  1224. // When receiving this message:
  1225. //
  1226. // 1. Let `device` be the device of the sender whose state has been updated.
  1227. // 2. If `device` was turned _off_ and the user is subscribed to the given
  1228. // `device`'s feed:
  1229. // 1. Stop displaying the corresponding media feed in the UI.
  1230. // 2. Pause the corresponding media track.
  1231. // 3. If `device` is `Microphone`, no further action is necessary.
  1232. // 4. If `device` is `Camera`, send a `ParticipantCamera.Unsubscribe`
  1233. // message to the SFU.
  1234. // 5. If `device` is `Screen`, send a `ParticipantScreen.Unsubscribe`
  1235. // message to the SFU.
  1236. // 3. If `device` was turned _on_ and the user is not subscribed to the given
  1237. // `device`'s feed:
  1238. // 1. Resume the corresponding media track.
  1239. // 2. Start displaying the corresponding media feed in the UI.
  1240. // 3. If `device` is `Microphone`, no further action is necessary.
  1241. // 4. If `device` is `Camera`, send a `ParticipantCamera.Subscribe` message
  1242. // to the SFU.
  1243. // 5. If `device` is `Screen`:
  1244. // 1. Send a `ParticipantScreen.Subscribe` message to the SFU.
  1245. // 2. If `device`'s `started_at` timestamp is the most recent timestamp
  1246. // from all currently active screen shares of all participants, set
  1247. // the focus to this participant's screen.
  1248. message CaptureState {
  1249. // Capture state of the microphone.
  1250. message Microphone {
  1251. oneof state {
  1252. common.Unit on = 1;
  1253. common.Unit off = 2;
  1254. }
  1255. }
  1256. // Capture state of the camera.
  1257. message Camera {
  1258. oneof state {
  1259. common.Unit on = 1;
  1260. common.Unit off = 2;
  1261. }
  1262. }
  1263. // Capture state of the screen.
  1264. message Screen {
  1265. message On {
  1266. // UNIX-ish timestamp in milliseconds retrieved from the SFU for when
  1267. // the screen share was initiated.
  1268. uint64 started_at = 1;
  1269. }
  1270. oneof state {
  1271. On on = 1;
  1272. common.Unit off = 2;
  1273. }
  1274. }
  1275. oneof state {
  1276. Microphone microphone = 1;
  1277. Camera camera = 2;
  1278. Screen screen = 3;
  1279. }
  1280. }
  1281. // Signals that a participant is currently on hold / temporarily away.
  1282. //
  1283. // When creating this message:
  1284. //
  1285. // 1. Send a `CaptureState` message for each capture device. Follow the
  1286. // creation steps of that message.
  1287. // 2. Send the `HoldState` message.
  1288. //
  1289. // When receiving this message:
  1290. //
  1291. // 1. Apply the _hold_ state in the UI for the participant.
  1292. // 2. Pause any video-based media tracks of the participant.
  1293. // 3. If subscribed to the participant's camera feed, send a
  1294. // `ParticipantCamera.Unsubscribe` message to the SFU.
  1295. // 4. If subscribed to the participant's screen feed, send a
  1296. // `ParticipantScreen.Unsubscribe` message to the SFU.
  1297. message HoldState {}
  1298. }