-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcounters.Rmd
291 lines (202 loc) · 6.71 KB
/
counters.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
---
title: "Sufficient statistics"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
# Definitions
## Array
```cpp
// Common classes
typedef unsigned int uint;
typedef std::vector< int > ivec;
typedef std::vector< uint > uvec;
typedef std::vector< double > dvec;
typedef std::unordered_map< uint, double > map2dbl;
typedef std::unordered_map< uint, map2dbl > map2map;
class array {
public:
// Creation
array(uvec row, uvec col);
/* Queries
*/
uint nrow() const {return nrow_;}
uint ncol() const {return ncol_;}
bool is_blocked(uint row, uint col) const;
bool is_in_range(uint row, uint col) const;
/* Return the content of a cell
* @param row,col coordinates of the cell to be query.
* @param check Logical. When `true` the function checks boundaries
* before returning.
*/
int cell(uint row, uint col, bool check = true);
/* Return a full row or column as a 0/1 vector
*
*/
uvec row(uint i, bool check = true);
uvec col(uint i, bool check = true);
/* Returns an interator that as value has the coordinates of the
* non zero elements of the row.
*/
uvec::const_iterator row_const_iterator(uint i, bool check = true);
uvec::const_iterator col_const_iterator(uint i, bool check = true);
// Actions
int set_cell_on(uint row, uint col, bool check = true);
int set_cell_off(uint row, uint col, bool check = true);
int toggle_cell(uint row, uint col, bool & added, bool check = true);
private:
uint nrow_, ncol_;
TBD blocked_;
map2map data_;
}
```
Function definitions
```cpp
int array::set_cell_on(uint row, uint col, bool check = true) {
if (check) {
int ans = is_in_range(row, col);
if (ans != 0) return ans;
ans = is_blocked(row, col, false);
if (ans != 0) return ans;
int ans = this->cell(row, col, false)
if (ans == 1) return ans;
}
// OPERATIONS ADDING A NEW CELL TBD to data_
return 0;
}
int array::set_cell_off(uint row, uint col, bool check = true) {
if (check) {
int ans = is_in_range(row, col);
if (ans != 0) return ans;
ans = is_blocked(row, col, false);
if (ans != 0) return ans;
int ans = this->cell(row, col, false)
if (ans == 0) return ans;
}
// OPERATIONS REMOVING A NEW CELL TBD to data_
return 0;
}
int array::toggle_cell(uint row, uint col, bool & added, bool check = true) {
// Checking boundaries
if (check) {
int ans = is_in_range(row, col);
if (ans != 0) return ans;
ans = is_blocked(row, col);
if (ans != 0) return ans;
}
// Aplying the operation
if (this->cell(row, col, false) == 1) {
added = false;
return set_cell_off(row, col, false);
} else {
added = true;
return set_cell_on(row, col, false);
}
};
```
## Change
```cpp
class array_change {
public:
/* Initialization
* @param ptr Pointer to an `array` object
* @param row,col Coordinates where to add it.
*/
int array_change(array_pointer array, uint row, uint col, bool check = true) :
row_(row), col_(col), array_(array){
// When turning cells on or off, we also check whether these have values
// or don't, as well as the size of the arrays. This is done internally in
// the toggling functions.
int ans = array_->toggle_cell(row, col, add_, check);
if (ans != 0) return ans;
return 0;
};
/* Update the change
*
*/
int toggle(uint row, uint col, bool check = true) {
return array_->toggle_cell(row, col, add_, check);
}
// Queries
uint row() {return row_;}
uint col() {return col_;}
bool is_add() {return add_;}
private:
uint row_, col_;
array_pointer array_;
bool add_;
}
```
## Number of non-zero cells
## Transitions
**Duplication event**
These usually happen once at a time. At each bifurcation it is usually the case that only one function is gain after a duplication event. In our model, we assume that we observe the genes at that point, so we actually represent it by not having any function prior to that.
Another point to consider is that this usually happens with a single sibling, this is, in a duplication event, of $n$ siblings only one gains a new function. For example, if we have 2 siblings (columns) and two functions (rows), we would see a transition in this form:
$$
\left[\begin{array}{cc}
0 & 1\\
0 & 0
\end{array}\right]\to
\left[\begin{array}{cc}
0 & 1\\
1 & 0
\end{array}\right]
$$
In principle, to allow the statistic to have enough variability, we can count how many of these types of transitions we observe, in particular, one column holding constant from one step to the other and the other changing a single cell.
In change statistics this is very straight forward to implent:
```cpp
// Defines an operation, where it happens (row, col) and whether it is a
// deletion or addition of a cell.
struct array_operation {
bool is_add;
uint row, col;
};
int change_single_gains(
array_operation oper,
array_ptr ptr
) {
// We first check if we are adding or removing a cell
if (oper.is_add) {
// In the case of adding, the result is straight forward and it only
// implies returning # siblings - 1
return ptr->ncols() - 1;
} else {
// Otherwise, if we are removing, we need to see if this counter was
// been used before
int half = ptr->ncols() / 2;
// We start by assuming that we will delete something
int ans = -1;
// If the initial state of the sibling was 0, then, since we are
// removing, we assume that changes in the sufficient statistic
// are if one of the siblings actually changed
if (ptr->cell(oper.row, oper.col) == 0) {
// Looking for siblings that swapped from having to not having a
// function. For this, we should iterate through the row
for (int i = 0; i < half; ++i) {
// Skipping the current. Recall that only the second half is
// changing only.
if (i == (oper.col + half))
continue;
// Checking if the i-th sibling switched from not to have
if (ptr->cell(oper.row, i) == 0 && ptr->cell())
}
} else {
for (int i = 0; i < half; ++i) {
if (i == oper.col && ptr->cell(oper.row,))
continue;
for (int j = 0; j < half; ++j) {
// Again, we don't need to check this twice
if (j == oper.col)
continue;
// Now we need to look at all transitions
for (k = 0; k < ptr->nrows(); ++k) {
// No gain or loss
if (ptr->cell(k, j) == ptr->cell(k, j + half))
}
}
}
}
}
}
```