-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsnapp_scicomp_issues.Rmd
253 lines (199 loc) · 7.71 KB
/
snapp_scicomp_issues.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
---
title: "snapp_scicomp_issues"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(gh)
library(purrr)
library(tidyr)
library(curl)
library(keyring)
library(plotly)
library(rlang)
library(data.table)
library(DT)
```
# Steps:
1) Save PAT from GitHub (user can use keyring package like I did, or save in .Renviron, etc.)
2) use gh package to grab the information
3) Create data frame from the gh object
# 1) creating a new keyring that stores git credentials
```{r}
# Purpose: store a GitHub personal access token (PAT) in a password-protected
# keyring so it never has to be written into this document.
# makes a new file backend object; every keyring call below goes through `kb`
kb <- keyring::backend_file$new()
# # run this one only once (first-time setup)
# # choose a password for your keyring when prompted
# kb$keyring_create("Git_Credentials")
# unlock with keyring password to set values within the keyring
kb$keyring_unlock("Git_Credentials")
# enter in personal access token (PAT) when prompted; it is stored under the
# service name "Github_Enterprise_Token", which later chunks pass to kb$get()
kb$set(service = "Github_Enterprise_Token", username = "maggieklope", keyring = "Git_Credentials")
# # run this if you need to view your PAT
# kb$get("Github_Enterprise_Token", keyring = "Git_Credentials")
# finish by locking keyring
kb$keyring_lock("Git_Credentials")
```
# 2) using gh package to get data from the SNAPP-wg-scicomp
for the endpoint:
- to look at issues on a repo, enterprise accounts and github.com accounts use the same format: "/repos/owner/repo/issues"
for .api_url:
- enterprise accounts use https://github.nceas.ucsb.edu/api/v3
- github.com accounts use https://api.github.com
```{r}
# Download every issue (open and closed) of SNAPP/snapp-wg-scicomp from the
# NCEAS GitHub Enterprise server.
# enter in keyring password when prompted (the token is read via kb$get())
issues_gh <- gh(
  endpoint = "/repos/SNAPP/snapp-wg-scicomp/issues",
  .api_url = "https://github.nceas.ucsb.edu/api/v3",
  owner = "SNAPP",
  repo = "snapp-wg-scicomp",
  .token = kb$get("Github_Enterprise_Token", keyring = "Git_Credentials"),
  # "all" = both open and closed issues
  state = "all",
  # .limit is documented as a number; Inf requests all records across pages
  .limit = Inf
)
# # to see everything that could be added into a data frame
# issues_gh[[1]]
```
# 3) Create a dataframe from the gh object
I decided to do this in a loop:
- For columns with missing values (ex: issues that have not been closed yet), I added a check that fills in NA, because `data.frame()` drops NULL values, which would leave that row with fewer columns than the rest of the data frame
- When there is more than one value (ex: multiple assignees & repo labels) I made it so that they are listed together in one cell
- It might be too much information, but it can also pull the text from the issue
```{r}
# Build one data-frame row per issue, then bind all rows in a single step.

# indices of the issues; seq_along() gives an empty sequence when there are no
# issues, whereas 1:length(x) would give c(1, 0) and index past the end
n_issues <- seq_along(issues_gh)

# rows are collected in a pre-allocated list: calling rbind() inside the loop
# would copy the whole data frame on every iteration
issue_rows <- vector("list", length(issues_gh))

for (i in n_issues) {
  issue <- issues_gh[[i]]

  issue_rows[[i]] <- data.frame(
    # position of the issue in the API response
    repo_num = i,
    title = issue$title,
    creator = issue$user$login,
    # multiple assignees are listed together in one cell; NA when unassigned
    assignees = if (length(issue$assignees) == 0) {
      NA_character_
    } else {
      paste(
        vapply(issue$assignees, function(a) a$login, character(1)),
        collapse = ", "
      )
    },
    date_created = as.Date(issue$created_at),
    status = issue$state,
    # closed_at is NULL for issues that are still open
    date_closed = as.Date(
      if (is.null(issue$closed_at)) NA_character_ else issue$closed_at
    ),
    # multiple labels are listed together in one cell; NA when unlabelled
    labels = if (length(issue$labels) == 0) {
      NA_character_
    } else {
      paste(
        vapply(issue$labels, function(l) l$name, character(1)),
        collapse = ", "
      )
    },
    num_comments = issue$comments
  )
}

if (length(issue_rows) == 0) {
  # no issues: keep the same columns and types so later chunks still work
  df_test <- data.frame(
    repo_num = integer(0),
    title = character(0),
    creator = character(0),
    assignees = character(0),
    date_created = as.Date(character(0)),
    status = character(0),
    date_closed = as.Date(character(0)),
    labels = character(0),
    num_comments = integer(0)
  )
} else {
  df_test <- dplyr::bind_rows(issue_rows)
}
# write_csv(df_test, file = "df_test.csv")
```
# 4) Create a table that can be filtered by date & other features
- datatable() creates a HTML widget to display R data objects
- better way to sort dates might be to make a new column that has the name of the fiscal year (ex: 2019-2020) to sort with?
## first method could be to filter data based on input dates taken by the user, and then put that into DT
```{r}
# reporting window; enter desired dates in yyyy-mm-dd
start_date <- "2020-01-01"
end_date <- "2020-12-31"
# keep issues created on/after start_date and closed on/before end_date;
# issues that are still open have an NA date_closed, and filter() drops rows
# whose condition evaluates to NA
df_filtered <- dplyr::filter(
  df_test,
  date_created >= as.Date(start_date),
  date_closed <= as.Date(end_date)
)
# show the filtered issues as an interactive HTML table
datatable(df_filtered)
```
## or can use DT's filter argument
- kind of difficult to use the slider at the top for dates, but I do like that it has the search bars so you can filter it to open/closed or to a specific assignee
```{r}
# show every issue, with a per-column filter control above each column
DT::datatable(df_test, filter = "top")
```
# Putting together as a function
```{r}
#' Fetch all issues of a GitHub repository as a data frame
#'
#' This function assumes that the user has saved their token. They can use the
#' keyring package the way it is done above, or their own method, and pass the
#' token as the `PAT` argument.
#'
#' @param github_api_endpoint Issues endpoint passed to `gh()`,
#'   e.g. "/repos/owner/repo/issues".
#' @param url Base URL of the API, passed to `gh()` as `.api_url`.
#' @param repo_owner Owner of the repository, passed to `gh()` as `owner`.
#' @param repo_name Name of the repository, passed to `gh()` as `repo`.
#' @param PAT Personal access token, passed to `gh()` as `.token`.
#' @return A data frame with one row per issue and the columns `repo_num`,
#'   `title`, `creator`, `assignees`, `date_created`, `status`, `date_closed`,
#'   `labels` and `num_comments`. A repository without issues gives a zero-row
#'   data frame with the same columns (and a message) instead of an error.
function_test <- function(github_api_endpoint, url, repo_owner, repo_name, PAT) {
  issues_gh <- gh(
    endpoint = github_api_endpoint,
    .api_url = url,
    owner = repo_owner,
    repo = repo_name,
    .token = PAT,
    # "all" = both open and closed issues
    state = "all",
    # .limit is documented as a number; Inf requests all records across pages
    .limit = Inf
  )

  # repo without issues: return an empty, correctly typed data frame rather
  # than failing (1:length(x) would be c(1, 0) and index out of bounds)
  if (length(issues_gh) == 0) {
    message(
      "No issues found for ", repo_owner, "/", repo_name,
      "; returning an empty data frame."
    )
    return(data.frame(
      repo_num = integer(0),
      title = character(0),
      creator = character(0),
      assignees = character(0),
      date_created = as.Date(character(0)),
      status = character(0),
      date_closed = as.Date(character(0)),
      labels = character(0),
      num_comments = integer(0)
    ))
  }

  # joins one field of every sub-record (assignees, labels) into a single
  # comma-separated cell; NA when the issue has no such sub-records
  collapse_field <- function(records, field) {
    if (length(records) == 0) {
      return(NA_character_)
    }
    paste(
      vapply(records, function(r) r[[field]], character(1)),
      collapse = ", "
    )
  }

  # rows are collected in a pre-allocated list and bound once at the end;
  # rbind() inside the loop would copy the data frame on every iteration
  issue_rows <- vector("list", length(issues_gh))
  for (i in seq_along(issues_gh)) {
    issue <- issues_gh[[i]]
    issue_rows[[i]] <- data.frame(
      # position of the issue in the API response
      repo_num = i,
      title = issue$title,
      creator = issue$user$login,
      assignees = collapse_field(issue$assignees, "login"),
      date_created = as.Date(issue$created_at),
      status = issue$state,
      # closed_at is NULL for issues that are still open
      date_closed = as.Date(
        if (is.null(issue$closed_at)) NA_character_ else issue$closed_at
      ),
      labels = collapse_field(issue$labels, "name"),
      num_comments = issue$comments
    )
  }
  df_test <- dplyr::bind_rows(issue_rows)

  # this option will return a data frame
  df_test
  # # # this option will return a DT html table
  # # table <- datatable(df_test, filter = 'top')
  # # return(table)
}
```
# works for NCEAS github enterprise account with token:
```{r}
# Run the function against the SNAPP repository on the NCEAS enterprise
# server; the token is read from the keyring under "Github_Enterprise_Token"
df <- function_test(
  github_api_endpoint = "/repos/SNAPP/snapp-wg-scicomp/issues",
  url = "https://github.nceas.ucsb.edu/api/v3",
  repo_owner = "SNAPP",
  repo_name = "snapp-wg-scicomp",
  PAT = kb$get("Github_Enterprise_Token", keyring = "Git_Credentials")
)
```
# testing function on my own, non-enterprise repo
```{r}
# resetting the keyring to my own PAT
# (enter the github.com token when prompted)
kb$keyring_unlock("Git_Credentials")
kb$set(service = "Github_Enterprise_Token", username = "maggieklope", keyring = "Git_Credentials")
kb$keyring_lock("Git_Credentials")
# using function on test repo
# the token is read back under the service name it was stored under just above;
# no "PAT" service is ever set in this document, so kb$get("PAT", ...) could
# not find it
my_repo_test <- function_test(
  github_api_endpoint = "/repos/maggieklope/test-repo/issues",
  url = "https://api.github.com",
  repo_owner = "maggieklope",
  repo_name = "test-repo",
  PAT = kb$get("Github_Enterprise_Token", keyring = "Git_Credentials")
)
# this repo has no issues, so function_test() has to cope with an empty
# result here (a "this repo has no issues" response)
function_test(
  github_api_endpoint = "/repos/maggieklope/leaflet-tutorial/issues",
  url = "https://api.github.com",
  repo_owner = "maggieklope",
  repo_name = "leaflet-tutorial",
  PAT = kb$get("Github_Enterprise_Token", keyring = "Git_Credentials")
)
```