-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmap_metrics.py
143 lines (117 loc) · 4.13 KB
/
map_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""
Map data from the internal InCites API to VIVO.
"""
import hashlib
import json

from slugify import slugify
from rdflib import Graph, Literal, RDF, RDFS, URIRef

from namespaces import rq_prefixes
from namespaces import (
    D,
    VIVO,
    WOS,
)
from lib import backend
from wos_categories import get_category_uri
from publications import waan_uri
import settings
# Module-level logger shared by every function below, obtained from the
# project's settings module.
logger = settings.get_logger()
def hash_uri(prefix, value):
    """
    Build an identifier in the ``D`` namespace from a prefix and an MD5 digest.

    The key looked up in ``D`` has the form ``<prefix>-<md5 hex digest>``.

    prefix: string placed in front of the digest.
    value: text or bytes to hash. Text is encoded as UTF-8 first because
        hashlib digests only accept bytes.

    Returns the result of indexing ``D`` with that key.
    """
    if not isinstance(value, bytes):
        # hashlib.md5 rejects text on Python 3; on Python 2 this also avoids
        # the implicit ASCII encoding of unicode values.
        value = value.encode('utf-8')
    return D[prefix + '-' + hashlib.md5(value).hexdigest()]
def local_name(uri):
    """
    Return the part of ``uri`` that follows its last "/".

    A value containing no "/" is returned whole; a value that ends in "/"
    yields the empty string.
    """
    _head, _sep, tail = uri.rpartition('/')
    return tail
def get_unified_orgs():
    """
    Fetch every wos:UnifiedOrganization and its rdfs:label from the store.

    Returns a list of ``(uri, label)`` tuples, one per query result row, with
    both values converted through ``toPython()``.
    """
    sparql = rq_prefixes + """
select ?wosU ?org
where {
?wosU a wos:UnifiedOrganization ;
rdfs:label ?org .
}
"""
    store = backend.get_store()
    return [
        (result.wosU.toPython(), result.org.toPython())
        for result in store.query(sparql)
    ]
def load_incites_json_file(name, ictype):
    """
    Load the saved InCites JSON document for one organization.

    The file is read from ``data/incites/<ictype>/org-<slug>.json``, where
    ``<slug>`` is ``slugify(name)``.

    name: organization name; slugified to build the file name.
    ictype: name of the sub-directory under ``data/incites`` to read from.

    Returns the decoded JSON content, or an empty list when the file cannot
    be opened or read (IOError). Malformed JSON is not handled here and
    propagates to the caller.
    """
    fname = "org-" + slugify(name)
    path = 'data/incites/{}/{}.json'.format(ictype, fname)
    try:
        with open(path) as inf:
            return json.load(inf)
    except IOError:
        # Use warning() rather than the deprecated warn() alias, matching the
        # other logging calls in this module.
        logger.warning("Could not find metrics for {}.".format(name))
        return []
def org_total_counts(orgs):
    """
    Map per-year publication counts for each organization into RDF.

    For every name in ``orgs`` the 'total' InCites file is loaded; each entry
    (read via its 'year' and 'count' keys) becomes a WOS.InCitesPubPerYear
    node that the organization relates to. Organizations with no data are
    logged and skipped. The resulting graph is passed to
    ``backend.sync_updates`` together with settings.INCITES_PUB_YEAR_COUNTS.

    Returns True.
    """
    graph = Graph()
    for org_name in orgs:
        org_uri = waan_uri(org_name)
        org_key = local_name(org_uri)
        yearly = load_incites_json_file(org_name, 'total')
        if len(yearly) == 0:
            logger.warning("{} file is empty.".format(org_name))
            continue
        for entry in yearly:
            year = entry['year']
            count = entry['count']
            count_uri = D['pubcount-' + org_key + '-' + str(year)]
            triples = (
                (count_uri, RDF.type, WOS.InCitesPubPerYear),
                (count_uri, RDFS.label, Literal("{} - {}".format(year, count))),
                (count_uri, WOS.number, Literal(count)),
                (count_uri, WOS.year, Literal(year)),
                (org_uri, VIVO.relates, count_uri),
            )
            for triple in triples:
                graph.add(triple)
    backend.sync_updates(settings.INCITES_PUB_YEAR_COUNTS, graph)
    return True
def org_total_cites(orgs):
    """
    Map per-year citation counts for each organization into RDF.

    For every name in ``orgs`` the 'cites' InCites file is loaded; each entry
    (read via its 'year' and 'count' keys) becomes a WOS.InCitesCitesPerYear
    node that the organization relates to. Organizations with no data are
    logged and skipped. The resulting graph is passed to
    ``backend.sync_updates`` together with settings.INCITES_TOTAL_CITES_YEAR.

    Returns True.
    """
    graph = Graph()
    for org_name in orgs:
        org_uri = waan_uri(org_name)
        org_key = local_name(org_uri)
        cite_rows = load_incites_json_file(org_name, 'cites')
        if len(cite_rows) == 0:
            logger.warning("{} file is empty.".format(org_name))
            continue
        for row in cite_rows:
            year = row['year']
            count = row['count']
            cite_uri = D['citecount-' + org_key + '-' + str(year)]
            triples = (
                (cite_uri, RDF.type, WOS.InCitesCitesPerYear),
                (cite_uri, RDFS.label, Literal("{} - {}".format(year, count))),
                (cite_uri, WOS.number, Literal(count)),
                (cite_uri, WOS.year, Literal(year)),
                (org_uri, VIVO.relates, cite_uri),
            )
            for triple in triples:
                graph.add(triple)
    backend.sync_updates(settings.INCITES_TOTAL_CITES_YEAR, graph)
    return True
def org_top_categories(orgs):
    """
    Map per-category, per-year counts for each organization into RDF.

    For every name in ``orgs`` the 'categories-by-year' InCites file is
    loaded. Each entry supplies a 'category' and a list of 'counts', whose
    items are read via their 'count' and 'year' keys. Every (category, year)
    pair becomes a WOS.InCitesTopCategory node that relates to both the
    category URI and the organization URI. Organizations with no data are
    logged and skipped. The resulting graph is passed to
    ``backend.sync_updates`` together with settings.INCITES_TOP_CATEGORIES.

    Returns True.
    """
    graph = Graph()
    for org_name in orgs:
        org_uri = waan_uri(org_name)
        org_key = local_name(org_uri)
        categories = load_incites_json_file(org_name, 'categories-by-year')
        if len(categories) == 0:
            logger.warning("{} file is empty.".format(org_name))
            continue
        for entry in categories:
            category = entry['category']
            for yearly in entry['counts']:
                count = yearly['count']
                year = yearly['year']
                category_uri = get_category_uri(category)
                # NOTE(review): the org local name and the category slug are
                # joined with no separator, unlike the 'pubcount-' and
                # 'citecount-' URIs. Kept as-is so existing URIs stay stable;
                # confirm whether that is intended.
                metric_uri = (
                    D['topcategory-']
                    + org_key
                    + slugify(category)
                    + '-{}'.format(year)
                )
                triples = (
                    (metric_uri, RDF.type, WOS.InCitesTopCategory),
                    (metric_uri, RDFS.label, Literal("{} - {}".format(org_name, category))),
                    (metric_uri, WOS.number, Literal(count)),
                    (metric_uri, WOS.year, Literal(year)),
                    (metric_uri, VIVO.relates, category_uri),
                    (metric_uri, VIVO.relates, org_uri),
                )
                for triple in triples:
                    graph.add(triple)
    backend.sync_updates(settings.INCITES_TOP_CATEGORIES, graph)
    return True
def main():
    """
    Load the InCites metrics for every unified organization in the system.

    Organization names come from get_unified_orgs(); the loaders then run in
    this order: top categories, total cites, total publication counts.
    """
    org_names = [name for _uri, name in get_unified_orgs()]
    org_top_categories(org_names)
    org_total_cites(org_names)
    org_total_counts(org_names)


if __name__ == "__main__":
    main()