-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathextract_db_puncts.cpp
59 lines (56 loc) · 1.98 KB
/
extract_db_puncts.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#include <iostream>
#include <string>
#include "logger.h"
#include "db_factory.h"
#include "query_result.h"
#include <set>
#include <map>
// Definition of db_factory::singleton_instance for this program; it starts out null.
db* db_factory::singleton_instance = nullptr;
/// Returns a copy of @p text in which every single quote is doubled, so the
/// result can be placed between single quotes inside an SQL statement.
static std::string escape_sql_literal(const std::string &text){
    std::string escaped;
    escaped.reserve(text.size());
    for(const char c:text){
        if(c=='\''){
            escaped+="''";
        }
        else{
            escaped+=c;
        }
    }
    return escaped;
}

/**
 * Lists the stems of the punctuation entries stored in a morphology database.
 *
 * Usage: extract_db_puncts /path/to/morph/db/file lexicon_name
 *
 * Every MORPHOLOGY row with FEATURE_NR=1 and FEATURE='Punct' identifies a
 * (wordform, group_id) pair. For each pair all of its rows are fetched ordered
 * by FEATURE_NR and the FEATURE value of the first row is taken as the stem.
 * The distinct stems are written to stdout, one per line, in std::set order,
 * formatted as "<stem> <lexicon_name>;".
 *
 * @param argc number of command-line arguments (at least 3 expected)
 * @param argv argv[1] is the database file, argv[2] the lexicon name
 * @return 0 on success, EXIT_FAILURE when too few arguments were given
 */
int main(int argc, char* argv[]){
    if(argc<3){
        std::cerr<<"Usage: extract_db_puncts /path/to/morph/db/file lexicon_name"<<std::endl;
        return EXIT_FAILURE;
    }
    //logger::singleton("console",3,"LE");
    std::string db_file=argv[1];
    const std::string lexicon=argv[2];
    std::set<std::string> stems;

    db *sqlite=db_factory::get_instance();
    // NOTE(review): the outcome of open() is not checked here; its failure
    // signalling is not visible from this file.
    sqlite->open(db_file);

    query_result *punct_entries=sqlite->exec_sql("SELECT WORDFORM, GROUP_ID FROM MORPHOLOGY WHERE FEATURE_NR=1 AND FEATURE='Punct';");
    if(punct_entries!=nullptr){
        for(unsigned int i=0;i<punct_entries->nr_of_result_rows();++i){
            // Disabled filter (kept for reference): only process wordforms for which
            // wordform.front()!='\''&&wordform.front()!='-'&&wordform.find_first_not_of("'-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")==std::string::npos
            // and likewise for the stem before inserting it.
            std::string wordform=*punct_entries->field_value_at_row_position(i,"wordform");
            std::string group_id=*punct_entries->field_value_at_row_position(i,"group_id");

            // Both values end up inside quoted SQL literals, so both are escaped.
            query_result *current_punct_entries=sqlite->exec_sql("SELECT * FROM MORPHOLOGY WHERE WORDFORM='"+escape_sql_literal(wordform)+"' AND GROUP_ID='"+escape_sql_literal(group_id)+"' ORDER BY FEATURE_NR;");

            // Row 0 is only read when the result actually contains a row.
            if(current_punct_entries!=nullptr&&current_punct_entries->nr_of_result_rows()>0){
                std::string stem=*current_punct_entries->field_value_at_row_position(0,"feature");
                stems.insert(stem);
            }
            delete current_punct_entries;
        }
        delete punct_entries;
    }

    for(const auto &stem:stems){
        std::cout<<stem<<" "<<lexicon<<";"<<'\n';
    }
    // Flush once, before touching the database again, instead of once per line.
    std::cout<<std::flush;

    sqlite->close();
    db_factory::delete_instance();
    return 0;
}