Skip to content
Snippets Groups Projects
Commit b2d2a699 authored by Ubuntu's avatar Ubuntu
Browse files

updated server wikify script

parent ddaac2cf
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env Rscript
setwd("/mnt2/")
setwd("/mnt/")
#load R packages
library(stringi)
library(reticulate)
......@@ -13,7 +13,9 @@ Sys.setenv(RETICULATE_PYTHON = '/usr/bin/python')
#load functions
# Pull the Python definitions in findPhrase.py into this R session (reticulate).
source_python("~/nlp-pipeline/api/findPhrase.py")
# Read page.csv into `page`; the relative path is resolved against the current
# working directory.
# NOTE(review): fread() is presumably data.table's reader -- the library() call
# that provides it is not visible in this excerpt; confirm it is loaded.
message("Reading page.csv in...")
page<-fread("page.csv")
message("Done.")
collapse_spans<- function(tok_span_tab){
tab<-tok_span_tab
......@@ -71,7 +73,7 @@ Wikify<-function(text){
main <- function(){
#there are 93716482 sentences in the 6 out files
flist <- list.files("/mnt2/")
flist <- list.files("/mnt/wikiClean/")
#REMEMBER TO REMOVE WHEN PACKAGING UP
......@@ -83,7 +85,7 @@ main <- function(){
print(paste0("*** Now working on ", file, " ***"))
#pick a starting chunk size and a batch size
n_todo<-as.numeric(system(paste0("cat /mnt2/", file, " | wc -l"),
n_todo<-as.numeric(system(paste0("cat /mnt/wikiClean/", file, " | wc -l"),
intern = TRUE))
#initialize other parameters
......@@ -113,7 +115,7 @@ main <- function(){
skip <- 0
}
message("Reading the chunk of the file in...")
chunk <- fread(paste0("/mnt2/", file),
chunk <- fread(paste0("/mnt/wikiClean/", file),
sep = "\n",
header = FALSE,
fill=TRUE,
......@@ -132,7 +134,7 @@ main <- function(){
message("Printing the head of sentences...")
head(sentences)
message(paste0("Writing out part of the chunk..."))
fwrite(sentences, "/mnt2/wikified_sentences.csv", append=TRUE)
fwrite(sentences, "/mnt/wikified_sentences.csv", append=TRUE)
#adjust n_todo
n_todo <- n_todo - c_chunk_size
......@@ -149,8 +151,8 @@ main <- function(){
param_chain<-c_chunk_size
#report to user in message
timeRow <- data.table(file = file, nworkers = workers, ntodo = n_todo, chunkSize = c_chunk_size, epoch = counter, timeElapsed = e_time)
fwrite(timeRow, "time_stats.csv", append=TRUE)
timeRow <- data.table(file = file, nworkers = workers, ntodo = n_todo, chunkSize = c_chunk_size, epoch = counter, timeElapsed = e_time, efficiency = (e_time/workers))
fwrite(timeRow, paste0("time_stats", workers, ".csv"), append=TRUE)
message(paste0(n_todo, " sentences remaining | ",
unclass(left)[1]," ",
data.frame(t(unlist(attributes(left))))$units[1],
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment