# Source code for emop.lib.processes.juxta_compare

import os
from emop.lib.utilities import exec_cmd
from emop.lib.processes.processes_base import ProcessesBase


class JuxtaCompare(ProcessesBase):
    """Process that diffs a page's ground truth against a job text file.

    The comparison is done by running ``juxta-cl.jar`` (found under the
    ``juxta_home`` setting) through ``exec_cmd``.  The value it prints is
    stored on ``self.job.page_result.juxta_change_index`` when ``run`` is
    called with a truthy ``postproc``.
    """

    def __init__(self, job):
        """Set up the Juxta executable path and algorithm from job settings.

        Args:
            job: Job object handed to ``ProcessesBase.__init__``.  It is read
                back here as ``self.job`` (presumably assigned by the base
                class -- confirm against ProcessesBase).
        """
        # Name the class explicitly: super(self.__class__, self) resolves to
        # the subclass when this class is inherited from and then recurses
        # into this very __init__ forever.
        super(JuxtaCompare, self).__init__(job)
        self.home = self.job.settings.juxta_home
        self.executable = os.path.join(self.home, "juxta-cl.jar")
        self.jx_algorithm = self.job.settings.juxta_cl_jx_algorithm

    def should_run(self):
        """Return False when the page result already has a change index.

        Returns:
            bool: ``True`` unless
            ``self.job.page_result.juxta_change_index_exists`` is truthy.
        """
        return not self.job.page_result.juxta_change_index_exists

    def run(self, postproc):
        """Run juxta-cl and record the resulting change index.

        Args:
            postproc: When truthy, ``self.job.alto_txt_file`` is compared and
                the parsed value is stored on
                ``self.job.page_result.juxta_change_index``.  When falsy,
                ``self.job.idhmc_txt_file`` is compared and the value is
                computed but not stored.

        Returns:
            Whatever ``self.results(stdout=..., stderr=..., exitcode=...)``
            returns (not defined in this class).  ``exitcode`` is 0 on
            success or when the page has no ground truth, 1 when the input
            file is missing or the Juxta output cannot be parsed, and the
            process exit code when juxta-cl itself fails.
        """
        # Nothing to compare against: treated as a successful no-op.
        if not self.job.page.hasGroundTruth():
            return self.results(stdout=None, stderr=None, exitcode=0)

        if postproc:
            input_file = self.job.alto_txt_file
        else:
            input_file = self.job.idhmc_txt_file

        if not input_file or not os.path.isfile(input_file):
            stderr = "Could not find JuxtaCompare input file: %s" % input_file
            return self.results(stdout=None, stderr=stderr, exitcode=1)

        cmd = [
            "java", "-Xms128M", "-Xmx128M", "-jar", self.executable,
            "-diff", self.job.page.ground_truth_file, input_file,
            "-algorithm", self.jx_algorithm,
            "-hyphen", "none",
        ]
        proc = exec_cmd(cmd)

        if proc.exitcode != 0:
            # TODO: juxta-cl.jar errors are going to stdout not stderr
            # Prefer stdout as the error text; use stderr only when stdout
            # is empty and stderr has content.
            if not proc.stdout and proc.stderr:
                stderr = proc.stderr
            else:
                stderr = proc.stdout
            return self.results(
                stdout=proc.stdout, stderr=stderr, exitcode=proc.exitcode)

        out = proc.stdout.strip()
        # Handle invalid values returned by Juxta
        if out == 'NaN':
            # NOTE(review): the sentinel is the *string* '-1' while valid
            # values are floats; kept unchanged because the consumers of
            # juxta_change_index are not visible here -- confirm.
            value = '-1'
        else:
            try:
                value = float(out)
            except ValueError:
                # juxta-cl exited 0 but printed something that is not a
                # number (e.g. empty output); report it instead of raising.
                stderr = "Could not parse JuxtaCompare output: %s" % out
                return self.results(
                    stdout=proc.stdout, stderr=stderr, exitcode=1)

        if postproc:
            # self.job.postproc_result.pp_juxta = value
            self.job.page_result.juxta_change_index = value
        # else:
        #     self.job.page_result.juxta_change_index = value
        return self.results(stdout=None, stderr=None, exitcode=0)