#include "osmHyp.h"
// NOTE(review): the header name of this second #include was stripped from the
// text under review; <sstream> is assumed -- confirm against the build.
#include <sstream>

using namespace std;
using namespace lm::ngram;

namespace Moses2
{

// Resets both position counters to zero and stores the given LM state.
void osmState::setState(const lm::ngram::State & val)
{
  j = 0;
  E = 0;
  lmState = val;
}

// Copies the positions and the gap map into this state.
void osmState::saveState(int jVal, int eVal, map <int , string> & gapVal)
{
  gap = gapVal; // assignment replaces the old contents, no clear() needed
  j = jVal;
  E = eVal;
}

// Hash over exactly the fields compared by operator==.
size_t osmState::hash() const
{
  size_t ret = j;

  boost::hash_combine(ret, E);
  boost::hash_combine(ret, gap);
  boost::hash_combine(ret, lmState.length);

  return ret;
}

// Two states are equal when j, E, the gap map and the LM state length agree.
bool osmState::operator==(const FFState& otherBase) const
{
  const osmState &other = static_cast<const osmState&>(otherBase);

  return j == other.j
         && E == other.E
         && gap == other.gap
         && lmState.length == other.lmState.length;
}

std::string osmState :: getName() const
{
  return "done";
}

//////////////////////////////////////////////////

// Starts with all counters and positions at zero and an empty gap map.
osmHypothesis :: osmHypothesis()
{
  opProb = 0;
  gapWidth = 0;
  gapCount = 0;
  openGapCount = 0;
  deletionCount = 0;
  j = 0;
  E = 0;
}

// Loads j, E, the gap map and the LM state from prev_state.
// A NULL prev_state leaves this hypothesis untouched.
void osmHypothesis :: setState(const FFState* prev_state)
{
  if (prev_state == NULL) {
    return;
  }

  const osmState * prev = static_cast <const osmState *> (prev_state);
  j = prev->getJ();
  E = prev->getE();
  gap = prev->getGap();
  lmState = prev->getLMState();
}

// Writes this hypothesis' LM state, positions and gap map into `state`.
void osmHypothesis :: saveState(osmState &state)
{
  // Order matters: osmState::setState() zeroes j and E, saveState() then
  // fills them in.
  state.setState(lmState);
  state.saveState(j,E,gap);
}

// Returns 0 when operations[x] contains one of the jump / continue-cept /
// insert-gap markers, 1 otherwise.
int osmHypothesis :: isTranslationOperation(int x)
{
  static const char * const markers[] = {
    "_JMP_BCK_", "_JMP_FWD_", "_CONT_CEPT_", "_INS_GAP_"
  };

  const string & op = operations[x];
  for (size_t m = 0; m < sizeof(markers) / sizeof(markers[0]); ++m) {
    if (op.find(markers[m]) != string::npos) {
      return 0;
    }
  }

  return 1;
}

// Zeroes the gap/deletion counters and filters `operations` down to the
// entries for which isTranslationOperation() returns 1.
void osmHypothesis :: removeReorderingOperations()
{
  gapCount = 0;
  deletionCount = 0;
  openGapCount = 0;
  gapWidth = 0;

  std::vector <std::string> tupleSequence;

  // NOTE(review): the loop body and the statements after it were lost from
  // the text under review (everything between '<' and '>' was stripped);
  // reconstructed -- verify against the pristine file.
  for (size_t x = 0; x < operations.size(); x++) {
    if (isTranslationOperation(static_cast<int>(x)) == 1) {
      tupleSequence.push_back(operations[x]);
    }
  }

  operations = tupleSequence;
}

// NOTE(review): this whole function fell inside a stripped span of the text
// under review; it is reconstructed -- verify against the pristine file.
// Presumably sums the model score of every operation into opProb, threading
// the LM state through and leaving the final state in lmState.
void osmHypothesis :: calculateOSMProb(OSMLM& ptrOp)
{
  opProb = 0;
  State currState = lmState;
  State temp;

  for (size_t i = 0; i < operations.size(); i++) {
    temp = currState;
    opProb += ptrOp.Score(temp,operations[i],currState);
  }

  lmState = currState;
}

// NOTE(review): the name and return type of this function were lost from the
// text under review (only "& coverageVector)" survived); reconstructed.
// Returns the index of the first zero entry, or -1 when there is none.
int osmHypothesis :: firstOpenGap(vector <int> & coverageVector)
{
  for (size_t nd = 0; nd < coverageVector.size(); nd++) {
    if (coverageVector[nd] == 0) {
      return static_cast<int>(nd);
    }
  }

  return -1;
}

string osmHypothesis :: intToString(int num)
{
  return SPrint(num);
}

// Appends to `operations` the operations needed to generate the source word
// at position j1, updating j, E, the gap map, the coverage vector and the
// gap/deletion counters.
//   contFlag == 0 : first source word of a cept (emits a _TRANS_ operation)
//   contFlag == 2 : unaligned source word (emits an _INS_ operation)
//   otherwise     : continuation of a cept (emits _CONT_CEPT_)
// Afterwards, if the new position j is uncovered and listed in
// targetNullWords, recurses to emit that word as well.
void osmHypothesis :: generateOperations(int & startIndex , int j1 , int contFlag , Bitmap & coverageVector , string english , string german , set <int> & targetNullWords , vector <string> & currF)
{
  int gFlag = 0;
  int gp = 0;
  int ans;

  if (j < j1) { // j1 is the index of the source word we are about to generate ...
    if (coverageVector.GetValue(j) == 0) { // if source word at j is not generated yet ...
      operations.push_back("_INS_GAP_");
      gFlag++;
      gap[j] = "Unfilled";
    }
    if (j == E) {
      j = j1;
    } else {
      operations.push_back("_JMP_FWD_");
      j = E;
    }
  }

  if (j1 < j) {
    if (j < E && coverageVector.GetValue(j) == 0) {
      operations.push_back("_INS_GAP_");
      gFlag++;
      gap[j] = "Unfilled";
    }

    j = closestGap(gap,j1,gp);
    operations.push_back("_JMP_BCK_" + intToString(gp));

    // NOTE(review): from here down to the gapCount update the statements were
    // lost from the text under review (stripped between '<' and '>');
    // reconstructed -- verify against the pristine file.
    if (j == j1) {
      gap[j] = "Filled";
    }
  }

  if (j < j1) {
    operations.push_back("_INS_GAP_");
    gap[j] = "Unfilled";
    gFlag++;
    j = j1;
  }

  if (contFlag == 0) { // First word of the multi-word cept ...
    if (english == "_TRANS_SLF_") { // Unknown word ...
      operations.push_back("_TRANS_SLF_");
    } else {
      operations.push_back("_TRANS_" + english + "_TO_" + german);
    }

    ans = coverageVector.GetFirstGapPos();
    if (ans != -1) {
      gapWidth += j - ans;
    }
  } else if (contFlag == 2) {
    operations.push_back("_INS_" + german);

    ans = coverageVector.GetFirstGapPos();
    if (ans != -1) {
      gapWidth += j - ans;
    }
    deletionCount++;
  } else {
    operations.push_back("_CONT_CEPT_");
  }

  coverageVector.SetValue(j,1);
  j += 1;

  if (E < j) {
    E = j;
  }
  // End of reconstructed span.

  if (gFlag > 0) {
    gapCount++;
  }

  openGapCount += getOpenGaps();

  if (j < coverageVector.GetSize()) {
    if (coverageVector.GetValue(j) == 0 && targetNullWords.find(j) != targetNullWords.end()) {
      j1 = j;
      german = currF[j1-startIndex];
      english = "_INS_";
      generateOperations(startIndex, j1, 2 , coverageVector , english , german , targetNullWords , currF);
    }
  }
}

// Dumps the operation sequence to stderr.
// NOTE(review): everything after the first "cerr<" was lost from the text
// under review; the statements and label strings below are reconstructed --
// verify against the pristine file.
void osmHypothesis :: print()
{
  for (size_t i = 0; i < operations.size(); i++) {
    cerr<<operations[i]<<" ";
  }

  cerr<<endl<<endl;

  cerr<<"Operation Probability "<<opProb<<endl;
  cerr<<"Gap Count "<<gapCount<<endl;
  cerr<<"Open Gap Count "<<openGapCount<<endl;
  cerr<<"Gap Width "<<gapWidth<<endl;
  cerr<<"Deletion Count "<<deletionCount<<endl;

  cerr<<"_______________"<<endl;
}

// Walks the gap map from the highest key downwards, counting "Unfilled"
// entries.  If an unfilled entry sits exactly at j1, returns j1.  Otherwise
// returns the key of the unfilled entry below j1 that is nearest to it, or
// -1 when there is none.  gp receives the 1-based count (from the top) of
// the chosen entry, or 0 when nothing is chosen.
// An empty map yields -1 / gp == 0 (the original do/while decremented
// begin() in that case, which is undefined behaviour).
int osmHypothesis :: closestGap(map <int,string> gap, int j1, int & gp)
{
  int dist = 1172; // initial "no candidate yet" distance, value kept as is
  int value = -1;
  int opGap = 0;
  gp = 0;

  for (map <int,string> :: reverse_iterator iter = gap.rbegin(); iter != gap.rend(); ++iter) {
    if (iter->second != "Unfilled") {
      continue;
    }

    opGap++;

    if (iter->first == j1) {
      gp = opGap;
      return j1;
    }

    int temp = iter->first - j1;
    if (temp < 0) {
      temp = -temp;
    }

    if (dist > temp && iter->first < j1) {
      dist = temp;
      value = iter->first;
      gp = opGap;
    }
  }

  return value;
}

// Number of entries in the gap map currently marked "Unfilled".
int osmHypothesis :: getOpenGaps()
{
  int nd = 0;

  for (map <int,string> :: const_iterator iter = gap.begin(); iter != gap.end(); ++iter) {
    if (iter->second == "Unfilled") {
      nd++;
    }
  }

  return nd;
}

// Emits "_DEL_<word>" for `english`, then keeps emitting one for each
// following target index that is in sourceNullWords, skipping indexes
// listed in doneTargetIndexes.
// Written as a loop: the original tail recursion copied the by-value set at
// every level.
void osmHypothesis :: generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes)
{
  for (;;) {
    operations.push_back("_DEL_" + english);
    currTargetIndex++;

    while (doneTargetIndexes.find(currTargetIndex) != doneTargetIndexes.end()) {
      currTargetIndex++;
    }

    if (sourceNullWords.find(currTargetIndex) == sourceNullWords.end()) {
      break;
    }

    english = currE[currTargetIndex];
  }
}

// Builds the operation sequence for the current phrase from ceptsInPhrase,
// targetNullWords and sourceNullWords (all filled by constructCepts()).
void osmHypothesis :: computeOSMFeature(int startIndex , Bitmap & coverageVector)
{
  set <int> doneTargetIndexes;
  set <int> :: const_iterator iter;
  string english;
  string source;
  int j1;
  int targetIndex = 0;

  if (!targetNullWords.empty()) { // Source words to be deleted in the start of this phrase ...
    if (*targetNullWords.begin() == startIndex) {
      j1 = startIndex;
      source = currF[j1-startIndex];
      english = "_INS_";
      generateOperations(startIndex, j1, 2 , coverageVector , english , source , targetNullWords , currF);
    }
  }

  if (sourceNullWords.find(targetIndex) != sourceNullWords.end()) { // first word has to be deleted ...
    english = currE[targetIndex];
    generateDeleteOperations(english,targetIndex, doneTargetIndexes);
  }

  for (size_t i = 0; i < ceptsInPhrase.size(); i++) {
    // Each cept is a (source positions, target positions) pair, as stored
    // by constructCepts().
    const set <int> & fSide = ceptsInPhrase[i].first;
    const set <int> & eSide = ceptsInPhrase[i].second;

    // Join the target words of the cept with "^_^".  Target positions that
    // do not directly continue the run are remembered as already done.
    iter = eSide.begin();
    targetIndex = *iter;
    english = currE[*iter];
    ++iter;

    for (; iter != eSide.end(); ++iter) {
      if (*iter == targetIndex+1) {
        targetIndex++;
      } else {
        doneTargetIndexes.insert(*iter);
      }

      english += "^_^";
      english += currE[*iter];
    }

    // Join the source words of the cept the same way.
    iter = fSide.begin();
    source = currF[*iter];
    ++iter;

    for (; iter != fSide.end(); ++iter) {
      source += "^_^";
      source += currF[*iter];
    }

    // First source word starts the cept (contFlag 0), the rest continue it
    // (contFlag 1).
    iter = fSide.begin();
    j1 = *iter + startIndex;
    ++iter;

    generateOperations(startIndex, j1, 0 , coverageVector , english , source , targetNullWords , currF);

    for (; iter != fSide.end(); ++iter) {
      j1 = *iter + startIndex;
      generateOperations(startIndex, j1, 1 , coverageVector , english , source , targetNullWords , currF);
    }

    targetIndex++; // Check whether the next target word is unaligned ...

    while (doneTargetIndexes.find(targetIndex) != doneTargetIndexes.end()) {
      targetIndex++;
    }

    if (sourceNullWords.find(targetIndex) != sourceNullWords.end()) {
      english = currE[targetIndex];
      generateDeleteOperations(english,targetIndex, doneTargetIndexes);
    }
  }

  // Debug aids: removeReorderingOperations(); print();
}

// Grows eSide / fSide until they are closed under the alignment links:
// every source position linked (via tS) to a target in eSide joins fSide,
// every target linked (via sT) to a source in fSide joins eSide, repeated
// until eSide stops growing.
void osmHypothesis :: getMeCepts ( set <int> & eSide , set <int> & fSide , map <int , vector <int> > & tS , map <int , vector <int> > & sT)
{
  set <int> :: iterator iter;
  const size_t sz = eSide.size();

  for (iter = eSide.begin(); iter != eSide.end(); ++iter) {
    const vector <int> & t = tS[*iter];
    for (size_t i = 0; i < t.size(); i++) {
      fSide.insert(t[i]);
    }
  }

  // NOTE(review): the inner loop body and the growth test below were lost
  // from the text under review (stripped between '<' and '>');
  // reconstructed -- verify against the pristine file.
  for (iter = fSide.begin(); iter != fSide.end(); ++iter) {
    // Copy: eSide is being extended here, and sT's mapped vector must not be
    // aliased across a possible default-insertion by operator[].
    const vector <int> t = sT[*iter];
    for (size_t i = 0 ; i < t.size(); i++) {
      eSide.insert(t[i]);
    }
  }

  if (eSide.size() > sz) {
    getMeCepts(eSide,fSide,tS,sT);
  }
}

// Reads `align` as consecutive (source, target) index pairs and fills:
//   targetNullWords : positions startIndex..endIndex with no alignment link
//   sourceNullWords : target indexes 0..targetPhraseLength-1 with no link
//   ceptsInPhrase   : one (source set, target set) pair per connected group
void osmHypothesis :: constructCepts(vector <int> & align , int startIndex , int endIndex, int targetPhraseLength)
{
  std::map <int , vector <int> > sT;
  std::map <int , vector <int> > tS;
  std::set <int> eSide;
  std::set <int> fSide;
  std::set <int> :: iterator iter;

  // A trailing unpaired value (odd-length vector) is ignored instead of
  // reading one element past the end.
  for (size_t i = 0; i + 1 < align.size(); i += 2) {
    const int src = align[i];
    const int tgt = align[i+1];
    tS[tgt].push_back(src);
    sT[src].push_back(tgt);
  }

  for (int i = startIndex; i <= endIndex; i++) { // What are unaligned source words in this phrase ...
    if (sT.find(i-startIndex) == sT.end()) {
      targetNullWords.insert(i);
    }
  }

  for (int i = 0; i < targetPhraseLength; i++) { // What are unaligned target words in this phrase ...
    if (tS.find(i) == tS.end()) {
      sourceNullWords.insert(i);
    }
  }

  // Peel off one connected group per iteration, seeded with the smallest
  // remaining target index.
  while (!tS.empty() && !sT.empty()) {
    eSide.clear();
    fSide.clear();
    eSide.insert(tS.begin()->first);
    getMeCepts(eSide, fSide, tS , sT);

    // Erase by key: a missing key is a no-op, whereas erase(find(...)) on a
    // missing key would be undefined.
    for (iter = eSide.begin(); iter != eSide.end(); ++iter) {
      tS.erase(*iter);
    }

    for (iter = fSide.begin(); iter != fSide.end(); ++iter) {
      sT.erase(*iter);
    }

    ceptsInPhrase.push_back(make_pair (fSide , eSide));
  }
}

// Replaces the contents of `scores` with opProb and, unless numFeatures is
// 1, gapWidth, gapCount, openGapCount and deletionCount in that order.
// NOTE(review): the element type of `scores` was stripped from the text
// under review; float is assumed -- confirm against osmHyp.h.
void osmHypothesis :: populateScores(vector <float> & scores , const int numFeatures)
{
  scores.clear();
  scores.push_back(opProb);

  if (numFeatures == 1) {
    return;
  }

  scores.push_back(gapWidth);
  scores.push_back(gapCount);
  scores.push_back(openGapCount);
  scores.push_back(deletionCount);
}

} // namespace