Mahout实现基于性别的IDRescorer

来源:互联网 发布:哥们傲剑数据 编辑:程序博客网 时间:2024/06/02 16:51
package unitFive;

import java.io.File;
import java.io.IOException;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.common.iterator.FileLineIterable;

/**
 * Gender-based {@link IDRescorer}.
 *
 * <p>For users who care about gender, this rescorer filters candidate profiles. It guesses
 * which gender a user prefers by counting the genders of the profiles that user has already
 * rated, and then filters out profiles of the opposite gender.
 *
 * @author YangXin
 */
public class GenderRescorer implements IDRescorer {

    /** Initial capacity used for the men/women ID sets built by {@link #parseMenWomen(File)}. */
    private static final int INITIAL_PROFILE_CAPACITY = 50000;

    /** Profile IDs known to be male. */
    private final FastIDSet men;

    /** Profile IDs known to be female. */
    private final FastIDSet women;

    /** Cache of user IDs already found to rate more male than female profiles. */
    private final FastIDSet usersRateMoreMen;

    /** Cache of user IDs already found NOT to rate more male than female profiles. */
    private final FastIDSet usersRateLessMen;

    /** {@code true} if male profiles are filtered for this user, {@code false} if female ones are. */
    private final boolean filterMen;

    /**
     * Creates a rescorer for one user.
     *
     * @param men              IDs of male profiles
     * @param women            IDs of female profiles
     * @param usersRateMoreMen cache of users found to rate more men; updated by
     *                         {@link #ratesMoreMen(long, DataModel)}
     * @param usersRateLessMen cache of users found not to rate more men; updated by
     *                         {@link #ratesMoreMen(long, DataModel)}
     * @param userID           the user recommendations are being rescored for
     * @param model            data model used to look up the user's existing preferences
     * @throws TasteException if the user's preferences cannot be read from {@code model}
     */
    public GenderRescorer(FastIDSet men,
                          FastIDSet women,
                          FastIDSet usersRateMoreMen,
                          FastIDSet usersRateLessMen,
                          long userID,
                          DataModel model) throws TasteException {
        this.men = men;
        this.women = women;
        this.usersRateMoreMen = usersRateMoreMen;
        this.usersRateLessMen = usersRateLessMen;
        // ratesMoreMen() reads the four sets above, so it must run after they are assigned.
        // A user who mostly rates men presumably prefers men, so the OPPOSITE gender is the
        // one to filter: men are filtered only when the user does not rate more men.
        // (Ties, and users with no rated profile of known gender, therefore filter men.)
        this.filterMen = !ratesMoreMen(userID, model);
    }

    /**
     * Parses a gender file and builds the sets of male and female profile IDs.
     *
     * <p>Each line is read as {@code <profileID>,<gender>}, where the gender code is the single
     * character right after the first comma. {@code 'U'} lines are ignored, {@code 'M'} goes to
     * the men set, and any other code goes to the women set. Lines that have no comma, or nothing
     * after the comma (for example blank lines), are skipped instead of raising
     * {@link StringIndexOutOfBoundsException}.
     *
     * @param genderFile file to parse
     * @return a two-element array: index 0 is the men set, index 1 is the women set
     * @throws IOException           if the file cannot be read
     * @throws NumberFormatException if the text before the comma is not a valid {@code long}
     */
    public static FastIDSet[] parseMenWomen(File genderFile) throws IOException {
        FastIDSet men = new FastIDSet(INITIAL_PROFILE_CAPACITY);
        FastIDSet women = new FastIDSet(INITIAL_PROFILE_CAPACITY);
        for (String line : new FileLineIterable(genderFile)) {
            int comma = line.indexOf(',');
            if (comma < 0 || comma + 1 >= line.length()) {
                // Malformed or blank line: there is no gender code to read.
                continue;
            }
            char gender = line.charAt(comma + 1);
            if (gender == 'U') {
                continue;
            }
            long profileID = Long.parseLong(line.substring(0, comma));
            if (gender == 'M') {
                men.add(profileID);
            } else {
                women.add(profileID);
            }
        }
        // Rehash both sets once all IDs have been added.
        men.rehash();
        women.rehash();
        return new FastIDSet[] {men, women};
    }

    /**
     * Tells whether a user has rated strictly more male profiles than female profiles.
     *
     * <p>The answer is looked up in the two cache sets first. On a miss, the user's preferences
     * are counted and the result is stored in the matching cache set. A tie counts as
     * "does not rate more men".
     *
     * @param userID user to inspect
     * @param model  data model providing the user's preferences
     * @return {@code true} if the user rated more male than female profiles
     * @throws TasteException if the user's preferences cannot be read from {@code model}
     */
    public boolean ratesMoreMen(long userID, DataModel model) throws TasteException {
        if (usersRateMoreMen.contains(userID)) {
            return true;
        }
        if (usersRateLessMen.contains(userID)) {
            return false;
        }

        PreferenceArray prefs = model.getPreferencesFromUser(userID);
        int menCount = 0;
        int womenCount = 0;
        for (int i = 0; i < prefs.length(); i++) {
            long profileID = prefs.getItemID(i);
            if (men.contains(profileID)) {
                menCount++;
            } else if (women.contains(profileID)) {
                womenCount++;
            }
        }

        // Users who mostly rate men probably prefer men.
        boolean ratesMoreMen = menCount > womenCount;
        if (ratesMoreMen) {
            usersRateMoreMen.add(userID);
        } else {
            usersRateLessMen.add(userID);
        }
        return ratesMoreMen;
    }

    /**
     * Returns the original score unchanged, or {@link Double#NaN} when the profile is filtered.
     *
     * @param profileID     candidate profile
     * @param originalScore score computed by the recommender
     * @return {@code originalScore}, or {@code NaN} to mark the profile as excluded
     */
    @Override
    public double rescore(long profileID, double originalScore) {
        return isFiltered(profileID) ? Double.NaN : originalScore;
    }

    /**
     * Tells whether a profile belongs to the gender being filtered for this user.
     *
     * @param profileID candidate profile
     * @return {@code true} if the profile is in the filtered gender's ID set
     */
    @Override
    public boolean isFiltered(long profileID) {
        return filterMen ? men.contains(profileID) : women.contains(profileID);
    }
}

0 0
原创粉丝点击