// [Programmers] News Clustering
#include <cctype>
#include <map>
#include <string>
#include <vector>
using namespace std;
// Builds the multiset of two-letter bigrams found in `text`.
//
// A bigram is kept only when both adjacent characters are alphabetic; it is
// stored in lowercase so the comparison is case-insensitive. The returned map
// associates each bigram with the number of times it occurs.
static std::map<std::string, int> count_letter_bigrams(const std::string& text)
{
    std::map<std::string, int> counts;
    // Start at 1 and look back one character: this avoids `size() - 1`, which
    // wraps around to a huge unsigned value when the string is empty.
    for (std::string::size_type i = 1; i < text.size(); ++i)
    {
        // The <cctype> classifiers require a value representable as
        // unsigned char; passing a negative plain char is undefined behaviour.
        const unsigned char first = static_cast<unsigned char>(text[i - 1]);
        const unsigned char second = static_cast<unsigned char>(text[i]);
        if (std::isalpha(first) && std::isalpha(second))
        {
            std::string bigram;
            bigram += static_cast<char>(std::tolower(first));
            bigram += static_cast<char>(std::tolower(second));
            ++counts[bigram];
        }
    }
    return counts;
}

// Returns the Jaccard similarity of the letter-bigram multisets of `str1` and
// `str2`, multiplied by 65536 and truncated to an integer.
//
// For multisets, the intersection holds each bigram min(count1, count2) times
// and the union holds it max(count1, count2) times. When neither string yields
// any bigram the similarity is defined as 1, so 65536 is returned.
int solution(std::string str1, std::string str2)
{
    const int kScale = 65536;

    const std::map<std::string, int> counts1 = count_letter_bigrams(str1);
    const std::map<std::string, int> counts2 = count_letter_bigrams(str2);

    long long total_size = 0;   // |A| + |B|, counting duplicates
    long long intersection = 0; // sum of min(count1, count2) over shared bigrams

    for (const auto& entry : counts1)
    {
        total_size += entry.second;
        // find() instead of operator[] so the lookup never inserts an entry.
        const auto match = counts2.find(entry.first);
        if (match != counts2.end())
        {
            intersection += (entry.second < match->second) ? entry.second : match->second;
        }
    }
    for (const auto& entry : counts2)
    {
        total_size += entry.second;
    }

    if (total_size == 0)
    {
        return kScale;
    }

    // |A ∪ B| = |A| + |B| - |A ∩ B|; it is at least 1 here because
    // the intersection can never exceed the smaller of the two multisets.
    const long long union_size = total_size - intersection;

    // Integer arithmetic yields the truncated result directly, with no
    // floating-point rounding involved.
    return static_cast<int>(intersection * kScale / union_size);
}