def get_kNN_result(name, top_k=5):
    """Return the stored failure cases most similar to *name*.

    The name is segmented with ``jieba.cut_for_search``, stop words are
    dropped, and a 0/1 vector over ``corpus_names`` is built from the
    remaining tokens.  That vector is scored against every stored vector
    in ``failure_names`` with ``get_cos_similar``; the ``top_k`` best
    scoring case ids are then looked up in the ``failurecase`` table.

    Relies on names that are not defined in this block and must exist at
    module level: ``jieba``, ``stop_list``, ``corpus_names``,
    ``failure_names``, ``get_cos_similar`` and ``cursor``.

    Args:
        name: Text to match against the stored failure cases.
        top_k: Maximum number of cases to return (defaults to 5).

    Returns:
        A list of dicts with the keys ``'name'``, ``'industry'``,
        ``'mode'`` and ``'id'``, ordered from most to least similar.
        Ids that have no row in ``failurecase`` are left out.  The list
        is empty when there is nothing to rank or ``top_k`` is not
        positive.
    """
    if top_k <= 0:
        return []

    # Word segmentation.  The stripped token is checked against the stop
    # list, the raw token is what gets kept.  A set makes the membership
    # test in the next step O(1) instead of scanning a list every time.
    tokens = {
        cut for cut in jieba.cut_for_search(name)
        if cut.strip() not in stop_list
    }

    # Map the tokens onto the corpus vocabulary as a 0/1 vector.
    query_vector = [
        1 if c_name[0].strip() in tokens else 0
        for c_name in corpus_names
    ]

    # Score every stored case against the query vector.
    scored = []
    for fid, vector in failure_names:
        # NOTE(review): eval() executes whatever text is stored in
        # `vector`.  If that data can ever come from an untrusted source,
        # switch to ast.literal_eval -- confirm the stored format first.
        stored_vector = list(eval(vector))
        scored.append((get_cos_similar(query_vector, stored_vector), fid))

    # Keep the top_k highest scores, biggest first.
    nearest = sorted(scored)[-top_k:]
    nearest.reverse()
    case_ids = [fid for _, fid in nearest]
    print(case_ids)

    # Nothing to look up: "id in ()" is not valid SQL, so stop here.
    if not case_ids:
        return []

    # Build the IN list by hand instead of str(tuple(...)): a one-element
    # tuple renders as "(5,)", which is a SQL syntax error.
    # NOTE(review): the ids are interpolated into the statement.  Prefer
    # driver placeholders once the cursor's paramstyle is confirmed.
    id_list = ', '.join(repr(fid) for fid in case_ids)
    cursor.execute(
        'select id,name,industry,mode from failurecase where id in ('
        + id_list + ')'
    )
    rows = cursor.fetchall()
    print(rows)

    # Index the rows by id so the similarity order can be restored.
    cases_by_id = {
        row[0]: {'name': row[1], 'industry': row[2], 'mode': row[3]}
        for row in rows
    }

    result = []
    for fid in case_ids:
        case = cases_by_id.get(fid)
        if case is None:
            # A ranked id without a matching row is skipped instead of
            # raising KeyError.
            continue
        # Copy before adding 'id' so the indexed row dict is not mutated
        # (and a repeated id cannot alias the same dict in the result).
        result.append(dict(case, id=fid))
    return result