Source code for similarity_check.Similarity



class Similarity:
    """
    Collection of similarity and dissimilarity measures for strings.

    The class is stateless: it stores no attributes, and every method
    receives the two values to compare as arguments.

    Methods
    -------
    jaccard_coefficient(str1, str2)
        Calculates the Jaccard similarity coefficient between two sets.
    overlap_coefficient(str1, str2)
        Calculates the Overlap coefficient between two sets.
    hamming_distance(str1, str2)
        Calculates the Hamming distance between two strings.
    """

    def __init__(self):
        """
        Initializes the Similarity object.

        No state is stored on the instance.
        """

    def jaccard_coefficient(self, str1: str, str2: str) -> float:
        """
        Computes the Jaccard similarity coefficient between two sets.

        The Jaccard coefficient is defined as the size of the intersection
        divided by the size of the union of the sets.

        Parameters
        ----------
        str1 : str
            The first string, which will be converted to a set of its
            characters.
        str2 : str
            The second string, which will be converted to a set of its
            characters.

        Returns
        -------
        float
            The Jaccard similarity coefficient, in the range [0.0, 1.0].
            Returns 0.0 if both sets are empty.
        """
        chars1 = set(str1)
        chars2 = set(str2)
        union_size = len(chars1 | chars2)
        # An empty union means both inputs were empty; avoid dividing by
        # zero and keep the documented float return type.
        if union_size == 0:
            return 0.0
        return len(chars1 & chars2) / union_size

    def overlap_coefficient(self, str1: str, str2: str) -> float:
        """
        Computes the Overlap coefficient between two sets.

        The Overlap coefficient is defined as the size of the intersection
        divided by the size of the smaller set.

        Parameters
        ----------
        str1 : str
            The first string, which will be converted to a set of its
            characters.
        str2 : str
            The second string, which will be converted to a set of its
            characters.

        Returns
        -------
        float
            The Overlap coefficient, in the range [0.0, 1.0].
            Returns 0.0 if either set is empty.
        """
        chars1 = set(str1)
        chars2 = set(str2)
        smaller_size = min(len(chars1), len(chars2))
        # If either set is empty the denominator would be zero; return the
        # documented fallback as a float.
        if smaller_size == 0:
            return 0.0
        return len(chars1 & chars2) / smaller_size

    def hamming_distance(self, str1: str, str2: str) -> int:
        """
        Computes the Hamming distance between two strings.

        The Hamming distance is defined as the number of positions at which
        the corresponding symbols in two strings of equal length are
        different.

        Parameters
        ----------
        str1 : str
            The first string for comparison.
        str2 : str
            The second string for comparison.

        Returns
        -------
        int
            The Hamming distance between the two strings.
            Returns -1 if the strings have different lengths.
        """
        # The distance is only defined for equal-length inputs; -1 is the
        # sentinel callers receive otherwise.
        if len(str1) != len(str2):
            return -1
        return sum(left != right for left, right in zip(str1, str2))


# Show the hamming_distance documentation only when this file is executed
# directly, so that importing the module produces no output.
if __name__ == "__main__":
    help(Similarity.hamming_distance)