srikanththirumani committed on
Commit
8c61de9
1 Parent(s): 5ae9fd6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -17
app.py CHANGED
@@ -10,8 +10,6 @@ import spacy
10
  import matplotlib.pyplot as plt
11
  import io
12
  import base64
13
- from sklearn.feature_extraction.text import TfidfVectorizer
14
- import numpy as np
15
 
16
  # Load spaCy model for semantic analysis
17
  nlp = spacy.load("en_core_web_md")
@@ -37,11 +35,7 @@ def preprocess_text(text):
37
  tokens = word_tokenize(text)
38
  return [word for word in tokens if word not in stop_words]
39
 
40
- def calculate_word_similarity(text1, text2):
41
- words1 = preprocess_text(text1)
42
- words2 = preprocess_text(text2)
43
- vec1 = Counter(words1)
44
- vec2 = Counter(words2)
45
  intersection = set(vec1.keys()) & set(vec2.keys())
46
  numerator = sum([vec1[x] * vec2[x] for x in intersection])
47
  sum1 = sum([vec1[x]**2 for x in vec1.keys()])
@@ -50,17 +44,29 @@ def calculate_word_similarity(text1, text2):
50
  if not denominator:
51
  return 0.0
52
  else:
53
- return float(numerator) / denominator * 100
 
 
 
 
 
 
 
 
54
 
55
  def calculate_sentence_similarity(text1, text2):
56
  sentences1 = sent_tokenize(text1)
57
  sentences2 = sent_tokenize(text2)
58
- tfidf_vectorizer = TfidfVectorizer()
59
- tfidf_matrix1 = tfidf_vectorizer.fit_transform(sentences1)
60
- tfidf_matrix2 = tfidf_vectorizer.transform(sentences2)
61
- cosine_similarities = (tfidf_matrix1 * tfidf_matrix2.T).A
62
- avg_similarity = np.mean(np.max(cosine_similarities, axis=1)) * 100
63
- return avg_similarity
 
 
 
 
64
 
65
  def semantic_similarity(text1, text2):
66
  doc1 = nlp(text1)
@@ -74,15 +80,15 @@ def longest_common_subsequence(text1, text2):
74
  L = [[0] * (n + 1) for _ in range(m + 1)]
75
  for i in range(1, m + 1):
76
  for j in range(1, n + 1):
77
- if sentences1[i-1] == sentences2[j-1]:
78
  L[i][j] = L[i-1][j-1] + 1
79
  else:
80
  L[i][j] = max(L[i-1][j], L[i][j-1])
81
  lcs = []
82
  i, j = m, n
83
  while i > 0 and j > 0:
84
- if sentences1[i-1] == sentences2[j-1]:
85
- lcs.append(sentences1[i-1])
86
  i -= 1
87
  j -= 1
88
  elif L[i-1][j] > L[i][j-1]:
 
10
  import matplotlib.pyplot as plt
11
  import io
12
  import base64
 
 
13
 
14
  # Load spaCy model for semantic analysis
15
  nlp = spacy.load("en_core_web_md")
 
35
  tokens = word_tokenize(text)
36
  return [word for word in tokens if word not in stop_words]
37
 
38
+ def cosine_similarity(vec1, vec2):
 
 
 
 
39
  intersection = set(vec1.keys()) & set(vec2.keys())
40
  numerator = sum([vec1[x] * vec2[x] for x in intersection])
41
  sum1 = sum([vec1[x]**2 for x in vec1.keys()])
 
44
  if not denominator:
45
  return 0.0
46
  else:
47
+ return float(numerator) / denominator
48
+
49
+ def calculate_word_similarity(text1, text2):
50
+ words1 = preprocess_text(text1)
51
+ words2 = preprocess_text(text2)
52
+ vec1 = Counter(words1)
53
+ vec2 = Counter(words2)
54
+ similarity = cosine_similarity(vec1, vec2)
55
+ return similarity * 100
56
 
57
  def calculate_sentence_similarity(text1, text2):
58
  sentences1 = sent_tokenize(text1)
59
  sentences2 = sent_tokenize(text2)
60
+ similarities = []
61
+ for sent1 in sentences1:
62
+ max_similarity = 0
63
+ for sent2 in sentences2:
64
+ similarity = calculate_word_similarity(sent1, sent2)
65
+ if similarity > max_similarity:
66
+ max_similarity = similarity
67
+ similarities.append(max_similarity)
68
+ average_similarity = sum(similarities) / len(similarities) if similarities else 0.0
69
+ return average_similarity
70
 
71
  def semantic_similarity(text1, text2):
72
  doc1 = nlp(text1)
 
80
  L = [[0] * (n + 1) for _ in range(m + 1)]
81
  for i in range(1, m + 1):
82
  for j in range(1, n + 1):
83
+ if sentences2[j-1] in sentences1:
84
  L[i][j] = L[i-1][j-1] + 1
85
  else:
86
  L[i][j] = max(L[i-1][j], L[i][j-1])
87
  lcs = []
88
  i, j = m, n
89
  while i > 0 and j > 0:
90
+ if sentences2[j-1] in sentences1:
91
+ lcs.append(sentences2[j-1])
92
  i -= 1
93
  j -= 1
94
  elif L[i-1][j] > L[i][j-1]: