Spaces:

adiba-markovate
/

resume-matcher

Configuration error

App Files Files Community

adiba-markovate committed on Oct 11, 2023

Commit

5b52224

•

1 Parent(s): f82d615

app.py

Browse files

Files changed (1) hide show

app.py +116 -0

app.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import streamlit as st
+import os
+#import sys
+from PyPDF2 import PdfReader
+import docx2txt
+from transformers import pipeline
+import pandas as pd
def fetch_pdf_doc_file(directory):
    """Return the paths of the PDF and DOCX files found directly in *directory*.

    Args:
        directory: Path of the folder to scan. Sub-folders are not searched.

    Returns:
        A list of paths (``directory`` joined with the file name), sorted by
        file name, for every regular file whose extension is ``.pdf`` or
        ``.docx``. The extension is matched case-insensitively.

    Raises:
        OSError: If *directory* cannot be listed (propagated from
            ``os.listdir``).
    """
    wanted_suffixes = (".pdf", ".docx")
    matches = []
    # Sort so the result does not depend on the arbitrary os.listdir order.
    for name in sorted(os.listdir(directory)):
        if not name.lower().endswith(wanted_suffixes):
            continue
        path = os.path.join(directory, name)
        # A folder that merely happens to be named "x.pdf" is not a document.
        if os.path.isfile(path):
            matches.append(path)
    return matches
+# extract texts from files
def extract_text(files_list):
    """Extract and concatenate the lower-cased text of PDF and DOCX files.

    Args:
        files_list: Iterable of documents. Each item is either a file path or
            a binary file-like object exposing a ``name`` attribute (such as
            an uploaded file); the name's extension selects the parser.

    Returns:
        The text of every supported document, joined with newlines and
        converted to lower case. Items that are neither ``.pdf`` nor
        ``.docx`` are skipped. Returns ``""`` when nothing was extracted.
    """
    chunks = []
    for file in files_list:
        # Uploads carry their original file name in ``.name``; for a plain
        # path the path itself is the name.
        filename = str(getattr(file, "name", file)).lower()
        if filename.endswith(".pdf"):
            # A reader must be bound to one concrete document.
            reader = PdfReader(file)
            # ``or ""`` guards against pages for which no text comes back.
            chunks.extend(page.extract_text() or "" for page in reader.pages)
        elif filename.endswith(".docx"):
            chunks.append(docx2txt.process(file) or "")
    # Lower-case once at the end instead of after every page.
    return "\n".join(chunks).lower()
+#passing text for extracting skills
@st.cache_resource
def _load_skill_pipeline():
    """Build the skill-recognition token-classification pipeline.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded pipeline as a resource keeps the model from being
    rebuilt on each rerun.
    """
    return pipeline(
        "token-classification",
        model="algiraldohe/lm-ner-linkedin-skills-recognition",
    )


# Module-level handle used by skill_extract().
pipe = _load_skill_pipeline()
def skill_extract(text, ner=None):
    """Return the set of technical skill words recognised in *text*.

    Args:
        text: The text to analyse.
        ner: Optional callable taking the text and returning an iterable of
            dicts with ``'word'`` and ``'entity'`` keys. Defaults to the
            module-level ``pipe``.

    Returns:
        A set of words whose entity label is one of ``B-TECHNICAL``,
        ``I-TECHNICAL``, ``B-TECHNOLOGY`` or ``I-TECHNOLOGY``. Pieces that
        start with ``##`` are glued onto the preceding kept word. Returns an
        empty set for empty or whitespace-only text.
    """
    if not text or not text.strip():
        return set()
    if ner is None:
        ner = pipe

    technical_labels = {
        'B-TECHNICAL', 'I-TECHNICAL', 'B-TECHNOLOGY', 'I-TECHNOLOGY',
    }
    words = []
    for entry in ner(text):
        if entry['entity'] not in technical_labels:
            continue
        token = entry['word']
        if not token.startswith("##"):
            words.append(token)
            continue
        piece = token[2:]
        if words:
            # Continuation piece: extend the word currently being built.
            words[-1] += piece
        else:
            # A continuation with no predecessor stands on its own rather
            # than being attached to an unrelated word.
            words.append(piece)
    return set(words)
+# function for matching and returning skills
def match(required_skills, resume_skills):
    """Compare required skills against the skills found in a resume.

    Skills are compared case-insensitively and ignoring surrounding
    whitespace, so ``"Python"`` matches ``"python"``.

    Args:
        required_skills: Iterable of skill names that are required.
        resume_skills: Iterable of skill names found in the resume.

    Returns:
        A ``(missing_skills, score_percentage)`` tuple: the set of required
        skills (in their original spelling) absent from the resume, and the
        percentage of required skills that were found. When no skills are
        required the result is ``(set(), 0.0)``.
    """
    # Normalised key -> original spelling; the first spelling seen wins.
    required_by_key = {}
    for skill in required_skills:
        required_by_key.setdefault(skill.strip().casefold(), skill)

    # Nothing required: avoid dividing by zero.
    if not required_by_key:
        return set(), 0.0

    resume_keys = {skill.strip().casefold() for skill in resume_skills}
    missing_skills = {
        original
        for key, original in required_by_key.items()
        if key not in resume_keys
    }
    matched_count = len(required_by_key) - len(missing_skills)
    score_percentage = (matched_count / len(required_by_key)) * 100
    return missing_skills, score_percentage
# Catalogue of skills the user can tick as required for the position.
required_skills = ["Python", "Java", "Django", "Machine Learning", "Data Science", "Communication", 'Natural language processing (nlp)']
# Skills the user actually ticked in the UI.
selected_skills = []

# Streamlit UI
st.title("TalentMatch")
st.header("Select the required skills")

# Lay the checkboxes out round-robin across three columns.
columns = st.columns(3)
for i, skill in enumerate(required_skills):
    if columns[i % 3].checkbox(skill):
        selected_skills.append(skill)

# Only the formats extract_text() can parse are offered for upload.
pdf_docs = st.file_uploader(
    "upload your files and click on process",
    type=["pdf", "docx"],
    accept_multiple_files=True,
)

# The button is only shown once there is something to process, and the
# analysis only runs when it is clicked.
if selected_skills and pdf_docs and st.button("Process"):
    st.write("Processing...")
    result_data = []
    # Score each uploaded resume on its own text.
    for file in pdf_docs:
        text = extract_text([file])
        resume_skills = skill_extract(text)
        # Score against the skills the user ticked, not the whole catalogue.
        missing_skills, score = match(selected_skills, resume_skills)
        result_data.append({
            "File": file.name,
            "Score": score,
            "Missing Skills": ", ".join(sorted(missing_skills)),
        })

    # Rank best match first while the score is still numeric, then format it.
    result_data.sort(key=lambda row: row["Score"], reverse=True)
    for row in result_data:
        row["Score"] = f"{row['Score']:.1f}%"

    df = pd.DataFrame(result_data)
    # display the result table
    st.subheader("Processing Completed")
    st.subheader("RESULT")
    st.table(df)