adiba-markovate committed on
Commit
5b52224
1 Parent(s): f82d615
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ #import sys
4
+ from PyPDF2 import PdfReader
5
+ import docx2txt
6
+ from transformers import pipeline
7
+ import pandas as pd
8
+
9
+
10
def fetch_pdf_doc_file(directory):
    """Return the paths of the PDF and DOCX files directly inside *directory*.

    Args:
        directory: Path of the folder to scan (not searched recursively).

    Returns:
        A list of ``directory``-prefixed paths, sorted by file name, for every
        entry whose name ends in ``.pdf`` or ``.docx`` (case-insensitive).

    Raises:
        OSError: If *directory* cannot be listed (propagated from
            ``os.listdir``).
    """
    # os.path.join avoids the doubled separator that plain "+" concatenation
    # produced when *directory* already ended with one; sorting makes the
    # result independent of the arbitrary order os.listdir returns.
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.lower().endswith((".pdf", ".docx"))
    ]
17
+
18
+ # extract texts from files
19
def extract_text(files_list):
    """Return the lower-cased text of every PDF/DOCX file in *files_list*.

    Args:
        files_list: Iterable of file paths or binary file-like objects. An
            object's ``name`` attribute (when present) is used to detect the
            file type; anything not ending in ``.docx`` is read as a PDF.

    Returns:
        The text of all files, joined with newlines and lower-cased. An empty
        *files_list* yields an empty string.
    """
    chunks = []
    for file in files_list:
        # Uploaded file objects expose their file name as ``.name``; a plain
        # path string is its own name.
        filename = str(getattr(file, "name", file)).lower()
        if filename.endswith(".docx"):
            chunks.append(docx2txt.process(file) or "")
        else:
            # A reader has to be created per file: PdfReader needs the stream
            # (or path) it should parse.
            reader = PdfReader(file)
            # Guard against pages that yield no text (e.g. scanned images).
            chunks.extend(page.extract_text() or "" for page in reader.pages)
    # Newline separators keep the last word of one page from fusing with the
    # first word of the next.
    return "\n".join(chunks).lower()
29
+
30
+ #passing text for extracting skills
31
# Token-classification pipeline used by skill_extract to tag skill mentions.
pipe = pipeline(
    "token-classification",
    model="algiraldohe/lm-ner-linkedin-skills-recognition",
)


def skill_extract(text):
    """Return the set of technical skill words the pipeline tags in *text*.

    Only tokens labelled ``B-TECHNICAL``, ``I-TECHNICAL``, ``B-TECHNOLOGY`` or
    ``I-TECHNOLOGY`` are kept. A kept token that starts with ``##`` is treated
    as the continuation of the previous kept token and appended to it.

    Args:
        text: The text to run through the module-level ``pipe``.

    Returns:
        A set of the merged skill strings (duplicates collapsed).
    """
    wanted = {"B-TECHNICAL", "I-TECHNICAL", "B-TECHNOLOGY", "I-TECHNOLOGY"}
    merged = []
    for entry in pipe(text):
        if entry["entity"] not in wanted:
            continue
        word = entry["word"]
        if not word.startswith("##"):
            merged.append(word)
            continue
        piece = word[2:]
        if merged:
            merged[-1] += piece
        elif piece:
            # A continuation piece with nothing before it: keep it on its own
            # instead of wrapping around to index -1 and corrupting the last
            # word, as the index-based version did.
            merged.append(piece)
    return set(merged)
47
+
48
+
49
+ # function for matching and returning skills
50
def match(required_skills, resume_skills):
    """Compare required skills with the skills found in a resume.

    Skills are compared case-insensitively and ignoring surrounding
    whitespace, so a required "Python" matches a resume's "python".

    Args:
        required_skills: Iterable of skill names that are being asked for.
        resume_skills: Iterable of skill names extracted from the resume.

    Returns:
        A ``(missing_skills, score_percentage)`` tuple: the set of required
        skills (in their original spelling) absent from the resume, and the
        share of required skills that were found, as a float from 0 to 100.
        With no required skills the result is ``(set(), 0.0)``.
    """

    def _normalize(skill):
        return str(skill).strip().casefold()

    required = set(required_skills)
    if not required:
        # Nothing to score against; avoid dividing by zero.
        return set(), 0.0

    found = {_normalize(skill) for skill in resume_skills}
    missing_skills = {
        skill for skill in required if _normalize(skill) not in found
    }
    matched_count = len(required) - len(missing_skills)
    score_percentage = (matched_count / len(required)) * 100
    return missing_skills, score_percentage
62
+
63
+
64
+ # Define the list of required skills
65
# Catalogue of skills offered to the user; each one is rendered as a checkbox.
required_skills = ["Python", "Java", "Django", "Machine Learning", "Data Science", "Communication", 'Natural language processing (nlp)']

# Streamlit UI
st.title("TalentMatch")
st.header("Select the required skills")

# Three side-by-side columns; skills are dealt out to them round-robin.
col1, col2, col3 = st.columns(3)
columns = (col1, col2, col3)

# Render one checkbox per skill and keep the skills whose box is ticked.
selected_skills = [
    skill
    for position, skill in enumerate(required_skills)
    if columns[position % 3].checkbox(skill)
]
88
+
89
+
90
# Only PDF and DOCX uploads are offered, matching what the extractor handles.
pdf_docs = st.file_uploader(
    "upload your files and click on process",
    type=["pdf", "docx"],
    accept_multiple_files=True,
)

# The button is only shown once skills are selected and files are uploaded.
if selected_skills and pdf_docs and st.button("Process"):
    st.write("Processing...")

    scored = []
    for file in pdf_docs:
        # Extract from this file alone, not from the whole upload batch.
        text = extract_text([file])
        resume_skills = skill_extract(text)
        # Score against the skills the user ticked, not the full catalogue.
        missing_skills, score = match(selected_skills, resume_skills)
        scored.append((score, file.name, missing_skills))

    # Rank on the numeric score before it is turned into display text.
    scored.sort(key=lambda row: row[0], reverse=True)

    result_data = [
        {
            "File": name,
            # The score is a float; format it instead of adding "%" to it.
            "Score": f"{score:.1f}%",
            "Missing Skills": ", ".join(sorted(missing)),
        }
        for score, name, missing in scored
    ]

    # create a dataframe and display the result table
    df = pd.DataFrame(result_data)
    st.subheader("Processing Completed")
    st.subheader("RESULT")
    st.table(df)
114
+
115
+
116
+