education-webapp / model.py
anmolmalik01's picture
Adding files
53dc870
raw
history blame
No virus
1.56 kB
from transformers import pipeline
from transformers import ( TokenClassificationPipeline, AutoModelForTokenClassification, AutoTokenizer)
from transformers.pipelines import AggregationStrategy
import numpy as np
# ================================= summarize code =================================
# Lazily-created summarization pipeline, shared by every call so the model
# and tokenizer are loaded once per process instead of once per request.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline("summarization", model="Falconsai/text_summarization")
    return _SUMMARIZER


def summerize(text: str) -> dict:
    """Summarize *text* with the ``Falconsai/text_summarization`` model.

    The summary length bounds are derived from the input's word count:
    at most 80% and at least 30% of the number of whitespace-separated words.

    Args:
        text: The passage to summarize.

    Returns:
        ``{"output": result}`` where ``result`` is whatever the summarization
        pipeline returns (a list of ``{"summary_text": ...}`` dicts). For
        empty, whitespace-only or ``None`` input the model is not invoked and
        a single empty summary is returned in that same shape.
    """
    # Nothing to summarize: skip the model rather than handing it degenerate
    # length limits (both would be 0).
    if not text or not text.strip():
        return {"output": [{"summary_text": ""}]}

    # split() with no argument collapses runs of spaces, tabs and newlines, so
    # multi-line or double-spaced text is not over-counted the way
    # split(' ') over-counts it.
    word_count = len(text.split())

    # NOTE(review): the pipeline counts these limits in tokens, not words, so
    # the percentages are only an approximation of the summary size.
    # NOTE(review): the floor of 2 keeps the upper bound usable for one- or
    # two-word inputs (where 80% truncates to 0 or 1); confirm the smallest
    # max_length accepted by the installed transformers version.
    max_length = max(word_count * 80 // 100, 2)
    min_length = word_count * 30 // 100

    summary = _get_summarizer()(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=True,
    )
    return {"output": summary}
# ================================= keywords code =================================
# Define keyphrase extraction pipeline
# class KeyphraseExtractionPipeline(TokenClassificationPipeline):
# def __init__(self, model, *args, **kwargs):
# super().__init__(
# model=AutoModelForTokenClassification.from_pretrained(model),
# tokenizer=AutoTokenizer.from_pretrained(model),
# *args,
# **kwargs
# )
# def postprocess(self, all_outputs):
# results = super().postprocess(
# all_outputs=all_outputs,
# aggregation_strategy=AggregationStrategy.FIRST,
# )
# return np.unique([result.get("word").strip() for result in results])
# # Load pipeline
# model_name = "ml6team/keyphrase-extraction-distilbert-inspec"
# extractor = KeyphraseExtractionPipeline(model=model_name)
# keyphrases = extractor(text)
# print(keyphrases)