from functools import lru_cache

import numpy as np
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    TokenClassificationPipeline,
    pipeline,
)
from transformers.pipelines import AggregationStrategy

# ================================= summarize code =================================

# Hugging Face model id used for summarization.
_SUMMARIZATION_MODEL = "Falconsai/text_summarization"


@lru_cache(maxsize=1)
def _get_summarizer():
    """Return the summarization pipeline, building it on first use only.

    Constructing the pipeline loads the model and tokenizer, so the instance
    is cached and shared by every subsequent ``summerize`` call.
    """
    return pipeline("summarization", model=_SUMMARIZATION_MODEL)


def summerize(text):
    """Summarize *text* with the ``Falconsai/text_summarization`` model.

    The generation length bounds are derived from the number of
    whitespace-separated words in *text*: ``min_length`` is 30% and
    ``max_length`` is 80% of that count (both rounded down).

    Args:
        text: The text to summarize.

    Returns:
        A dict ``{"output": result}`` where ``result`` is whatever the
        summarization pipeline returns for *text*. For blank input the model
        is not called and ``result`` is ``[{"summary_text": ""}]``.
    """
    # split() with no argument ignores runs of spaces and also splits on
    # newlines/tabs, so the count reflects actual words.
    word_count = len(text.split())

    if word_count == 0:
        # Nothing to summarize; avoid invoking the model with max_length=0.
        # Mirrors the list-of-dicts shape the summarization pipeline returns.
        return {"output": [{"summary_text": ""}]}

    # Integer arithmetic keeps the original "percent of word count, rounded
    # down" semantics without going through floats.
    # NOTE(review): these are word-based estimates passed as token limits, and
    # for one- or two-word inputs they fall to 0-1 -- confirm the model
    # tolerates that.
    max_length = word_count * 80 // 100
    min_length = word_count * 30 // 100

    summary = _get_summarizer()(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=True,  # sampling is enabled, so output may vary between calls
    )
    return {"output": summary}


# ================================= keywords code =================================

# Define keyphrase extraction pipeline
# class KeyphraseExtractionPipeline(TokenClassificationPipeline):
#     def __init__(self, model, *args, **kwargs):
#         super().__init__(
#             model=AutoModelForTokenClassification.from_pretrained(model),
#             tokenizer=AutoTokenizer.from_pretrained(model),
#             *args,
#             **kwargs
#         )
#
#     def postprocess(self, all_outputs):
#         results = super().postprocess(
#             all_outputs=all_outputs,
#             aggregation_strategy=AggregationStrategy.FIRST,
#         )
#         return np.unique([result.get("word").strip() for result in results])
#
# # Load pipeline
# model_name = "ml6team/keyphrase-extraction-distilbert-inspec"
# extractor = KeyphraseExtractionPipeline(model=model_name)
# keyphrases = extractor(text)
# print(keyphrases)