What is the best way to run this model?

#1
by ArthurParkerhouse - opened

The original IBM GitHub page recommends the following code:


from transformers import pipeline

class QualityControlPipeline:
    """Run an ``ibm/qcpg-*`` text2text-generation pipeline with control tokens.

    Each call turns three control values in ``[0, 1]`` into ``COND_*`` special
    tokens, prepends them to the input text and forwards the result to the
    underlying ``transformers`` pipeline.
    """

    def __init__(self, type):
        """Load the pipeline and the control ranges for one model flavour.

        Args:
            type: One of ``'captions'``, ``'questions'`` or ``'sentences'``;
                selects the ``ibm/qcpg-{type}`` checkpoint.

        Raises:
            AssertionError: If ``type`` is not one of the supported names.
        """
        assert type in ['captions', 'questions', 'sentences']
        self.pipe = pipeline('text2text-generation', model=f'ibm/qcpg-{type}')
        # [min, max] allowed level per control, keyed by the first three
        # letters of the control name ('lexical_div' -> 'lex', ...).
        self.ranges = {
            'captions': {'lex': [0, 90], 'syn': [0, 80], 'sem': [0, 95]},
            'sentences': {'lex': [0, 100], 'syn': [0, 80], 'sem': [0, 95]},
            'questions': {'lex': [0, 90], 'syn': [0, 75], 'sem': [0, 95]}
        }[type]

    def __call__(self, text, lexical, syntactic, semantic, **kwargs):
        """Prefix ``text`` with control tokens and run the pipeline.

        Args:
            text: A single string, or an iterable of strings for a batch.
            lexical: Lexical control value in ``[0, 1]``.
            syntactic: Syntactic control value in ``[0, 1]``.
            semantic: Semantic control value in ``[0, 1]``.
            **kwargs: Forwarded unchanged to the underlying pipeline call.

        Returns:
            Whatever ``self.pipe`` returns for the prefixed input.

        Raises:
            AssertionError: If a control value is outside ``[0, 1]`` or a
                generated token is not among the tokenizer's additional
                special tokens.
        """
        assert all([0 <= val <= 1 for val in [lexical, syntactic, semantic]]), \
            f' control values must be between 0 and 1, got {lexical}, {syntactic}, {semantic}'
        names = ['semantic_sim', 'lexical_div', 'syntactic_div']
        values = [semantic, lexical, syntactic]
        tokens = []
        for name, val in zip(names, values):
            low, high = self.ranges[name[:3]]
            # Snap to the nearest multiple of 5 on a 0-100 scale, then clamp
            # to the range configured for this control.
            level = int(5 * round(val * 100 / 5))
            level = max(min(level, high), low)
            tokens.append(f'COND_{name.upper()}_{level}')
        assert all(tok in self.pipe.tokenizer.additional_special_tokens for tok in tokens), \
            f'unknown control tokens: {tokens}'
        prefix = ' '.join(tokens)
        if isinstance(text, str):
            text = prefix + text
        else:
            # Prefix every element; the previous version dropped the element
            # and sent only the control tokens for batched input.
            text = [prefix + t for t in text]
        return self.pipe(text, **kwargs)

What would we need to change/update for this version that has additional parameters?

You can use this:

from transformers import pipeline

class QualityControlPipeline:
    """Run the five-control QCPG text2text-generation pipeline.

    Each call turns five control values in ``[0, 1]`` into ``COND_*`` special
    tokens, prepends them to the input text and forwards the result to the
    underlying ``transformers`` pipeline.
    """

    def __init__(self):
        """Load the ``madhavsankar/qcpg-parabk2-sbert-lr1e-4`` pipeline."""
        self.pipe = pipeline('text2text-generation', model='madhavsankar/qcpg-parabk2-sbert-lr1e-4')
        # [min, max] allowed level per control, keyed by the first three
        # letters of the control name ('phonological_div' -> 'pho', ...).
        self.ranges = {'lex': [0, 100], 'syn': [0, 100], 'sem': [30, 100], 'pho': [0, 100], 'mor': [0, 90]}

    def __call__(self, text, lexical, syntactic, semantic, morph, phon, **kwargs):
        """Prefix ``text`` with control tokens and run the pipeline.

        Args:
            text: A single string, or an iterable of strings for a batch.
            lexical: Lexical control value in ``[0, 1]``.
            syntactic: Syntactic control value in ``[0, 1]``.
            semantic: Semantic control value in ``[0, 1]``.
            morph: Morphological control value in ``[0, 1]``.
            phon: Phonological control value in ``[0, 1]``.
            **kwargs: Forwarded unchanged to the underlying pipeline call.

        Returns:
            Whatever ``self.pipe`` returns for the prefixed input.

        Raises:
            AssertionError: If a control value is outside ``[0, 1]`` or a
                generated token is not among the tokenizer's additional
                special tokens.
        """
        assert all([0 <= val <= 1 for val in [lexical, syntactic, semantic, morph, phon,]]), \
            f' control values must be between 0 and 1, got {lexical}, {syntactic}, {semantic}, {morph}, {phon}'
        names = ['semantic_sim', 'lexical_div', 'syntactic_div', 'morphological_div', 'phonological_div']
        values = [semantic, lexical, syntactic, morph, phon]
        tokens = []
        for name, val in zip(names, values):
            low, high = self.ranges[name[:3]]
            # Snap to the nearest multiple of 5 on a 0-100 scale, then clamp
            # to the range configured for this control.
            level = int(5 * round(val * 100 / 5))
            level = max(min(level, high), low)
            tokens.append(f'COND_{name.upper()}_{level}')
        assert all(tok in self.pipe.tokenizer.additional_special_tokens for tok in tokens), \
            f'unknown control tokens: {tokens}'
        prefix = ' '.join(tokens)
        if isinstance(text, str):
            text = prefix + ' ' + text
        else:
            # Prefix every element; the previous version dropped the element
            # and sent only the control tokens for batched input.
            text = [prefix + ' ' + t for t in text]
        return self.pipe(text, **kwargs)
This comment has been hidden

You can use this:

I worked up a quick Colab demo, thanks so much for sharing the model! - https://colab.research.google.com/drive/1qbtlwjovfgjruPEKW35YUmjQzh49i8mh?usp=sharing

Sign up or log in to comment