|
--- |
|
license: apache-2.0 |
|
datasets: |
|
- erfanzar/Data-60K |
|
- erfanzar/CC-OASST-1-EVAL |
|
language: |
|
- en |
|
- fr |
|
- fa |
|
- nl |
|
metrics: |
|
- bertscore |
|
pipeline_tag: text-generation |
|
--- |
|
|
|
# OpenSourceTransformers-OST Project |
|
|
|
[OST-OpenSourceTransformers Github](https://github.com/erfanzar/OST-OpenSourceTransformers) |
|
|
|
## NOTE |
|
|
|
Model version 2 has been released, and you can use the model with the built-in Gradio [interface](https://github.com/erfanzar/OST-OpenSourceTransformers/blob/main/OST_UI/app.py).
|
|
|
|
|
The Pythia Suite is **NOT** intended for deployment. It is not in itself |
|
a product and cannot be used for human-facing interactions. For example, |
|
the model may generate harmful or offensive text... |
|
|
|
|
|
Also keep in mind that, at least in this version, the model is not strong in Persian, French, or Dutch.
|
|
|
This model was trained with the same parameters as [PGT-1B-2EP](https://huggingface.co/erfanzar/PGT-1B-2EP) but fine-tuned on additional custom data. The two models behave quite similarly, so we suggest testing both and picking the one you like most.
|
|
|
|
|
## Hello community |
|
|
|
This model can also run on 4 GB of GPU RAM and handles dialog well.
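For a quick start, here is a minimal single-prompt inference sketch, assuming a CUDA GPU; the model id and prompt template are taken from the usage code below, while the `max_new_tokens` budget is just an illustrative choice. Loading the weights in `float16` is what keeps the footprint around 4 GB.

```python
# Minimal inference sketch (assumptions: CUDA GPU available, float16 weights).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = 'erfanzar/PGT-1B'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).cuda()

# The model expects OpenAssistant-style prompt markers (see `prompt_to_instruction` below).
prompt = '<|prompter|> What is a transformer model? <|endoftext|><|assistant|>'
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
output = model.generate(**inputs, max_new_tokens=256)  # illustrative token budget
print(tokenizer.decode(output[0], skip_special_tokens=True))
```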
|
|
|
## Usage Code |
|
|
|
```python |
|
|
|
import os
import textwrap
from dataclasses import dataclass, field

import gradio as gr
import torch
import whisper
from transformers import (AutoModelForCausalLM, AutoTokenizer, GenerationConfig,
                          HfArgumentParser, PreTrainedTokenizer, logging)
|
|
|
logger = logging.get_logger(__name__) |
|
logging.set_verbosity_info() |
|
|
|
|
|
@dataclass
class LoadConfig:
    mode: str = field(default='gui-chat', metadata={'help': 'mode to use the AI in'})
    model_id: str = field(default='erfanzar/PGT-1B', metadata={'help': 'model to load'})
    load_model: bool = field(default=True, metadata={'help': 'load the model; set to False for debug mode'})
    torch_type: torch.dtype = field(default=torch.float16, metadata={'help': 'data type'})
    load_in_8bit: bool = field(default=False,
                               metadata={'help': 'load the model in 8-bit to make it smaller '
                                                 "and faster, but it's not recommended 😀"})
    whisper_model: str = field(default='base', metadata={'help': 'model to load for whisper'})
|
|
|
|
|
def load_model(config: LoadConfig):
    logger.info(f'Loading model FROM : {config.model_id}')
    _model = AutoModelForCausalLM.from_pretrained(
        config.model_id,
        load_in_8bit=config.load_in_8bit,
        torch_dtype=config.torch_type,
    ) if config.load_model else None
    # Whisper provides speech-to-text for the voice input in the chat UI.
    model_whisper = whisper.load_model(config.whisper_model)
    n_params = f'{sum(p.numel() for p in _model.parameters()) / 1e9:.2f}' if _model is not None else 'NONE'
    logger.info(f'Done Loading Model with {n_params} Billion Parameters')
    logger.info(f'Loading Tokenizer FROM : {config.model_id}')
    _tokenizer = AutoTokenizer.from_pretrained(config.model_id)
    logger.info('Done Loading Tokenizer')
    return _model, _tokenizer, model_whisper
|
|
|
|
|
def prompt_to_instruction(text: str):
    # Wrap raw user text in the OpenAssistant-style template the model was tuned on.
    return f"<|prompter|> {text} <|endoftext|><|assistant|>"
|
|
|
|
|
def generate(model: AutoModelForCausalLM, tokenizer, text: str, max_new_tokens: int = 1024, |
|
use_prompt_to_instruction: bool = False, generation_config=None, |
|
b_pair=False): |
|
text = prompt_to_instruction(text) if use_prompt_to_instruction else text |
|
|
|
    # Streaming loop: re-encode the running text each step, generate a bit more,
    # and yield the partial output until an end-of-text marker appears.
    for i in range(max_new_tokens):
|
enc = tokenizer(text, return_tensors='pt', add_special_tokens=False) |
|
text_r = text |
|
enc = model.generate(enc.input_ids.to(model.device), generation_config=generation_config) |
|
text = tokenizer.decode(enc[0], skip_special_tokens=False) |
|
text = text[:-4] + tokenizer.eos_token if text[-4:] == '\n\n\n\n' else text |
|
if text.endswith(tokenizer.eos_token) or text.endswith('\n\n\n\n'): |
|
yield text[len(text_r):] if b_pair else text |
|
break |
|
else: |
|
yield text[len(text_r):] if b_pair else text |
|
|
|
|
|
def verify_text(txt):
    return '\n'.join(textwrap.fill(line, width=110) for line in txt.split('\n'))
|
|
|
|
|
# Simple terminal chat loop: the dialog history accumulates in `cache` and is
# prepended to every new prompt so the model keeps context.
def conversation(model, tokenizer, cache=None, max_new_tokens=512, byte_pair=False):
|
cache = '' if cache is None else cache |
|
while True: |
|
user = cache + prompt_to_instruction(input('>> ')) |
|
last_a = 'NONE' |
|
for text in generate(model, tokenizer, text=user, max_new_tokens=max_new_tokens, b_pair=byte_pair, |
|
use_prompt_to_instruction=False): |
|
os.system('clear') |
|
print(verify_text(text). |
|
replace('<|prompter|>', 'User : '). |
|
replace('<|endoftext|><|assistant|>', '\nAI :'). |
|
replace('<|endoftext|>', '\n'), end='') |
|
last_a = text |
|
cache += last_a[len(cache):] |
|
|
|
|
|
class Conversation: |
|
def __init__(self, model, tokenizer, config): |
|
self.model: AutoModelForCausalLM = model |
|
self.tokenizer: PreTrainedTokenizer = tokenizer |
|
self.config: LoadConfig = config |
|
|
|
def run(self, text, |
|
cache, max_length, temperature, top_p, top_k, |
|
repetition_penalty |
|
): |
|
opt = sort_cache_pgt(cache) |
|
original_text = text |
|
text = opt + prompt_to_instruction(text) |
|
final_res = '' |
|
generation_config = GenerationConfig( |
|
eos_token_id=self.tokenizer.eos_token_id, |
|
bos_token_id=self.tokenizer.bos_token_id, |
|
pad_token_id=self.tokenizer.pad_token_id, |
|
max_new_tokens=1, |
|
max_length=max_length, |
|
temperature=temperature, |
|
top_p=top_p, |
|
top_k=top_k, |
|
repetition_penalty=repetition_penalty |
|
) |
|
for byte in generate(self.model, self.tokenizer, text=text, b_pair=False, |
|
generation_config=generation_config, |
|
use_prompt_to_instruction=False): |
|
final_res = byte |
|
yield byte[len(text):].replace('<|endoftext|>', '') |
|
answer = final_res[len(text):len(final_res) - len('<|endoftext|>')] |
|
cache.append([original_text, answer]) |
|
return '', cache |
|
|
|
|
|
def sort_cache_pgt(cache_):
    # Serialize the chat history into the <|prompter|>/<|assistant|> prompt format.
    opt = ''
    for f in cache_:
        opt += f"<|prompter|>{f[0]}<|endoftext|><|assistant|>{f[1]}<|endoftext|>"
    return opt
|
|
|
|
|
def sort_cache_lgem(cache_):
    opt = ''
    for f in cache_:
        opt += f"User:{f[0]}\nAI:{f[1]}"
    return opt
|
|
|
|
|
def chat_bot_run(text: str, cache, max_new_tokens, |
|
max_length, |
|
temperature, |
|
top_p, |
|
top_k, |
|
repetition_penalty, |
|
voice): |
|
    # If a microphone recording was supplied, transcribe it with Whisper and
    # fall back to the transcript when the text box is empty.
    if voice is not None:
|
text_rec = whisper_model.transcribe(voice)['text'] |
|
if text == '': |
|
text = text_rec |
|
|
|
opt = sort_cache_pgt(cache) |
|
original_text = text |
|
text = opt + prompt_to_instruction(text) |
|
final_res = '' |
|
generation_config = GenerationConfig( |
|
max_length=max_length, |
|
max_new_tokens=max_new_tokens, |
|
temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty, |
|
eos_token_id=tokenizer.eos_token_id, |
|
pad_token_id=tokenizer.pad_token_id, |
|
bos_token_id=tokenizer.bos_token_id |
|
) |
|
    # A deep copy (`copy.deepcopy(cache)`) would avoid mutating the UI state in place.
    cache_f = cache
    cache_f.append([original_text, ''])
|
if model is not None: |
|
|
|
for byte in generate(model, tokenizer, text=text, b_pair=False, |
|
generation_config=generation_config, max_new_tokens=max_length, |
|
use_prompt_to_instruction=False): |
|
final_res = byte |
|
chosen_byte = byte[len(text):].replace('<|endoftext|>', '') |
|
print(chosen_byte) |
|
cache_f[-1][1] = chosen_byte |
|
yield '', cache_f |
|
answer = final_res[len(text):len(final_res) - len('<|endoftext|>')] |
|
else: |
|
answer = 'It seems like im down or im not loaded yet 😇' |
|
cache.append([original_text, answer]) |
|
return '', cache |
|
|
|
|
|
def gradio_ui(main_class_conversation: Conversation):
    # `Conversation.run` expects a chat history as its second argument; this
    # single-turn wrapper passes a fresh empty history on every call.
    def run_once(text, max_length, temperature, top_p, top_k, repetition_penalty):
        response = ''
        for response in main_class_conversation.run(text, [], max_length, temperature,
                                                    top_p, top_k, repetition_penalty):
            pass
        return response

    interface = gr.Interface(
        fn=run_once, outputs='text',
        inputs=[gr.Textbox(lines=10, placeholder="I'm just a placeholder, ignore me ... "),
                gr.Slider(value=1024, maximum=1024, minimum=1, label='Max Length'),
                gr.Slider(value=0.9, maximum=1, minimum=0.2, label='Temperature'),
                gr.Slider(value=0.95, maximum=0.9999, minimum=0.1, label='Top P'),
                gr.Slider(value=50, maximum=100, minimum=1, label='Top K'),
                gr.Slider(value=1.2, maximum=5, minimum=1, label='Repetition Penalty')])
    interface.queue()
    interface.launch(share=True)
|
|
|
def gradio_ui_chat(main_class_conversation: Conversation): |
|
theme = gr.themes.Soft( |
|
primary_hue="cyan", |
|
secondary_hue="teal", |
|
neutral_hue=gr.themes.Color(c100="#f3f4f6", c200="#e5e7eb", c300="#d1d5db", |
|
c400="#9ca3af", c50="#f9fafb", c500="#6b7280", |
|
c600="#4b5563", c700="#374151", c800="#1f2937", |
|
c900="#47a9c2", c950="#0b0f19"), |
|
) |
|
|
|
with gr.Blocks( |
|
theme=theme) as block: |
|
with gr.Row(): |
|
with gr.Column(scale=1): |
|
max_length = gr.Slider(value=1024, maximum=1024, minimum=1, label='Max Length', step=1) |
|
max_steam_tokens = gr.Slider(value=1, maximum=3, minimum=1, label='Max Stream Tokens', step=1) |
|
temperature = gr.Slider(value=0.9, maximum=1, minimum=0.2, label='Temperature', step=0.01) |
|
top_p = gr.Slider(value=0.95, maximum=0.9999, minimum=0.1, label='Top P', step=0.01) |
|
top_k = gr.Slider(value=50, maximum=100, minimum=1, label='Top K', step=1) |
|
penalty = gr.Slider(value=1.2, maximum=5, minimum=1, label='Repetition Penalty', step=0.1, visible=True) |
|
# TODO |
|
penalty_ = gr.Slider(value=1.2, maximum=10, minimum=1, label='Repetition', step=0.1, visible=True) |
|
gre_mode = gr.Checkbox(label='Greedy Mode') |
|
smart_mode = gr.Checkbox(label='Smart Mode') |
|
informational_mode = gr.Checkbox(label='Informational Mode') |
|
voice = gr.Audio(source='microphone', type="filepath", streaming=False, label='Smart Voice', ) |
|
with gr.Column(scale=4): |
|
cache = gr.Chatbot(elem_id=main_class_conversation.config.model_id, |
|
label=main_class_conversation.config.model_id).style(container=True, |
|
height=680) |
|
with gr.Row(): |
|
with gr.Column(scale=1): |
|
submit = gr.Button() |
|
with gr.Column(scale=4): |
|
text = gr.Textbox(show_label=False).style(container=False) |
|
|
|
submit.click(fn=chat_bot_run, |
|
inputs=[text, cache, max_steam_tokens, max_length, temperature, top_p, top_k, penalty, voice], |
|
outputs=[text, cache]) |
|
text.submit(fn=chat_bot_run, |
|
inputs=[text, cache, max_steam_tokens, max_length, temperature, top_p, top_k, penalty, voice], |
|
outputs=[text, cache]) |
|
        gr.Markdown(
            'LucidBrains is a platform that makes AI accessible and easy to use for everyone. '
            'Our mission is to empower individuals and businesses with the tools they need to '
            'harness the power of AI and machine learning, without requiring a background in '
            'data science. We will build what you want for you and help you live a better life '
            'using Artificial Intelligence, pushing technology beyond limits.'
            '\n[OST-OpenSourceTransformers](https://github.com/erfanzar/OST-OpenSourceTransformers) From LucidBrains 🧠\n'
        )
|
block.queue().launch(debug=False, share=True, inline=True, show_tips=True, width='100%') |
|
|
|
|
|
def main(config):
    mcc = Conversation(model=model, tokenizer=tokenizer, config=config)
    if config.mode == 'cli':
        conversation(model=model, tokenizer=tokenizer)
    elif config.mode == 'gui':
        gradio_ui(main_class_conversation=mcc)
    elif config.mode == 'gui-chat':
        gradio_ui_chat(main_class_conversation=mcc)
    else:
        raise ValueError(f'Unknown Mode For : {config.mode}')
|
|
|
|
|
if __name__ == "__main__":
    config_ = HfArgumentParser(LoadConfig).parse_args_into_dataclasses()[0]
    # config_ = LoadConfig()
    print(f'Running WITH MODE : {config_.mode}')
    model, tokenizer, whisper_model = load_model(config=config_)
    if model is not None and not config_.load_in_8bit:
        # 8-bit models are already placed on the GPU by `from_pretrained`, and
        # `model` is None when model loading is disabled for debugging.
        model = model.cuda()
    whisper_model = whisper_model.cuda()
    main(config_)
|
|
|
|
|
``` |
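Since the script parses its options with `HfArgumentParser`, the command-line flags mirror the `LoadConfig` fields; assuming you save it as, say, `app.py`, you can launch the chat UI with `python app.py --mode gui-chat --model_id erfanzar/PGT-1B`.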
|
|
|
# Pythia-1B |
|
|
|
## Model Details |
|
|
|
##### Pretrained Model |
|
|
|
- Developed by: [EleutherAI](http://eleuther.ai) |
|
- Model type: Transformer-based Language Model |
|
- License: [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) |
|
|
|
### Train Parameters
|
|
|
- learning rate: 2e-4
- scheduler: cosine
- hardware: 2 × A100 GPUs
- batch size: AutoFind
- train time: 72 hours
- max sequence length: 2048
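
For reference, these settings map roughly onto a 🤗 Transformers `TrainingArguments` configuration like the sketch below; the output directory, epoch count, and starting batch size are placeholders rather than the actual training recipe.

```python
# Rough mapping of the listed hyperparameters onto TrainingArguments
# (sketch only; output_dir, epochs, and the starting batch size are placeholders).
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir='pgt-1b-finetune',    # placeholder
    learning_rate=2e-4,              # learning rate: 2e-4
    lr_scheduler_type='cosine',      # scheduler: cosine
    auto_find_batch_size=True,       # batch size: AutoFind
    per_device_train_batch_size=8,   # placeholder starting point for the auto-finder
    fp16=True,
    num_train_epochs=2,              # placeholder
)
# Sequences would be tokenized and packed to the 2048-token maximum length.
```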