---
license: apache-2.0
datasets:
- erfanzar/Data-60K
- erfanzar/CC-OASST-1-EVAL
language:
- en
- fr
- fa
- nl
metrics:
- bertscore
pipeline_tag: text-generation
---
# OpenSourceTransformers-OST Project
[OST-OpenSourceTransformers Github](https://github.com/erfanzar/OST-OpenSourceTransformers)
## NOTE
Model version 2 has been released, and you can run the model through the built-in Gradio [interface](https://github.com/erfanzar/OST-OpenSourceTransformers/blob/main/OST_UI/app.py).
The Pythia Suite is **NOT** intended for deployment. It is not in itself
a product and cannot be used for human-facing interactions. For example,
the model may generate harmful or offensive text...
Also note that, at least in this version, the model's performance in Persian, French, and Dutch is limited.

This model was trained with the same training parameters as [PGT-1B-2EP](https://huggingface.co/erfanzar/PGT-1B-2EP) but fine-tuned on more custom data. The two models behave quite similarly, so test both and pick the one you prefer.
## Hello community
This model can also run on a GPU with 4 GB of memory and handles dialog well.
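As a quick sanity check on a small GPU, the model can be loaded in `float16` with a few lines. This is a minimal sketch, assuming a CUDA device is available; the generation settings are illustrative, and the prompt uses the same `<|prompter|>`/`<|assistant|>` format as the full script below.
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "erfanzar/PGT-1B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).cuda()

# The model was fine-tuned on the OASST-style prompt format used throughout this card.
prompt = "<|prompter|> What is a transformer model? <|endoftext|><|assistant|>"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=128, do_sample=True, temperature=0.9, top_p=0.95)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```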
## Usage Code
```python
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, PreTrainedTokenizer, logging, BloomModel
import torch
import textwrap
import os
from dataclasses import field, dataclass
from transformers import HfArgumentParser, GPTNeoXForCausalLM
import gradio as gr
import speech_recognition as sr
from typing import List, Optional
import copy
import whisper
logger = logging.get_logger(__name__)
logging.set_verbosity_info()
@dataclass
class LoadConfig:
mode: str = field(default='gui-chat', metadata={'help': 'mode to use ai in '})
model_id: str = field(default='erfanzar/PGT-1B', metadata={'help': 'model to load'})
load_model: bool = field(default=True, metadata={'help': "load model set to false for debug mode"})
torch_type: torch.dtype = field(default=torch.float16, metadata={'help': "data type"})
load_in_8bit: bool = field(default=False,
metadata={
'help': "load model in 8 bit to make the models smaller "
"and faster but its not recommended 😀 "})
whisper_model: str = field(default='base', metadata={'help': 'model to load for whisper '})
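# Load the causal LM and its tokenizer from the Hub, plus a Whisper model for optional voice input.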
def load_model(config: LoadConfig):
logger.info(f'Loading model FROM : {config.model_id}')
_model = AutoModelForCausalLM.from_pretrained(
config.model_id,
load_in_8bit=config.load_in_8bit,
torch_dtype=config.torch_type,
) if config.load_model else None
model_whisper = whisper.load_model(config.whisper_model)
logger.info(
f'Done Loading Model with {(sum(m.numel() for m in _model.parameters()) / 1e9) if _model is not None else "NONE"} Billion Parameters')
logger.info(f'Loading Tokenizer FROM : {config.model_id}')
_tokenizer = AutoTokenizer.from_pretrained(config.model_id)
logger.info('Done Loading Tokenizer')
return _model, _tokenizer, model_whisper
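# Wrap raw user text in the OASST-style prompt format the model expects.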
def prompt_to_instruction(text: str):
return f"<|prompter|> {text} <|endoftext|><|assistant|>"
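# Streaming generation loop: each iteration re-runs model.generate on the text produced so far
# (the GenerationConfig typically allows only one new token per call) and yields the running
# output until the EOS token or a run of blank lines appears.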
def generate(model: AutoModelForCausalLM, tokenizer, text: str, max_new_tokens: int = 1024,
use_prompt_to_instruction: bool = False, generation_config=None,
b_pair=False):
text = prompt_to_instruction(text) if use_prompt_to_instruction else text
for i in range(max_new_tokens):
enc = tokenizer(text, return_tensors='pt', add_special_tokens=False)
text_r = text
enc = model.generate(enc.input_ids.to(model.device), generation_config=generation_config)
text = tokenizer.decode(enc[0], skip_special_tokens=False)
text = text[:-4] + tokenizer.eos_token if text[-4:] == '\n\n\n\n' else text
if text.endswith(tokenizer.eos_token) or text.endswith('\n\n\n\n'):
yield text[len(text_r):] if b_pair else text
break
else:
yield text[len(text_r):] if b_pair else text
def verify_text(txt):
return '\n'.join([textwrap.fill(txt, width=110) for txt in txt.split('\n')])
def conversation(model, tokenizer, cache=None, max_new_tokens=512, byte_pair=False):
cache = '' if cache is None else cache
while True:
user = cache + prompt_to_instruction(input('>> '))
last_a = 'NONE'
for text in generate(model, tokenizer, text=user, max_new_tokens=max_new_tokens, b_pair=byte_pair,
use_prompt_to_instruction=False):
os.system('clear')
print(verify_text(text).
replace('<|prompter|>', 'User : ').
replace('<|endoftext|><|assistant|>', '\nAI :').
replace('<|endoftext|>', '\n'), end='')
last_a = text
cache += last_a[len(cache):]
class Conversation:
def __init__(self, model, tokenizer, config):
self.model: AutoModelForCausalLM = model
self.tokenizer: PreTrainedTokenizer = tokenizer
self.config: LoadConfig = config
def run(self, text,
cache, max_length, temperature, top_p, top_k,
repetition_penalty
):
opt = sort_cache_pgt(cache)
original_text = text
text = opt + prompt_to_instruction(text)
final_res = ''
generation_config = GenerationConfig(
eos_token_id=self.tokenizer.eos_token_id,
bos_token_id=self.tokenizer.bos_token_id,
pad_token_id=self.tokenizer.pad_token_id,
max_new_tokens=1,
max_length=max_length,
temperature=temperature,
top_p=top_p,
top_k=top_k,
repetition_penalty=repetition_penalty
)
for byte in generate(self.model, self.tokenizer, text=text, b_pair=False,
generation_config=generation_config,
use_prompt_to_instruction=False):
final_res = byte
yield byte[len(text):].replace('<|endoftext|>', '')
answer = final_res[len(text):len(final_res) - len('<|endoftext|>')]
cache.append([original_text, answer])
return '', cache
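# Flatten the chat history into the <|prompter|>/<|assistant|> format used during fine-tuning.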
def sort_cache_pgt(cache_):
if len(cache_) == 0:
opt = ''
else:
opt = ''
for f in cache_:
opt += f"<|prompter|>{f[0]}<|endoftext|><|assistant|>{f[1]}<|endoftext|>"
return opt
def sort_cache_lgem(cache_):
if len(cache_) == 0:
opt = ''
else:
opt = ''
for f in cache_:
opt += f"User:{f[0]}\nAI:{f[1]}"
return opt
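# Gradio callback: optionally transcribe recorded audio with Whisper, prepend the chat history,
# then stream the model's reply back into the chatbot.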
def chat_bot_run(text: str, cache, max_new_tokens,
max_length,
temperature,
top_p,
top_k,
repetition_penalty,
voice):
if voice is not None:
text_rec = whisper_model.transcribe(voice)['text']
if text == '':
text = text_rec
opt = sort_cache_pgt(cache)
original_text = text
text = opt + prompt_to_instruction(text)
final_res = ''
generation_config = GenerationConfig(
max_length=max_length,
max_new_tokens=max_new_tokens,
temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty,
eos_token_id=tokenizer.eos_token_id,
pad_token_id=tokenizer.pad_token_id,
bos_token_id=tokenizer.bos_token_id
)
# cache_f = copy.deepcopy(cache)
cache_f = cache
cache_f.append([original_text, ''])
if model is not None:
for byte in generate(model, tokenizer, text=text, b_pair=False,
generation_config=generation_config, max_new_tokens=max_length,
use_prompt_to_instruction=False):
final_res = byte
chosen_byte = byte[len(text):].replace('<|endoftext|>', '')
print(chosen_byte)
cache_f[-1][1] = chosen_byte
yield '', cache_f
answer = final_res[len(text):len(final_res) - len('<|endoftext|>')]
else:
answer = 'It seems like im down or im not loaded yet 😇'
cache.append([original_text, answer])
return '', cache
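# Simple gr.Interface front-end around Conversation.run; the richer Blocks chat UI is built in gradio_ui_chat below.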
def gradio_ui(main_class_conversation):
interface = gr.Interface(fn=main_class_conversation.run, outputs='text',
inputs=[gr.inputs.Textbox(lines=10, placeholder='Im just a placeholder ignore me ... '),
gr.inputs.Slider(default=1024, maximum=1024, minimum=1, label='Max Length'),
gr.inputs.Slider(default=0.9, maximum=1, minimum=0.2, label='Temperature'),
gr.inputs.Slider(default=0.95, maximum=0.9999, minimum=0.1, label='Top P'),
gr.inputs.Slider(default=50, maximum=100, minimum=1, label='Top K'),
gr.inputs.Slider(default=1.2, maximum=5, minimum=1,
label='Repetition Penalty')])
interface.queue()
interface.launch(share=True)
def gradio_ui_chat(main_class_conversation: Conversation):
theme = gr.themes.Soft(
primary_hue="cyan",
secondary_hue="teal",
neutral_hue=gr.themes.Color(c100="#f3f4f6", c200="#e5e7eb", c300="#d1d5db",
c400="#9ca3af", c50="#f9fafb", c500="#6b7280",
c600="#4b5563", c700="#374151", c800="#1f2937",
c900="#47a9c2", c950="#0b0f19"),
)
with gr.Blocks(
theme=theme) as block:
with gr.Row():
with gr.Column(scale=1):
max_length = gr.Slider(value=1024, maximum=1024, minimum=1, label='Max Length', step=1)
max_steam_tokens = gr.Slider(value=1, maximum=3, minimum=1, label='Max Stream Tokens', step=1)
temperature = gr.Slider(value=0.9, maximum=1, minimum=0.2, label='Temperature', step=0.01)
top_p = gr.Slider(value=0.95, maximum=0.9999, minimum=0.1, label='Top P', step=0.01)
top_k = gr.Slider(value=50, maximum=100, minimum=1, label='Top K', step=1)
penalty = gr.Slider(value=1.2, maximum=5, minimum=1, label='Repetition Penalty', step=0.1, visible=True)
# TODO
penalty_ = gr.Slider(value=1.2, maximum=10, minimum=1, label='Repetition', step=0.1, visible=True)
gre_mode = gr.Checkbox(label='Greedy Mode')
smart_mode = gr.Checkbox(label='Smart Mode')
informational_mode = gr.Checkbox(label='Informational Mode')
voice = gr.Audio(source='microphone', type="filepath", streaming=False, label='Smart Voice', )
with gr.Column(scale=4):
cache = gr.Chatbot(elem_id=main_class_conversation.config.model_id,
label=main_class_conversation.config.model_id).style(container=True,
height=680)
with gr.Row():
with gr.Column(scale=1):
submit = gr.Button()
with gr.Column(scale=4):
text = gr.Textbox(show_label=False).style(container=False)
submit.click(fn=chat_bot_run,
inputs=[text, cache, max_steam_tokens, max_length, temperature, top_p, top_k, penalty, voice],
outputs=[text, cache])
text.submit(fn=chat_bot_run,
inputs=[text, cache, max_steam_tokens, max_length, temperature, top_p, top_k, penalty, voice],
outputs=[text, cache])
gr.Markdown(
'LucidBrains is a platform that makes AI accessible and easy to use for everyone. '
'Our mission is to empower individuals and businesses '
'with the tools they need to harness the power of AI and machine learning,'
'without requiring a background in data science or anything we '
'will just build what you want for you and help you to have better time and living life'
'with using Artificial Intelligence and Pushing Technology Beyond Limits'
'\n[OST-OpenSourceTransformers](https://github.com/erfanzar/OST-OpenSourceTransformers) From LucidBrains 🧠\n'
)
block.queue().launch(debug=False, share=True, inline=True, show_tips=True, width='100%')
def main(config):
mcc = Conversation(model=model, tokenizer=tokenizer, config=config)
    if config.mode == 'cli':
        conversation(model=model, tokenizer=tokenizer)
    elif config.mode == 'gui':
        gradio_ui(main_class_conversation=mcc)
    elif config.mode == 'gui-chat':
        gradio_ui_chat(main_class_conversation=mcc)
    else:
        raise ValueError(f'Unknown Mode For : {config.mode}')
if __name__ == "__main__":
config_ = HfArgumentParser(LoadConfig).parse_args_into_dataclasses()[0]
# config_ = LoadConfig()
print(f'Running WITH MODE : {config_.mode}')
model, tokenizer, whisper_model = load_model(config=config_)
    if model is not None:
        model = model.cuda()
whisper_model = whisper_model.cuda()
main(config_)
```
# Pythia-1B
## Model Details
### Pretrained Model
- Developed by: [EleutherAI](http://eleuther.ai)
- Model type: Transformer-based Language Model
- License: [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
### Train Parameters
The fine-tuning run used the following settings (a rough `TrainingArguments` sketch follows the list):
- learning rate: 2e-4
- scheduler: cosine learning-rate schedule
- device: 2 × A100 GPU
- batch size: auto-found
- train time: 72 hours
- max sequence length: 2048
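For illustration only (this is not the actual training script), these settings roughly map onto `transformers` `TrainingArguments` as shown below; the batch size and epoch count are placeholders, since the real batch size was auto-found and the run length was time-bounded.
```python
# Illustrative only -- not the actual training configuration used for PGT-1B.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="pgt-1b-finetune",    # hypothetical output path
    learning_rate=2e-4,              # reported learning rate
    lr_scheduler_type="cosine",      # cosine learning-rate schedule
    per_device_train_batch_size=8,   # placeholder; the actual batch size was auto-found
    fp16=True,                       # assumption: mixed precision on the A100s
    num_train_epochs=1,              # placeholder; the run was time-bounded (~72 hours)
)
# The 2048-token max sequence length is applied at tokenization time, not here.
```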