|
--- |
|
license: apache-2.0 |
|
datasets: |
|
- erfanzar/Data-60K |
|
- erfanzar/CC-OASST-1-EVAL |
|
language: |
|
- en |
|
- fr |
|
- fa |
|
- nl |
|
metrics: |
|
- bertscore |
|
pipeline_tag: text-generation |
|
--- |
|
|
|
# OpenSourceTransformers-OST Project |
|
|
|
[OST-OpenSourceTransformers Github](https://github.com/erfanzar/OST-OpenSourceTransformers) |
|
|
|
## NOTE |
|
|
|
Model version 2 has been released, and you can use the model with the built-in Gradio [interface](https://github.com/erfanzar/OST-OpenSourceTransformers/blob/main/OST_UI/app.py).
|
|
|
|
|
The Pythia Suite is **NOT** intended for deployment. It is not in itself |
|
a product and cannot be used for human-facing interactions. For example, |
|
the model may generate harmful or offensive text... |
|
|
|
|
|
Also keep in mind that, at least in this version, the model is not strong in Persian, French, or Dutch.
|
|
|
This model was trained with the same parameters as [PGT-1B-2EP](https://huggingface.co/erfanzar/PGT-1B-2EP) but fine-tuned on additional custom data. The two models behave quite similarly, so we suggest testing both and picking the one you like most.
|
|
|
|
|
## Hello community |
|
|
|
This model can also run on 4 GB of GPU RAM and handles dialog well.
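For a quick start, here is a minimal single-prompt inference sketch, assuming a CUDA GPU; the model id and prompt template are taken from the usage code below, while the `max_new_tokens` budget is just an illustrative choice. Loading the weights in `float16` is what keeps the footprint around 4 GB.

```python
# Minimal inference sketch (assumptions: CUDA GPU available, float16 weights).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = 'erfanzar/PGT-1B'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).cuda()

# The model expects OpenAssistant-style prompt markers (see `prompt_to_instruction` below).
prompt = '<|prompter|> What is a transformer model? <|endoftext|><|assistant|>'
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
output = model.generate(**inputs, max_new_tokens=256)  # illustrative token budget
print(tokenizer.decode(output[0], skip_special_tokens=True))
```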
|
|
|
## Usage Code |
|
|
|
```python |
|
|
|
import os
import textwrap
from dataclasses import dataclass, field

import gradio as gr
import torch
import whisper
from transformers import (AutoModelForCausalLM, AutoTokenizer, GenerationConfig,
                          HfArgumentParser, PreTrainedTokenizer, logging)
|
|
|
logger = logging.get_logger(__name__) |
|
logging.set_verbosity_info() |
|
|
|
|
|
@dataclass
class LoadConfig:
    mode: str = field(default='gui-chat', metadata={'help': 'mode to use the AI in'})
    model_id: str = field(default='erfanzar/PGT-1B', metadata={'help': 'model to load'})
    load_model: bool = field(default=True, metadata={'help': 'load the model; set to False for debug mode'})
    torch_type: torch.dtype = field(default=torch.float16, metadata={'help': 'data type'})
    load_in_8bit: bool = field(default=False,
                               metadata={'help': 'load the model in 8-bit to make it smaller '
                                                 "and faster, but it's not recommended 😀"})
    whisper_model: str = field(default='base', metadata={'help': 'model to load for whisper'})
|
|
|
|
|
def load_model(config: LoadConfig):
    logger.info(f'Loading model FROM : {config.model_id}')
    _model = AutoModelForCausalLM.from_pretrained(
        config.model_id,
        load_in_8bit=config.load_in_8bit,
        torch_dtype=config.torch_type,
    ) if config.load_model else None
    # Whisper provides speech-to-text for the voice input in the chat UI.
    model_whisper = whisper.load_model(config.whisper_model)
    n_params = f'{sum(p.numel() for p in _model.parameters()) / 1e9:.2f}' if _model is not None else 'NONE'
    logger.info(f'Done Loading Model with {n_params} Billion Parameters')
    logger.info(f'Loading Tokenizer FROM : {config.model_id}')
    _tokenizer = AutoTokenizer.from_pretrained(config.model_id)
    logger.info('Done Loading Tokenizer')
    return _model, _tokenizer, model_whisper
|
|
|
|
|
def prompt_to_instruction(text: str):
    # Wrap raw user text in the OpenAssistant-style template the model was tuned on.
    return f"<|prompter|> {text} <|endoftext|><|assistant|>"
|
|
|
|
|
def generate(model: AutoModelForCausalLM, tokenizer, text: str, max_new_tokens: int = 1024, |
|
use_prompt_to_instruction: bool = False, generation_config=None, |
|
b_pair=False): |
|
text = prompt_to_instruction(text) if use_prompt_to_instruction else text |
|
|
|
    # Streaming loop: re-encode the running text each step, generate a bit more,
    # and yield the partial output until an end-of-text marker appears.
    for i in range(max_new_tokens):
|
enc = tokenizer(text, return_tensors='pt', add_special_tokens=False) |
|
text_r = text |
|
enc = model.generate(enc.input_ids.to(model.device), generation_config=generation_config) |
|
text = tokenizer.decode(enc[0], skip_special_tokens=False) |
|
text = text[:-4] + tokenizer.eos_token if text[-4:] == '\n\n\n\n' else text |
|
if text.endswith(tokenizer.eos_token) or text.endswith('\n\n\n\n'): |
|
yield text[len(text_r):] if b_pair else text |
|
break |
|
else: |
|
yield text[len(text_r):] if b_pair else text |
|
|
|
|
|
def verify_text(txt):
    return '\n'.join(textwrap.fill(line, width=110) for line in txt.split('\n'))
|
|
|
|
|
# Simple terminal chat loop: the dialog history accumulates in `cache` and is
# prepended to every new prompt so the model keeps context.
def conversation(model, tokenizer, cache=None, max_new_tokens=512, byte_pair=False):
|
cache = '' if cache is None else cache |
|
while True: |
|
user = cache + prompt_to_instruction(input('>> ')) |
|
last_a = 'NONE' |
|
for text in generate(model, tokenizer, text=user, max_new_tokens=max_new_tokens, b_pair=byte_pair, |
|
use_prompt_to_instruction=False): |
|
os.system('clear') |
|
print(verify_text(text). |
|
replace('<|prompter|>', 'User : '). |
|
replace('<|endoftext|><|assistant|>', '\nAI :'). |
|
replace('<|endoftext|>', '\n'), end='') |
|
last_a = text |
|
cache += last_a[len(cache):] |
|
|
|
|
|
class Conversation: |
|
def __init__(self, model, tokenizer, config): |
|
self.model: AutoModelForCausalLM = model |
|
self.tokenizer: PreTrainedTokenizer = tokenizer |
|
self.config: LoadConfig = config |
|
|
|
def run(self, text, |
|
cache, max_length, temperature, top_p, top_k, |
|
repetition_penalty |
|
): |
|
opt = sort_cache_pgt(cache) |
|
original_text = text |
|
text = opt + prompt_to_instruction(text) |
|
final_res = '' |
|
generation_config = GenerationConfig( |
|
eos_token_id=self.tokenizer.eos_token_id, |
|
bos_token_id=self.tokenizer.bos_token_id, |
|
pad_token_id=self.tokenizer.pad_token_id, |
|
max_new_tokens=1, |
|
max_length=max_length, |
|
temperature=temperature, |
|
top_p=top_p, |
|
top_k=top_k, |
|
repetition_penalty=repetition_penalty |
|
) |
|
for byte in generate(self.model, self.tokenizer, text=text, b_pair=False, |
|
generation_config=generation_config, |
|
use_prompt_to_instruction=False): |
|
final_res = byte |
|
yield byte[len(text):].replace('<|endoftext|>', '') |
|
answer = final_res[len(text):len(final_res) - len('<|endoftext|>')] |
|
cache.append([original_text, answer]) |
|
return '', cache |
|
|
|
|
|
def sort_cache_pgt(cache_):
    # Serialize the chat history into the <|prompter|>/<|assistant|> prompt format.
    opt = ''
    for f in cache_:
        opt += f"<|prompter|>{f[0]}<|endoftext|><|assistant|>{f[1]}<|endoftext|>"
    return opt
|
|
|
|
|
def sort_cache_lgem(cache_):
    opt = ''
    for f in cache_:
        opt += f"User:{f[0]}\nAI:{f[1]}"
    return opt
|
|
|
|
|
def chat_bot_run(text: str, cache, max_new_tokens, |
|
max_length, |
|
temperature, |
|
top_p, |
|
top_k, |
|
repetition_penalty, |
|
voice): |
|
    # If a microphone recording was supplied, transcribe it with Whisper and
    # fall back to the transcript when the text box is empty.
    if voice is not None:
|
text_rec = whisper_model.transcribe(voice)['text'] |
|
if text == '': |
|
text = text_rec |
|
|
|
opt = sort_cache_pgt(cache) |
|
original_text = text |
|
text = opt + prompt_to_instruction(text) |
|
final_res = '' |
|
generation_config = GenerationConfig( |
|
max_length=max_length, |
|
max_new_tokens=max_new_tokens, |
|
temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty, |
|
eos_token_id=tokenizer.eos_token_id, |
|
pad_token_id=tokenizer.pad_token_id, |
|
bos_token_id=tokenizer.bos_token_id |
|
) |
|
    # A deep copy (`copy.deepcopy(cache)`) would avoid mutating the UI state in place.
    cache_f = cache
    cache_f.append([original_text, ''])
|
if model is not None: |
|
|
|
for byte in generate(model, tokenizer, text=text, b_pair=False, |
|
generation_config=generation_config, max_new_tokens=max_length, |
|
use_prompt_to_instruction=False): |
|
final_res = byte |
|
chosen_byte = byte[len(text):].replace('<|endoftext|>', '') |
|
print(chosen_byte) |
|
cache_f[-1][1] = chosen_byte |
|
yield '', cache_f |
|
answer = final_res[len(text):len(final_res) - len('<|endoftext|>')] |
|
else: |
|
answer = 'It seems like im down or im not loaded yet 😇' |
|
cache.append([original_text, answer]) |
|
return '', cache |
|
|
|
|
|
def gradio_ui(main_class_conversation: Conversation):
    # `Conversation.run` expects a chat history as its second argument; this
    # single-turn wrapper passes a fresh empty history on every call.
    def run_once(text, max_length, temperature, top_p, top_k, repetition_penalty):
        response = ''
        for response in main_class_conversation.run(text, [], max_length, temperature,
                                                    top_p, top_k, repetition_penalty):
            pass
        return response

    interface = gr.Interface(
        fn=run_once, outputs='text',
        inputs=[gr.Textbox(lines=10, placeholder="I'm just a placeholder, ignore me ... "),
                gr.Slider(value=1024, maximum=1024, minimum=1, label='Max Length'),
                gr.Slider(value=0.9, maximum=1, minimum=0.2, label='Temperature'),
                gr.Slider(value=0.95, maximum=0.9999, minimum=0.1, label='Top P'),
                gr.Slider(value=50, maximum=100, minimum=1, label='Top K'),
                gr.Slider(value=1.2, maximum=5, minimum=1, label='Repetition Penalty')])
    interface.queue()
    interface.launch(share=True)
|
|
|
def gradio_ui_chat(main_class_conversation: Conversation): |
|
theme = gr.themes.Soft( |
|
primary_hue="cyan", |
|
secondary_hue="teal", |
|
neutral_hue=gr.themes.Color(c100="#f3f4f6", c200="#e5e7eb", c300="#d1d5db", |
|
c400="#9ca3af", c50="#f9fafb", c500="#6b7280", |
|
c600="#4b5563", c700="#374151", c800="#1f2937", |
|
c900="#47a9c2", c950="#0b0f19"), |
|
) |
|
|
|
with gr.Blocks( |
|
theme=theme) as block: |
|
with gr.Row(): |
|
with gr.Column(scale=1): |
|
max_length = gr.Slider(value=1024, maximum=1024, minimum=1, label='Max Length', step=1) |
|
max_steam_tokens = gr.Slider(value=1, maximum=3, minimum=1, label='Max Stream Tokens', step=1) |
|
temperature = gr.Slider(value=0.9, maximum=1, minimum=0.2, label='Temperature', step=0.01) |
|
top_p = gr.Slider(value=0.95, maximum=0.9999, minimum=0.1, label='Top P', step=0.01) |
|
top_k = gr.Slider(value=50, maximum=100, minimum=1, label='Top K', step=1) |
|
penalty = gr.Slider(value=1.2, maximum=5, minimum=1, label='Repetition Penalty', step=0.1, visible=True) |
|
# TODO |
|
penalty_ = gr.Slider(value=1.2, maximum=10, minimum=1, label='Repetition', step=0.1, visible=True) |
|
gre_mode = gr.Checkbox(label='Greedy Mode') |
|
smart_mode = gr.Checkbox(label='Smart Mode') |
|
informational_mode = gr.Checkbox(label='Informational Mode') |
|
voice = gr.Audio(source='microphone', type="filepath", streaming=False, label='Smart Voice', ) |
|
with gr.Column(scale=4): |
|
cache = gr.Chatbot(elem_id=main_class_conversation.config.model_id, |
|
label=main_class_conversation.config.model_id).style(container=True, |
|
height=680) |
|
with gr.Row(): |
|
with gr.Column(scale=1): |
|
submit = gr.Button() |
|
with gr.Column(scale=4): |
|
text = gr.Textbox(show_label=False).style(container=False) |
|
|
|
submit.click(fn=chat_bot_run, |
|
inputs=[text, cache, max_steam_tokens, max_length, temperature, top_p, top_k, penalty, voice], |
|
outputs=[text, cache]) |
|
text.submit(fn=chat_bot_run, |
|
inputs=[text, cache, max_steam_tokens, max_length, temperature, top_p, top_k, penalty, voice], |
|
outputs=[text, cache]) |
|
        gr.Markdown(
            'LucidBrains is a platform that makes AI accessible and easy to use for everyone. '
            'Our mission is to empower individuals and businesses with the tools they need to '
            'harness the power of AI and machine learning, without requiring a background in '
            'data science. We will build what you want for you and help you live a better life '
            'using Artificial Intelligence, pushing technology beyond limits.'
            '\n[OST-OpenSourceTransformers](https://github.com/erfanzar/OST-OpenSourceTransformers) From LucidBrains 🧠\n'
        )
|
block.queue().launch(debug=False, share=True, inline=True, show_tips=True, width='100%') |
|
|
|
|
|
def main(config):
    mcc = Conversation(model=model, tokenizer=tokenizer, config=config)
    if config.mode == 'cli':
        conversation(model=model, tokenizer=tokenizer)
    elif config.mode == 'gui':
        gradio_ui(main_class_conversation=mcc)
    elif config.mode == 'gui-chat':
        gradio_ui_chat(main_class_conversation=mcc)
    else:
        raise ValueError(f'Unknown Mode For : {config.mode}')
|
|
|
|
|
if __name__ == "__main__":
    config_ = HfArgumentParser(LoadConfig).parse_args_into_dataclasses()[0]
    # config_ = LoadConfig()
    print(f'Running WITH MODE : {config_.mode}')
    model, tokenizer, whisper_model = load_model(config=config_)
    if model is not None and not config_.load_in_8bit:
        # 8-bit models are already placed on the GPU by `from_pretrained`, and
        # `model` is None when model loading is disabled for debugging.
        model = model.cuda()
    whisper_model = whisper_model.cuda()
    main(config_)
|
|
|
|
|
``` |
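Since the script parses its options with `HfArgumentParser`, the command-line flags mirror the `LoadConfig` fields; assuming you save it as, say, `app.py`, you can launch the chat UI with `python app.py --mode gui-chat --model_id erfanzar/PGT-1B`.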
|
|
|
# Pythia-1B |
|
|
|
## Model Details |
|
|
|
##### Pretrained Model |
|
|
|
- Developed by: [EleutherAI](http://eleuther.ai) |
|
- Model type: Transformer-based Language Model |
|
- License: [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) |
|
|
|
### Train Parameters
|
|
|
- learning rate: 2e-4
- scheduler: cosine
- hardware: 2 × A100 GPUs
- batch size: AutoFind
- train time: 72 hours
- max sequence length: 2048
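
For reference, these settings map roughly onto a 🤗 Transformers `TrainingArguments` configuration like the sketch below; the output directory, epoch count, and starting batch size are placeholders rather than the actual training recipe.

```python
# Rough mapping of the listed hyperparameters onto TrainingArguments
# (sketch only; output_dir, epochs, and the starting batch size are placeholders).
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir='pgt-1b-finetune',    # placeholder
    learning_rate=2e-4,              # learning rate: 2e-4
    lr_scheduler_type='cosine',      # scheduler: cosine
    auto_find_batch_size=True,       # batch size: AutoFind
    per_device_train_batch_size=8,   # placeholder starting point for the auto-finder
    fp16=True,
    num_train_epochs=2,              # placeholder
)
# Sequences would be tokenized and packed to the 2048-token maximum length.
```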