"""Gradio demo that generates text with the lelapa/InkubaLM-0.4B model."""

import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

title = "# 🙋🏻‍♂️Tonic's ✒️InkubaLM-0.4B"
description = """✒️InkubaLM has been trained from scratch using 1.9 billion tokens of data for five African languages, along with English and French data, totaling 2.4 billion tokens of data. It is capable of understanding and generating content in five African languages: Swahili, Yoruba, Hausa, isiZulu, and isiXhosa, as well as English and French."""

# Optional Hugging Face access token, read from the environment (None if unset).
hf_token = os.getenv("HF_TOKEN")

# Load the model and tokenizer
model_name = "lelapa/InkubaLM-0.4B"
model = AutoModelForCausalLM.from_pretrained(
    model_name, trust_remote_code=True, use_auth_token=hf_token
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name, trust_remote_code=True, use_auth_token=hf_token
)

# Move model to GPU if available. The inputs are moved to `device` inside
# generate_text, so the model must live on the same device or generation
# fails with a device-mismatch error on CUDA hosts.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


def generate_text(prompt: str, max_length: int, repetition_penalty: float) -> str:
    """Generate a continuation of ``prompt`` with the loaded model.

    Args:
        prompt: Text the model should continue.
        max_length: Value forwarded to ``model.generate`` as ``max_length``.
        repetition_penalty: Value forwarded to ``model.generate`` as
            ``repetition_penalty``.

    Returns:
        The decoded newly generated text (prompt tokens removed), stripped of
        surrounding whitespace. An empty string is returned for a blank prompt.
    """
    # Nothing to continue: skip tokenization and generation entirely.
    if not prompt or not prompt.strip():
        return ""

    # Tokenize and generate the text
    input_ids = tokenizer(prompt, return_tensors="pt").to(device).input_ids
    outputs = model.generate(
        input_ids,
        # UI sliders may hand numbers over as floats; a length must be an int.
        max_length=int(max_length),
        repetition_penalty=repetition_penalty,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the tokens after the prompt and return the result
    new_tokens = outputs[:, input_ids.shape[1]:]
    generated_text = tokenizer.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()
    return generated_text


# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Enter your prompt here:",
                placeholder="Today I planned to ...",
            )
            max_length = gr.Slider(
                label="Max Length", minimum=70, maximum=1000, step=50, value=200
            )
            repetition_penalty = gr.Slider(
                label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.1, value=1.2
            )
            submit_button = gr.Button("Generate")
        with gr.Column():
            output = gr.Textbox(label="✒️Inkuba.4B:")

    # Run generate_text when the button is clicked and show the result.
    submit_button.click(
        generate_text,
        inputs=[prompt, max_length, repetition_penalty],
        outputs=output,
    )

# Launch the demo
demo.launch()