"""Home page of the hoax-analysis Streamlit dashboard.

Renders three rows: overview charts, text/classification charts, and two
static tables (model evaluation metrics and dataset statistics).
"""

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import streamlit as st
from wordcloud import STOPWORDS, WordCloud

# Fixed colours so HOAX / NON-HOAX look the same in every chart.
_LABEL_COLORS = {'HOAX': 'red', 'NON-HOAX': 'green'}

# Yearly charts only include rows whose year is at most this value.
_MAX_YEAR = 2023

# Indonesian function words removed from the word cloud in addition to the
# default English STOPWORDS shipped with the wordcloud package.
_ADDITIONAL_STOPWORDS = {
    "dan", "di", "yang", "ke", "dari", "untuk", "pada", "adalah", "sebuah",
    "dengan", "tersebut", "ini", "itu", "atau", "dalam", "juga", "yg", "tapi",
}

# Rows: model name, three metrics per class (precision, recall, F1), accuracy.
_EVALUATION_ROWS = [
    ["indobenchmark/indobert-base-p2",
     0.6898, 0.9793, 0.8094, 0.8400, 0.1981, 0.3206, 0.7023],
    ["cahya/bert-base-indonesian-522M",
     0.7545, 0.8756, 0.8106, 0.6800, 0.4811, 0.5635, 0.7358],
    ["indolem/indobert-base-uncased",
     0.7536, 0.8238, 0.7871, 0.6136, 0.5094, 0.5567, 0.7124],
    ["mdhugol/indonesia-bert-sentiment-classification",
     0.7444, 0.8601, 0.7981, 0.6447, 0.4623, 0.5385, 0.7191],
]

# Label, Train, Test, Dev. Values are pre-formatted display strings.
# The HOAX train count used "." as thousands separator while TOTAL used ",";
# normalised to "," (11,563 + 789 = 12,352).
_STATS_ROWS = [
    ("HOAX", "11,563", "193", "193"),
    ("NON-HOAX", "789", "106", "106"),
    ("TOTAL", "12,352", "299", "299"),
]


# Caching data loading
@st.cache_data
def load_data():
    """Read the dashboard dataset from ``mafindo_mix_llm.csv``.

    Returns:
        The CSV content as a ``pandas.DataFrame``.
    """
    df = pd.read_csv("mafindo_mix_llm.csv")
    return df


# Caching WordCloud generation
@st.cache_resource
def generate_wordcloud(text, colormap, stopwords):
    """Build a 500x200 white-background word cloud from ``text``.

    Args:
        text: The text whose word frequencies are drawn.
        colormap: Matplotlib colormap name used for the words.
        stopwords: Words to exclude from the cloud.

    Returns:
        The generated ``WordCloud`` object.
    """
    wordcloud = WordCloud(
        width=500,
        height=200,
        background_color='white',
        colormap=colormap,
        stopwords=stopwords,
    ).generate(text)
    return wordcloud


def _section_title(text):
    """Render a small heading above a chart or table."""
    # NOTE(review): the original heading markup was missing from the source;
    # a plain <h6> is a minimal reconstruction - confirm tag and styling.
    st.markdown(f"<h6>{text}</h6>", unsafe_allow_html=True)


def _side_legend(title, x, **extra):
    """Return the shared layout for a vertical legend right of the plot."""
    legend = dict(
        yanchor='top',
        y=1,
        xanchor='left',
        x=x,
        bgcolor='rgba(255, 255, 255, 0)',  # transparent background
        bordercolor='rgba(0, 0, 0, 0)',  # no visible border
        font=dict(size=8),
        traceorder='normal',
        orientation='v',
        title_text=title,
    )
    legend.update(extra)
    return legend


def _compact_layout(fig, width, height, x_title, y_title,
                    x_tick_size=8, right_margin=10, **extra):
    """Apply the small-font, tight-margin layout shared by the axis charts."""
    fig.update_layout(
        width=width,
        height=height,
        xaxis_title=x_title,
        yaxis_title=y_title,
        xaxis_title_font_size=10,
        yaxis_title_font_size=10,
        xaxis_tickfont_size=x_tick_size,
        yaxis_tickfont_size=8,
        margin=dict(t=10, b=10, l=10, r=right_margin),
        **extra,
    )


def _prepare(df):
    """Parse 'Tanggal' (dd/mm/YYYY), derive 'Year', force 'Content' to str."""
    df['Tanggal'] = pd.to_datetime(df['Tanggal'], format='%d/%m/%Y')
    df['Year'] = df['Tanggal'].dt.year
    # Cast to string so the word-cloud join cannot fail on non-text values.
    df['Content'] = df['Content'].astype(str)
    return df


def _render_overview_row(df):
    """Draw the first row: label counts, per-source counts, yearly trends."""
    col1, col2, col3, col4 = st.columns([1.5, 2.5, 1.5, 2.5])

    # Visualization 1: Hoax vs Non-Hoax totals.
    with col1:
        _section_title("Hoax vs Non-Hoax")
        label_counts = df['Label'].value_counts().reset_index()
        label_counts.columns = ['Label', 'Jumlah']
        fig_label = px.bar(label_counts, x='Label', y='Jumlah', color='Label',
                           color_discrete_map=_LABEL_COLORS)
        _compact_layout(fig_label, 200, 150, 'Label', 'Jumlah',
                        showlegend=False)
        st.plotly_chart(fig_label, use_container_width=False)

    # Visualization 2: Hoax vs Non-Hoax per data source (grouped bars).
    with col2:
        _section_title("Hoax vs Non-Hoax per Data Source")
        source_counts = (df.groupby(['Datasource', 'Label'])
                         .size()
                         .reset_index(name='counts'))
        fig_source = px.bar(source_counts, x='Datasource', y='counts',
                            color='Label', barmode='group',
                            color_discrete_map=_LABEL_COLORS)
        _compact_layout(
            fig_source, 500, 150, 'Datasource', 'Jumlah',
            x_tick_size=6,
            right_margin=50,
            xaxis_tickangle=0,
            legend=_side_legend('Label', 1.05),
            showlegend=True,
        )
        st.plotly_chart(fig_source, use_container_width=False)

    # Both yearly charts use the same year cut-off.
    up_to_max_year = df[df['Year'] <= _MAX_YEAR]

    # Visualization 3: number of hoaxes per year.
    with col3:
        _section_title("Hoax per Tahun")
        hoax_per_year = (up_to_max_year[up_to_max_year['Label'] == 'HOAX']
                         .groupby('Year')
                         .size()
                         .reset_index(name='count'))
        fig_hoax_year = px.line(hoax_per_year, x='Year', y='count',
                                line_shape='linear',
                                color_discrete_sequence=['red'])
        _compact_layout(fig_hoax_year, 200, 150, 'Tahun', 'Jumlah Hoax',
                        showlegend=False)
        st.plotly_chart(fig_hoax_year, use_container_width=False)

    # Visualization 4: topics per year.
    with col4:
        _section_title("Topik per Tahun")
        topics_per_year = (up_to_max_year.groupby(['Year', 'Topic'])
                           .size()
                           .reset_index(name='count'))
        fig_topics = px.bar(
            topics_per_year, x='Year', y='count', color='Topic',
            color_continuous_scale=px.colors.sequential.Viridis)
        _compact_layout(
            fig_topics, 600, 150, 'Tahun', 'Jumlah Topik',
            showlegend=True,
            legend=_side_legend('Topic', 1.02,
                                itemclick='toggleothers',
                                itemsizing='constant'),
        )
        st.plotly_chart(fig_topics, use_container_width=True)


def _render_donut(counts, names, width, right_margin):
    """Draw a donut chart of ``counts`` with its legend to the right."""
    fig = px.pie(counts, names=names, values='Count', hole=0.3,
                 color_discrete_sequence=px.colors.qualitative.Set2)
    fig.update_layout(
        width=width,
        height=170,
        # The wide right margin leaves room for the legend.
        margin=dict(t=20, b=20, l=20, r=right_margin),
        legend=_side_legend(names, 1.07,
                            itemclick='toggleothers',
                            itemsizing='constant'),
    )
    st.plotly_chart(fig, use_container_width=True)


def _render_text_row(df, stopwords):
    """Draw the second row: hoax word cloud, classification and tone donuts."""
    col5, col6, col7 = st.columns([2, 2.5, 2.5])

    with col5:
        _section_title("Wordcloud Hoax")
        hoax_text = ' '.join(df[df['Label'] == 'HOAX']['Content'])
        cloud = generate_wordcloud(hoax_text, 'Reds', stopwords)
        fig, ax = plt.subplots(figsize=(5, 2.5))
        ax.imshow(cloud, interpolation='bilinear')
        ax.axis('off')
        st.pyplot(fig)
        # pyplot keeps every figure it creates until it is closed; without
        # this each rerun of the page would leave one more figure in memory.
        plt.close(fig)

    with col6:
        _section_title("Klasifikasi")
        # Lower-case first so differently cased spellings are counted together.
        class_counts = (df['Classification'].str.lower()
                        .value_counts()
                        .reset_index())
        class_counts.columns = ['Classification', 'Count']
        _render_donut(class_counts, 'Classification', 300, 120)

    with col7:
        _section_title("Tone")
        tone_counts = df['Tone'].value_counts().reset_index()
        tone_counts.columns = ['Tone', 'Count']
        _render_donut(tone_counts, 'Tone', 250, 100)


def _metrics_table_html(rows):
    """Return the evaluation-metrics table as an HTML string.

    Floats are shown with four decimals, the model name is left-aligned, the
    metrics are centred, and the row with the highest accuracy (last column)
    is emphasised.
    """
    # NOTE(review): all tags and styles below are a minimal reconstruction;
    # the original markup was missing from the source. Only the cell texts
    # and the alignment/highlight/border intent were recoverable.
    # NOTE(review): the first metric group reproduces the accuracy column
    # only with 193 test samples, which the statistics table labels HOAX -
    # the NON-HOAX / HOAX header order may be swapped. Confirm.
    best_row = max(rows, key=lambda r: r[-1]) if rows else None

    parts = [
        "<table style='width: 100%; border-collapse: collapse;'>",
        "<thead>",
        "<tr>",
        "<th rowspan='2' style='text-align: left;'>Pre-trained Model</th>",
        "<th colspan='3' style='text-align: center;'>NON-HOAX</th>",
        "<th colspan='3' style='text-align: center;'>HOAX</th>",
        "<th rowspan='2' style='text-align: center;'>Accuracy</th>",
        "</tr>",
        "<tr>",
    ]
    parts.extend(
        f"<th style='text-align: center;'>{name}</th>"
        for name in ("Precision", "Recall", "F1-Score") * 2
    )
    parts.extend(["</tr>", "</thead>", "<tbody>"])

    last_index = len(rows) - 1
    for index, row in enumerate(rows):
        formatted_row = [f"{item:.4f}" if isinstance(item, float) else item
                         for item in row]
        styles = []
        if row == best_row:
            styles.append("font-weight: bold;")
        if index == last_index:
            # Close the table visually with a border under the last row.
            styles.append("border-bottom: 1px solid;")
        row_style = f" style='{' '.join(styles)}'" if styles else ""
        parts.append(f"<tr{row_style}>")
        parts.append(f"<td style='text-align: left;'>{row[0]}</td>")
        parts.extend(f"<td style='text-align: center;'>{item}</td>"
                     for item in formatted_row[1:])
        parts.append("</tr>")

    parts.extend(["</tbody>", "</table>"])
    return "".join(parts)


def _stats_table_html():
    """Return the dataset-split statistics table as an HTML string."""
    # NOTE(review): markup reconstructed, as for the metrics table.
    parts = [
        "<table style='width: 100%; border-collapse: collapse;'>",
        "<thead><tr>",
        "<th style='text-align: left;'>Label</th>",
        "<th style='text-align: center;'>Train</th>",
        "<th style='text-align: center;'>Test</th>",
        "<th style='text-align: center;'>Dev</th>",
        "</tr></thead>",
        "<tbody>",
    ]
    for label, train, test, dev in _STATS_ROWS:
        parts.append(
            "<tr>"
            f"<td style='text-align: left;'>{label}</td>"
            f"<td style='text-align: center;'>{train}</td>"
            f"<td style='text-align: center;'>{test}</td>"
            f"<td style='text-align: center;'>{dev}</td>"
            "</tr>"
        )
    parts.extend(["</tbody>", "</table>"])
    return "".join(parts)


def _render_evaluation_row():
    """Draw the third row: evaluation-metrics and data-statistics tables."""
    col8, col9 = st.columns([5, 1.5])

    with col8:
        _section_title("Matriks Evaluasi")
        st.markdown(_metrics_table_html(_EVALUATION_ROWS),
                    unsafe_allow_html=True)

    with col9:
        _section_title("Statistik Data")
        st.markdown(_stats_table_html(), unsafe_allow_html=True)


def show_home():
    """Render the dashboard home page.

    Loads the dataset, prepares the date/year/content columns and draws the
    three rows of the page in order: overview charts, word cloud with the
    classification/tone donuts, and the two summary tables.

    Raises:
        ValueError: from ``pandas.to_datetime`` if a 'Tanggal' value does not
            match the ``%d/%m/%Y`` format.
    """
    df = _prepare(load_data())

    # Default stopwords plus the Indonesian additions.
    combined_stopwords = set(STOPWORDS).union(_ADDITIONAL_STOPWORDS)

    _render_overview_row(df)
    _render_text_row(df, combined_stopwords)
    _render_evaluation_row()