File size: 7,231 Bytes
b708f61
 
 
 
 
 
 
 
 
 
38d5029
b708f61
76e3ff9
 
 
 
 
b708f61
 
 
 
fc8b513
b708f61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc8b513
b708f61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc8b513
 
 
 
 
 
 
 
 
 
b708f61
 
 
 
 
 
 
 
 
fc8b513
b708f61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc8b513
 
b708f61
fc8b513
38d5029
 
b708f61
fc8b513
38d5029
b708f61
 
 
 
 
 
 
 
38d5029
 
b708f61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc8b513
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import streamlit as st
import pandas as pd
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
from test import predict_hoax, evaluate_model_performance
from load_model import load_model
from styles import COMMON_CSS
from google.cloud import storage
from io import StringIO
import os
from datetime import datetime
import pytz

with open("credentials.json", "w") as f:
    f.write(st.secrets["GOOGLE_APPLICATION_CREDENTIALS"])

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"

def save_corrections_to_gcs(bucket_name, file_name, correction_data):
    client = storage.Client()
    bucket = client.bucket("dashboardhoax-bucket")
    blob = bucket.blob("koreksi_pengguna_file.csv")

    if blob.exists():
        existing_data = blob.download_as_string().decode('utf-8')
        existing_df = pd.read_csv(StringIO(existing_data))
    else:
        existing_df = pd.DataFrame(columns=['Timestamp', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource', 'Result_Detection', 'Result_Correction'])

    new_data_df = pd.DataFrame(correction_data)
    updated_df = pd.concat([existing_df, new_data_df], ignore_index=True)

    updated_csv_data = updated_df.to_csv(index=False)
    blob.upload_from_string(updated_csv_data, content_type='text/csv')

def load_data(file):
    return pd.read_csv(file)

def show_deteksi_upload():
    st.markdown(COMMON_CSS, unsafe_allow_html=True)
    
    st.markdown("<h6 style='font-size: 14px; margin-bottom: 0;'>Pilih Model</h6>", unsafe_allow_html=True)
    selected_model = st.selectbox(
        "",
        [
            "cahya/bert-base-indonesian-522M",
            "indobenchmark/indobert-base-p2",
            "indolem/indobert-base-uncased",
            "mdhugol/indonesia-bert-sentiment-classification"
        ],
        key="model_selector_upload"
    )

    tokenizer, model = load_model(selected_model)

    st.markdown("<h6 style='font-size: 14px; margin-bottom: -200px;'>Unggah File Disini</h6>", unsafe_allow_html=True)
    uploaded_file = st.file_uploader("", type="csv")

    if 'df' not in st.session_state:
        st.session_state.df = None

    if uploaded_file is not None:
        df = load_data(uploaded_file)
        df.index = df.index + 1

        st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Diunggah</h6>", unsafe_allow_html=True)

        grid_options = GridOptionsBuilder.from_dataframe(df)
        grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
        grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
        gridOptions = grid_options.build()

        AgGrid(
            df,
            gridOptions=gridOptions,
            update_mode=GridUpdateMode.VALUE_CHANGED,
            use_container_width=True
        )

        if st.button("Deteksi", key="detect_upload"):
            try:
                df['Result_Detection'] = df.apply(lambda row: predict_hoax(row['Title'], row['Content']), axis=1)
                df['Correction'] = False 
                st.session_state.df = df.copy()
            except Exception as e:
                st.error(f"Terjadi kesalahan saat deteksi: {e}")

    if st.session_state.df is not None:
        if 'Label' in st.session_state.df.columns:
            accuracy, precision, recall, f1 = evaluate_model_performance(st.session_state.df, tokenizer, model)
            performance_text = (
                f"*Performansi Model*\n\n"
                f"*Accuracy:* {round(accuracy, 2)}&nbsp;&nbsp;"
                f"*Precision:* {round(precision, 2)}&nbsp;&nbsp;"
                f"*Recall:* {round(recall, 2)}&nbsp;&nbsp;"
                f"*F1 Score:* {round(f1, 2)}"
            )
            st.success(performance_text)

        st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Hasil Deteksi</h6>", unsafe_allow_html=True)

        cols = ['Correction', 'Result_Detection'] + [col for col in st.session_state.df.columns if col not in ['Correction', 'Result_Detection', 'Label_id']]
        df_reordered = st.session_state.df[cols]

        grid_options = GridOptionsBuilder.from_dataframe(df_reordered)
        grid_options.configure_pagination(paginationAutoPageSize=False, paginationPageSize=10)
        grid_options.configure_default_column(editable=True, groupable=True)
        grid_options.configure_default_column(cellStyle={'fontSize': '12px'})
        gridOptions = grid_options.build()

        grid_response = AgGrid(
            st.session_state.df,
            gridOptions=gridOptions,
            update_mode=GridUpdateMode.VALUE_CHANGED
        )

        if grid_response['data'] is not None:
            edited_df = pd.DataFrame(grid_response['data'])
            st.session_state.df = edited_df.copy()
            corrected_df = edited_df[edited_df['Correction']].copy()

            edited_df['Result_Correction'] = edited_df.apply(lambda row: 
                'HOAX' if (row['Result_Detection'] == 'NON-HOAX' and row['Correction']) else 
                ('NON-HOAX' if (row['Result_Detection'] == 'HOAX' and row['Correction']) else row['Result_Detection']), 
                axis=1
            )

            st.session_state.df = edited_df.copy()

            if not corrected_df.empty:
                expected_cols = ['Timestamp', 'Result_Detection', 'Result_Correction', 'Label_id', 'Label', 'Title', 'Content', 'Fact', 'References', 'Classification', 'Datasource']
                existing_cols = [col for col in expected_cols if col in corrected_df.columns]

                # Tambahkan Timestamp hanya untuk penyimpanan
                wib = pytz.timezone('Asia/Jakarta')
                corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')

                corrected_df_to_display = corrected_df[existing_cols]

                st.markdown("<h6 style='font-size: 16px; margin-bottom: 0;'>Data yang Dikoreksi</h6>", unsafe_allow_html=True)
                st.dataframe(corrected_df_to_display, use_container_width=True, hide_index=True)
            else:
                st.write("Tidak ada data yang dikoreksi.")
        
        if st.button("Simpan", key="corrected_data"):
            if 'df' in st.session_state:
                corrected_df = st.session_state.df[st.session_state.df['Correction']].copy()
                wib = pytz.timezone('Asia/Jakarta')
                corrected_df['Timestamp'] = datetime.now(wib).strftime('%Y-%m-%d %H:%M:%S')
                corrected_df = corrected_df.drop(columns=['Correction'])

                if not corrected_df.empty:
                    bucket_name = "your-bucket-name"
                    file_name = "corrected_upload_data.csv"
                    
                    correction_data = corrected_df.to_dict(orient='records')
                    
                    save_corrections_to_gcs(bucket_name, file_name, correction_data)
                    
                    st.success("Data telah disimpan.")
                    st.session_state.corrected_df = corrected_df
                else:
                    st.warning("Tidak ada data yang dikoreksi untuk disimpan.")
            else:
                st.warning("Data deteksi tidak ditemukan.")