"""Gradio demo: answer a natural-language question with WikiSQL SQL.

The app does not generate SQL. It fuzzy-matches the user's text against the
questions in a small slice of the WikiSQL dataset and returns the
human-readable SQL stored for the closest question.
"""

from difflib import get_close_matches
from typing import Any, Iterable, Mapping, Optional

import gradio as gr
from datasets import load_dataset

# Message shown when no dataset question is similar enough to the input.
NO_MATCH_MESSAGE = "No close match found in the dataset."

# Load the WikiSQL dataset (only the first 100 rows of the training split).
wikisql_dataset = load_dataset("wikisql", split='train[:100]')

# Create a mapping between natural language queries and SQL queries.
# If a question occurs more than once, the last occurrence's SQL is kept.
query_sql_mapping = {
    item['question']: item['sql']['human_readable'] for item in wikisql_dataset
}

# Built once at import so each request does not re-iterate the dataset.
_KNOWN_QUESTIONS = list(query_sql_mapping)


def _closest_question(query: str, questions: Iterable[str]) -> Optional[str]:
    """Return the entry of *questions* most similar to *query*, or None."""
    # difflib applies its default similarity cutoff; candidates scoring
    # below it are dropped, which is what produces the "no match" case.
    matches = get_close_matches(query, questions, n=1)
    return matches[0] if matches else None


def find_closest_match(
    query: str, dataset: Iterable[Mapping[str, Any]]
) -> Optional[str]:
    """Find the question in *dataset* that most closely resembles *query*.

    Args:
        query: The user's natural-language text.
        dataset: Iterable of items, each exposing a ``'question'`` key.

    Returns:
        The best-matching question string, or ``None`` when *query* is
        empty/``None`` or nothing in *dataset* is similar enough.
    """
    if not query:
        # difflib raises TypeError on None; empty text has nothing to match.
        return None
    return _closest_question(query, [item['question'] for item in dataset])


def generate_sql_from_user_input(query: str) -> str:
    """Return the dataset SQL for the question closest to *query*.

    Args:
        query: Natural-language text entered by the user. Leading and
            trailing whitespace is ignored.

    Returns:
        The human-readable SQL string of the closest dataset question, or a
        fallback message when no sufficiently similar question exists.
    """
    cleaned_query = (query or "").strip()
    if not cleaned_query:
        return NO_MATCH_MESSAGE

    # Find the closest match among the questions loaded at import time.
    matched_query = _closest_question(cleaned_query, _KNOWN_QUESTIONS)
    if not matched_query:
        return NO_MATCH_MESSAGE

    # Retrieve the corresponding SQL query from the dataset.
    return query_sql_mapping.get(matched_query, "SQL query not found.")


# Create a Gradio interface
interface = gr.Interface(
    fn=generate_sql_from_user_input,
    inputs=gr.Textbox(label="Enter your natural language query"),
    outputs=gr.Textbox(label="SQL Query from Dataset"),
    title="NL to SQL using WikiSQL Dataset",
    description="Enter a natural language query and get the corresponding SQL query from the WikiSQL dataset.",
)

# Launch the app
if __name__ == "__main__":
    interface.launch()