HusnaManakkot commited on
Commit
f4f80f2
1 Parent(s): baec2c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -51
app.py CHANGED
@@ -1,51 +1,26 @@
1
- import torch
2
- from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW
3
- from torch.utils.data import DataLoader, Dataset
4
- from tqdm import tqdm
5
-
6
- # Define your dataset class
7
- class SpiderDataset(Dataset):
8
- def __init__(self, encodings, labels):
9
- self.encodings = encodings
10
- self.labels = labels
11
-
12
- def __getitem__(self, idx):
13
- return {'input_ids': self.encodings[idx], 'labels': self.labels[idx]}
14
-
15
- def __len__(self):
16
- return len(self.encodings)
17
-
18
- # Load your preprocessed Spider dataset
19
- train_encodings = # Your preprocessed input encodings for training
20
- train_labels = # Your preprocessed labels for training
21
-
22
- # Create a PyTorch dataset and dataloader
23
- train_dataset = SpiderDataset(train_encodings, train_labels)
24
- train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
25
-
26
- # Load the pre-trained T5 model
27
- model = T5ForConditionalGeneration.from_pretrained('t5-base')
28
- tokenizer = T5Tokenizer.from_pretrained('t5-base')
29
-
30
- # Move the model to the GPU if available
31
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
32
- model.to(device)
33
-
34
- # Set up the optimizer
35
- optimizer = AdamW(model.parameters(), lr=5e-5)
36
-
37
- # Fine-tune the model
38
- model.train()
39
- for epoch in range(3): # Number of epochs
40
- for batch in tqdm(train_loader):
41
- optimizer.zero_grad()
42
- input_ids = batch['input_ids'].to(device)
43
- labels = batch['labels'].to(device)
44
- outputs = model(input_ids=input_ids, labels=labels)
45
- loss = outputs.loss
46
- loss.backward()
47
- optimizer.step()
48
-
49
- # Save the fine-tuned model
50
- model.save_pretrained('your_model_directory')
51
- tokenizer.save_pretrained('your_model_directory')
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+
# Load tokenizer and model.
# The checkpoint id is hoisted into one constant so the tokenizer and the
# model are guaranteed to come from the same fine-tuned checkpoint.
MODEL_NAME = "mrm8488/t5-base-finetuned-wikiSQL"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
def generate_sql(query):
    """Translate a natural-language question into a SQL query.

    Args:
        query: The natural-language question to convert (str).

    Returns:
        The generated SQL string with special tokens stripped.
    """
    # The wikiSQL fine-tune was trained with this task prefix.
    input_text = "translate English to SQL: " + query
    # truncation=True guards against queries longer than the model's maximum
    # context window, which would otherwise overflow at generation time.
    inputs = tokenizer(
        input_text, return_tensors="pt", padding=True, truncation=True
    )
    outputs = model.generate(**inputs, max_length=512)
    sql_query = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return sql_query
# Build the Gradio UI around generate_sql.
# The input widget is constructed separately for readability.
query_box = gr.Textbox(
    lines=2,
    placeholder="Enter your natural language query here...",
)

interface = gr.Interface(
    fn=generate_sql,
    inputs=query_box,
    outputs="text",
    title="NL to SQL with T5",
    description="This model converts natural language queries into SQL. Enter your query!",
)

# Launch only when executed as a script (not when imported).
if __name__ == "__main__":
    interface.launch()