# OneLLM / generate_pandalm.py
import json

# Per-model answer files (JSON Lines: one JSON record per line) and the output path.
json_path1 = "llama_adapter_7b.json"
json_path2 = "alpaca_lora_7b.json"
out_path = "llama_adapter_vs_alpaca_lora.json"

with open(json_path1) as f:
    data1 = f.readlines()
with open(json_path2) as f:
    data2 = f.readlines()
with open('question.jsonl') as f:
    question = f.readlines()

# Every question must have an answer from both models.
assert len(data1) == len(data2) == len(question)
out_data = []

# First pass: llama_adapter answers as response1, alpaca_lora answers as response2.
for i, (d1, d2, q) in enumerate(zip(data1, data2, question)):
    d1 = json.loads(d1)
    d2 = json.loads(d2)
    q = json.loads(q)
    out_d = {
        'question_id': i,
        'instruction': q['text'],
        'input': '',
        "response1": d1['text'],
        "response2": d2['text'],
    }
    out_data.append(out_d)
# Second pass with the two models swapped, so the comparison is not skewed
# by response position (order bias).
for i, (d1, d2, q) in enumerate(zip(data2, data1, question)):
    d1 = json.loads(d1)
    d2 = json.loads(d2)
    q = json.loads(q)
    out_d = {
        # Offset the ids by the number of questions so they stay unique
        # (the original hardcoded 80 here).
        'question_id': i + len(question),
        'instruction': q['text'],
        'input': '',
        "response1": d1['text'],
        "response2": d2['text'],
    }
    out_data.append(out_d)
with open(out_path, 'w') as f:
    # Write a single JSON array; the line below would emit JSON Lines instead.
    # f.write("\n".join([json.dumps(x) for x in out_data]))
    json.dump(out_data, f)
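
# Optional sanity check, a minimal sketch: re-read the file just written and
# confirm the swapped second pass doubled the number of comparison records.
# It only uses values defined above; nothing is assumed about the PandaLM
# evaluation side.
with open(out_path) as f:
    written = json.load(f)
assert len(written) == 2 * len(question)
print(f"wrote {len(written)} comparison records to {out_path}")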