Skip to content

Question2SQL

Question2SQL

Get the question and the table schema and return the SQL query

Source code in TimelineKGQA/rag/question2sql.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
class Question2SQL:
    """
    Get the question and the table schema and return the SQL query.

    Workflow:
        1. ``benchmark`` loads the generated questions, builds text2sql
           prompts (a plain variant and a semantic-parse few-shot variant)
           and stores them in a bookkeeping table.
        2. ``text2sql_generation`` asks an OpenAI chat model for the SQL
           query for each prompt and records it.
        3. ``verify_results`` executes every generated query and marks it
           correct when the retrieved events match the ground truth.
    """

    def __init__(
        self,
        table_name: str,
        host: str,
        port: int,
        user: str,
        password: str,
        db_name: str,
        text2sql_table_name: str = None,
    ):
        """
        Connect to PostgreSQL and make sure the bookkeeping table exists.

        Args:
            table_name: The name of the table
            host: The host of the database
            port: The port of the database
            user: The user of the database
            password: The password of the database
            db_name: The name of the database
            text2sql_table_name: Optional suffix for the bookkeeping table.
                When None the table is named ``<table_name>_text2sql``,
                otherwise ``<table_name>_<text2sql_table_name>``.
        """
        self.table_name = table_name
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.db_name = db_name

        self.engine = create_engine(
            f"postgresql://{user}:{password}@{host}:{port}/{db_name}"
        )

        if text2sql_table_name is None:
            self.text2sql_table_name = f"{table_name}_text2sql"
        else:
            self.text2sql_table_name = f"{table_name}_{text2sql_table_name}"
        logger.info(f"Text2SQL Table Name: {self.text2sql_table_name}")

        # create a table if not exists to store the text2sql questions and results
        # NOTE: table names cannot be bound parameters; they come from trusted
        # configuration, not end-user input.
        with self.engine.connect() as connection:
            connection.execute(
                text(
                    f"""
                    CREATE TABLE IF NOT EXISTS {self.text2sql_table_name} (
                        id SERIAL PRIMARY KEY,
                        question TEXT,
                        question_level TEXT,
                        prompt TEXT,
                        prompt_semantic TEXT,
                        sql_query TEXT,
                        sql_query_semantic TEXT,
                        correct BOOLEAN,
                        correct_semantic BOOLEAN
                    );
                    """
                )
            )
            connection.commit()

    def benchmark(
        self,
        semantic_parse: bool = False,
        model_name: str = "gpt-3.5-turbo",
    ):
        """
        Run the full text2sql benchmark: build and store prompts for every
        question, generate SQL with the LLM, then verify the results.

        The run is resumable: questions that already have a stored prompt or
        a generated query are skipped.

        Args:
            semantic_parse: If True, use the semantic parse to generate the prompt
            model_name: The model name
        """
        questions_df = pd.read_sql(
            f"SELECT * FROM {self.table_name}_questions",
            self.engine,
        )

        logger.info(f"Number of questions: {len(questions_df)}")

        table_schema = self.get_table_schema()
        logger.info(f"Table Schema: {table_schema}")

        prompt_df = pd.read_sql(
            f"SELECT * FROM {self.text2sql_table_name}",
            self.engine,
        )

        # Phase 1: store prompts for questions not seen on a previous run.
        for _, row in tqdm(questions_df.iterrows(), total=len(questions_df)):
            question = row["question"]
            if question in prompt_df["question"].values:
                continue  # prompt already stored

            events = row["events"]
            prompt_semantic = (
                self.process_question_to_prompt_with_semantic_parse_few_shot(
                    question, events, table_schema
                )
            )
            prompt = self.process_question_to_prompt(question, table_schema)

            query = text(
                f"""
                INSERT INTO {self.text2sql_table_name} (question, question_level, prompt, prompt_semantic)
                VALUES (:question, :question_level, :prompt, :prompt_semantic)
                """
            )

            with self.engine.connect() as connection:
                connection.execute(
                    query,
                    {
                        "question": question,
                        "question_level": row["question_level"],
                        "prompt": prompt,
                        "prompt_semantic": prompt_semantic,
                    },
                )
                connection.commit()

        # Re-read so newly inserted prompts are included.
        prompt_df = pd.read_sql(
            f"SELECT * FROM {self.text2sql_table_name}",
            self.engine,
        )

        # Phase 2: generate SQL concurrently for rows that have no query yet.
        progress_check_key = "sql_query_semantic" if semantic_parse else "sql_query"
        prompt_key = "prompt_semantic" if semantic_parse else "prompt"

        with concurrent.futures.ThreadPoolExecutor(max_workers=cpu_count()) as executor:
            futures = []
            for _, row in prompt_df.iterrows():
                generated = row[progress_check_key]
                # pd.notna also handles NaN, which pandas may return for NULLs.
                if pd.notna(generated) and generated != "":
                    continue  # query already generated on a previous run
                futures.append(
                    executor.submit(
                        self.text2sql_generation,
                        prompt=row[prompt_key],
                        question=row["question"],
                        model_name=model_name,
                        semantic_parse=semantic_parse,
                    )
                )
            # Drain the futures so worker exceptions are logged instead of
            # being silently dropped when the futures are garbage-collected.
            for future in concurrent.futures.as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    logger.exception(e)

        self.verify_results(semantic_parse=semantic_parse)

    def process_question_to_prompt(self, question: str, table_schema: str):
        """
        Process the question to the prompt (plain variant, no entity hints).

        Args:
            question: The question
            table_schema: The table schema (markdown, from get_table_schema)

        Returns:
            The prompt string asking for a JSON object with key "sql_query"
        """
        prompt = f"""question: {question}
        The related knowledge to answer this question is in table {table_schema},
        the table name is {self.table_name},
        Generate the sql query to retrieve the relevant information from the table to answer the question.
        Return all columns for the rows that satisfy the condition.
        Return the SQL query  in json format with the key "sql_query"
        """
        return prompt

    def _related_entities_from_events(self, question: str, events: List) -> str:
        """
        Collect the subjects/objects of the events that literally appear in
        the question, as a comma-joined string.

        Each event is a "subject|predicate|object|start_time|end_time" string;
        events that do not split into exactly five fields are skipped.
        """
        related_entities = []
        for event in events:
            items = event.split("|")
            if len(items) != 5:
                continue
            subject, _predicate, tail_object, _start_time, _end_time = items
            if subject in question:
                related_entities.append(subject)
            if tail_object in question:
                related_entities.append(tail_object)
        return ",".join(related_entities)

    def process_question_to_prompt_with_semantic_parse(
        self, question: str, events: List, table_schema: str
    ):
        # TODO: and question mark, should we do this? and How?
        """
        Process the question to the prompt, adding the entities extracted
        from the ground-truth events as WHERE-clause hints.

        Args:
            question: The question
            events: The events ("subject|predicate|object|start|end" strings)
            table_schema: The table schema

        Returns:
            The prompt
        """
        related_entities = self._related_entities_from_events(question, events)
        prompt = f"""question: {question}
        The related knowledge to answer this question is in table {table_schema},
        the table name is {self.table_name},        
        entities can be used as where clause: {related_entities}
        Generate the sql query to retrieve the relevant information from the table to answer the question.
        Return all columns for the rows that satisfy the condition.
        Return the SQL query in json format with the key "sql_query"
        """
        return prompt

    def process_question_to_prompt_with_semantic_parse_few_shot(
        self, question: str, events: List, table_schema: str
    ):
        # TODO: and question mark, should we do this? and How?
        """
        Process the question to the prompt: entity hints plus one worked
        example (few-shot) showing the expected subject/object WHERE clause.

        Args:
            question: The question
            events: The events ("subject|predicate|object|start|end" strings)
            table_schema: The table schema

        Returns:
            The prompt
        """
        related_entities = self._related_entities_from_events(question, events)
        prompt = f"""question: {question}
        The related knowledge to answer this question is in table {table_schema},
        the table name is {self.table_name},        
        entities can be used as where clause: {related_entities}
        Generate the sql query to retrieve the relevant information from the table to answer the question.
        To achieve that, you will need to find a record where the subject and/or object matches the entities in the question.
        For example, for question
        From when to when, Assef Shawkat Affiliation To Military (Syria), at the same time, Lisa Ibrahim Affiliation To Brunei, at the same time, Salman Khurshid Affiliation To Elite (India)?
        The answer is
        SELECT * FROM unified_kg_icews_actor WHERE (subject = 'Assef Shawkat' AND object = 'Military (Syria)') OR (subject = 'Lisa Ibrahim' AND object = 'Brunei') OR (subject = 'Salman Khurshid' AND object = 'Elite (India)')
        Return all columns for the rows that satisfy the condition.
        Return the SQL query in json format with the key "sql_query"
        """
        return prompt

    def text2sql_generation(
        self,
        prompt: str,
        question: str,
        model_name: str = "gpt-3.5-turbo",
        semantic_parse: bool = False,
    ):
        """
        This function will call text2sql_gpt and update the sql_query in the database

        Args:
            prompt: The prompt
            question: The question
            model_name: The model name
            semantic_parse: If True, store the query in the semantic column
        """
        sql_query = self.text2sql_gpt(prompt=prompt, model_name=model_name)
        self.update_sql_query(
            question=question, sql_query=sql_query, semantic_parse=semantic_parse
        )

    # Retry indefinitely (constant 20s interval) on OpenAI rate limiting.
    @backoff.on_exception(
        backoff.constant, RateLimitError, raise_on_giveup=True, interval=20
    )
    def text2sql_gpt(self, prompt: str, model_name: str = "gpt-3.5-turbo"):
        """
        Get the question and the table schema and return the SQL query.

        The model is forced into JSON mode and the query is read from the
        "sql_query" key of the returned object.

        Args:
            prompt(str): The prompt
            model_name: The model name

        Raises:
            Exception: re-raised after logging (JSON parse errors, missing
                "sql_query" key, API failures other than rate limits).
        """
        try:
            response = client.chat.completions.create(
                model=model_name,
                messages=[
                    {
                        "role": "system",
                        "content": """You are an expert about text to SQL in PostgreSQL database
                                      """,
                    },
                    {
                        "role": "user",
                        "content": prompt,
                    },
                ],
                response_format={"type": "json_object"},
                temperature=0,  # deterministic output for benchmarking
            )
            logger.debug(f"Response: {response.choices[0].message.content}")
            query_json = json.loads(response.choices[0].message.content)
            return query_json["sql_query"]
        except Exception as e:
            logger.error(f"An error occurred: {e}")
            raise e

    def update_sql_query(
        self, question: str, sql_query: str, semantic_parse: bool = False
    ):
        """
        Persist a generated SQL query for the given question.

        Args:
            question: The question identifying the row to update
            sql_query: The generated SQL text
            semantic_parse: If True, write to sql_query_semantic instead
        """
        column = "sql_query_semantic" if semantic_parse else "sql_query"
        query = text(
            f"""
            UPDATE {self.text2sql_table_name}
            SET {column} = :sql_query
            WHERE question = :question
            """
        )
        with self.engine.connect() as connection:
            connection.execute(
                query,
                {
                    "question": question,
                    "sql_query": sql_query,
                },
            )
            connection.commit()

    def verify_results(self, semantic_parse: bool = False):
        """
        Execute every generated SQL query and record whether it retrieves
        the ground-truth events for its question.

        Args:
            semantic_parse: If True, verify the semantic-parse queries
                (sql_query_semantic / correct_semantic columns).
        """
        sql_column = "sql_query_semantic" if semantic_parse else "sql_query"
        prompts_df = pd.read_sql(
            f"SELECT * FROM {self.text2sql_table_name} WHERE {sql_column} IS NOT NULL",
            self.engine,
        )
        questions_df = pd.read_sql(
            f"SELECT * FROM {self.table_name}_questions",
            self.engine,
        )

        progress_key = "correct_semantic" if semantic_parse else "correct"
        with concurrent.futures.ThreadPoolExecutor(max_workers=cpu_count()) as executor:
            futures = []
            for _, row in prompts_df.iterrows():
                if row[progress_key] is not None:
                    continue  # already verified on a previous run
                futures.append(
                    executor.submit(
                        self.verify_one_result,
                        row=row,
                        questions_df=questions_df,
                        semantic_parse=semantic_parse,
                    )
                )
            # Drain the futures so worker exceptions are logged instead of
            # being silently dropped.
            for future in concurrent.futures.as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    logger.exception(e)

    def verify_one_result(self, row, questions_df, semantic_parse: bool = False):
        """
        Run one generated SQL query and mark it correct when the retrieved
        events exactly match (as sets) the ground-truth events.

        A failing query (bad SQL, missing columns, ...) is logged and
        recorded as incorrect rather than aborting the run.

        Args:
            row: A bookkeeping-table row (pandas Series) with the query
            questions_df: The questions table, used for the ground truth
            semantic_parse: If True, use/write the *_semantic columns
        """
        question = row["question"]
        progress_key = "correct_semantic" if semantic_parse else "correct"
        if row[progress_key] is not None:
            return  # already verified
        if semantic_parse:
            sql_query = row["sql_query_semantic"]
        else:
            sql_query = row["sql_query"]

        try:
            df = pd.read_sql(text(sql_query), self.engine)
            # Re-encode each retrieved row in the canonical event format so
            # it can be compared with the ground-truth event strings.
            events = [
                f"{event['subject']}|{event['predicate']}|{event['object']}"
                f"|{event['start_time']}|{event['end_time']}"
                for _, event in df.iterrows()
            ]
            # locate the ground truth for the question
            ground_truth = questions_df[questions_df["question"] == question]
            ground_truth_events = ground_truth["events"].values.tolist()
            # decompose the nested list
            ground_truth_events = [
                item for sublist in ground_truth_events for item in sublist
            ]
            logger.info(f"Question: {question}")
            logger.info(f"SQL Query: {sql_query}")
            logger.info(f"Events: {events}")
            logger.info(f"Ground Truth Events: {ground_truth_events}")
            correct = set(events) == set(ground_truth_events)
        except Exception as e:
            logger.exception(e)
            correct = False

        column = "correct_semantic" if semantic_parse else "correct"
        update = text(
            f"""
            UPDATE {self.text2sql_table_name}
            SET {column} = :correct
            WHERE question = :question
            """
        )
        # Context manager guarantees the connection is released even when the
        # UPDATE raises (the original leaked the connection on that path).
        with self.engine.connect() as connection:
            connection.execute(
                update,
                {
                    "question": question,
                    "correct": correct,
                },
            )
            connection.commit()

    def get_table_schema(self):
        """
        Return the knowledge-graph table's column metadata (name, type,
        length, nullability, default) as a markdown table for the prompt.
        """
        query = f"""
        SELECT 
            column_name, 
            data_type, 
            character_maximum_length, 
            is_nullable, 
            column_default
        FROM 
            information_schema.columns
        WHERE 
            table_name = '{self.table_name}';
        """
        df = pd.read_sql(query, self.engine)
        return df.to_markdown(index=False)

__init__(table_name, host, port, user, password, db_name, text2sql_table_name=None)

Parameters:

Name Type Description Default
table_name str

The name of the table

required
host str

The host of the database

required
port int

The port of the database

required
user str

The user of the database

required
password str

The password of the database

required
db_name str

The name of the database

required
Source code in TimelineKGQA/rag/question2sql.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# NOTE(docs): mkdocs-rendered copy of Question2SQL.__init__ — keep in sync
# with the canonical source above.
def __init__(
    self,
    table_name: str,
    host: str,
    port: int,
    user: str,
    password: str,
    db_name: str,
    text2sql_table_name: str = None,
):
    """
    Args:
        table_name: The name of the table
        host: The host of the database
        port: The port of the database
        user: The user of the database
        password: The password of the database
        db_name: The name of the database
        text2sql_table_name: Optional suffix for the bookkeeping table;
            defaults to "<table_name>_text2sql" when None.
    """
    self.table_name = table_name
    self.host = host
    self.port = port
    self.user = user
    self.password = password
    self.db_name = db_name

    self.engine = create_engine(
        f"postgresql://{user}:{password}@{host}:{port}/{db_name}"
    )

    if text2sql_table_name is None:
        self.text2sql_table_name = f"{table_name}_text2sql"
    else:
        self.text2sql_table_name = f"{table_name}_{text2sql_table_name}"
    logger.info(f"Text2SQL Table Name: {self.text2sql_table_name}")

    # create a table if not exists to store the text2sql questions and results
    with self.engine.connect() as connection:
        connection.execute(
            text(
                f"""
                CREATE TABLE IF NOT EXISTS {self.text2sql_table_name} (
                    id SERIAL PRIMARY KEY,
                    question TEXT,
                    question_level TEXT,
                    prompt TEXT,
                    prompt_semantic TEXT,
                    sql_query TEXT,
                    sql_query_semantic TEXT,
                    correct BOOLEAN,
                    correct_semantic BOOLEAN
                );
                """
            )
        )
        connection.commit()

benchmark(semantic_parse=False, model_name='gpt-3.5-turbo')

Parameters:

Name Type Description Default
semantic_parse bool

If True, use the semantic parse to generate the prompt

False
model_name str

The model name

'gpt-3.5-turbo'
Source code in TimelineKGQA/rag/question2sql.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# NOTE(docs): mkdocs-rendered copy of Question2SQL.benchmark — keep in sync
# with the canonical source above.
def benchmark(
    self,
    semantic_parse: bool = False,
    model_name: str = "gpt-3.5-turbo",
):
    """
    Build and store prompts for every question, generate SQL with the LLM,
    then verify the results. The run is resumable.

    Args:
        semantic_parse: If True, use the semantic parse to generate the prompt
        model_name: The model name
    """
    questions_df = pd.read_sql(
        f"SELECT * FROM {self.table_name}_questions",
        self.engine,
    )

    logger.info(f"Number of questions: {len(questions_df)}")

    table_schema = self.get_table_schema()
    logger.info(f"Table Schema: {table_schema}")

    prompt_df = pd.read_sql(
        f"SELECT * FROM {self.text2sql_table_name}",
        self.engine,
    )

    # Store prompts for questions not seen on a previous run.
    for index, row in tqdm(questions_df.iterrows(), total=len(questions_df)):
        question = row["question"]
        if question in prompt_df["question"].values:
            continue

        events = row["events"]
        prompt_semantic = (
            self.process_question_to_prompt_with_semantic_parse_few_shot(
                question, events, table_schema
            )
        )

        prompt = self.process_question_to_prompt(question, table_schema)

        query = text(
            f"""
                          INSERT INTO {self.text2sql_table_name} (question, question_level, prompt, prompt_semantic)
                          VALUES (:question, :question_level, :prompt, :prompt_semantic)
                      """
        )

        with self.engine.connect() as connection:
            connection.execute(
                query,
                {
                    "question": question,
                    "question_level": row["question_level"],
                    "prompt": prompt,
                    "prompt_semantic": prompt_semantic,
                },
            )

            connection.commit()

    # Re-read so newly inserted prompts are included.
    prompt_df = pd.read_sql(
        f"SELECT * FROM {self.text2sql_table_name}",
        self.engine,
    )

    # Generate SQL concurrently for rows that have no query yet.
    with concurrent.futures.ThreadPoolExecutor(max_workers=cpu_count()) as executor:
        futures = []
        for index, row in prompt_df.iterrows():
            question = row["question"]

            progress_check_key = (
                "sql_query" if not semantic_parse else "sql_query_semantic"
            )

            if (
                row[progress_check_key] is not None
                and row[progress_check_key] != ""
            ):
                continue
            if semantic_parse:
                prompt = row["prompt_semantic"]
            else:
                prompt = row["prompt"]
            futures.append(
                executor.submit(
                    self.text2sql_generation,
                    prompt=prompt,
                    question=question,
                    model_name=model_name,
                    semantic_parse=semantic_parse,
                )
            )

    self.verify_results(semantic_parse=semantic_parse)

process_question_to_prompt(question, table_schema)

Process the question to the prompt

Parameters:

Name Type Description Default
question str

The question

required
table_schema str

The table schema

required

Returns:

Type Description

The prompt

Source code in TimelineKGQA/rag/question2sql.py
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# NOTE(docs): mkdocs-rendered copy of Question2SQL.process_question_to_prompt —
# keep in sync with the canonical source above.
def process_question_to_prompt(self, question: str, table_schema: str):
    """
    Process the question to the prompt (plain variant, no entity hints).

    Args:
        question: The question
        table_schema: The table schema

    Returns:
        The prompt asking for a JSON object with key "sql_query"
    """
    prompt = f"""question: {question}
    The related knowledge to answer this question is in table {table_schema},
    the table name is {self.table_name},
    Generate the sql query to retrieve the relevant information from the table to answer the question.
    Return all columns for the rows that satisfy the condition.
    Return the SQL query  in json format with the key "sql_query"
    """
    return prompt

process_question_to_prompt_with_semantic_parse(question, events, table_schema)

Process the question to the prompt

Parameters:

Name Type Description Default
question str

The question

required
events List

The events

required
table_schema str

The table schema

required

Returns:

Type Description

The prompt

Source code in TimelineKGQA/rag/question2sql.py
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# NOTE(docs): mkdocs-rendered copy of
# Question2SQL.process_question_to_prompt_with_semantic_parse — keep in sync
# with the canonical source above.
def process_question_to_prompt_with_semantic_parse(
    self, question: str, events: List, table_schema: str
):
    # TODO: and question mark, should we do this? and How?
    """
    Process the question to the prompt, adding entities extracted from the
    ground-truth events as WHERE-clause hints.

    Args:
        question: The question
        events: The events ("subject|predicate|object|start|end" strings)
        table_schema: The table schema

    Returns:
        The prompt
    """

    related_entities = []

    for event in events:
        items = event.split("|")
        if len(items) != 5:
            continue
        subject, predicate, tail_object, start_time, end_time = event.split("|")
        if subject in question:
            related_entities.append(subject)
        if tail_object in question:
            related_entities.append(tail_object)
    related_entities = ",".join(related_entities)
    prompt = f"""question: {question}
    The related knowledge to answer this question is in table {table_schema},
    the table name is {self.table_name},        
    entities can be used as where clause: {related_entities}
    Generate the sql query to retrieve the relevant information from the table to answer the question.
    Return all columns for the rows that satisfy the condition.
    Return the SQL query in json format with the key "sql_query"
    """
    return prompt

process_question_to_prompt_with_semantic_parse_few_shot(question, events, table_schema)

Process the question to the prompt

Parameters:

Name Type Description Default
question str

The question

required
events List

The events

required
table_schema str

The table schema

required

Returns:

Type Description

The prompt

Source code in TimelineKGQA/rag/question2sql.py
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# NOTE(docs): mkdocs-rendered copy of
# Question2SQL.process_question_to_prompt_with_semantic_parse_few_shot — keep
# in sync with the canonical source above.
def process_question_to_prompt_with_semantic_parse_few_shot(
    self, question: str, events: List, table_schema: str
):
    # TODO: and question mark, should we do this? and How?
    """
    Process the question to the prompt: entity hints plus one worked example
    (few-shot) showing the expected subject/object WHERE clause.

    Args:
        question: The question
        events: The events ("subject|predicate|object|start|end" strings)
        table_schema: The table schema

    Returns:
        The prompt
    """

    related_entities = []

    for event in events:
        items = event.split("|")
        if len(items) != 5:
            continue
        subject, predicate, tail_object, start_time, end_time = event.split("|")
        if subject in question:
            related_entities.append(subject)
        if tail_object in question:
            related_entities.append(tail_object)
    related_entities = ",".join(related_entities)
    prompt = f"""question: {question}
    The related knowledge to answer this question is in table {table_schema},
    the table name is {self.table_name},        
    entities can be used as where clause: {related_entities}
    Generate the sql query to retrieve the relevant information from the table to answer the question.
    To achieve that, you will need to find a record where the subject and/or object matches the entities in the question.
    For example, for question
    From when to when, Assef Shawkat Affiliation To Military (Syria), at the same time, Lisa Ibrahim Affiliation To Brunei, at the same time, Salman Khurshid Affiliation To Elite (India)?
    The answer is
    SELECT * FROM unified_kg_icews_actor WHERE (subject = 'Assef Shawkat' AND object = 'Military (Syria)') OR (subject = 'Lisa Ibrahim' AND object = 'Brunei') OR (subject = 'Salman Khurshid' AND object = 'Elite (India)')
    Return all columns for the rows that satisfy the condition.
    Return the SQL query in json format with the key "sql_query"
    """
    return prompt

text2sql_generation(prompt, question, model_name='gpt-3.5-turbo', semantic_parse=False)

This function calls text2sql_gpt and updates the sql_query in the database. Parameters: prompt — the prompt; question — the question; model_name — the model name; semantic_parse — if True, use the semantic parse to generate the prompt.

Source code in TimelineKGQA/rag/question2sql.py
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
# NOTE(docs): mkdocs-rendered copy of Question2SQL.text2sql_generation — keep
# in sync with the canonical source above.
def text2sql_generation(
    self,
    prompt: str,
    question: str,
    model_name: str = "gpt-3.5-turbo",
    semantic_parse: bool = False,
):
    """
    This function will call text2sql_gpt and update the sql_query in the database
    Args:
        prompt: The prompt
        question: The question
        model_name: The model name
        semantic_parse: If True, use the semantic parse to generate the prompt
    """
    sql_query = self.text2sql_gpt(prompt=prompt, model_name=model_name)
    self.update_sql_query(
        question=question, sql_query=sql_query, semantic_parse=semantic_parse
    )

text2sql_gpt(prompt, model_name='gpt-3.5-turbo')

Get the question and the table schema and return the SQL query

Parameters:

Name Type Description Default
prompt(str)

The prompt

required
model_name str

The model name

'gpt-3.5-turbo'
Source code in TimelineKGQA/rag/question2sql.py
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
# NOTE(docs): mkdocs-rendered copy of Question2SQL.text2sql_gpt — keep in sync
# with the canonical source above.
# Retries indefinitely (constant 20s interval) on OpenAI rate limiting.
@backoff.on_exception(
    backoff.constant, RateLimitError, raise_on_giveup=True, interval=20
)
def text2sql_gpt(self, prompt: str, model_name: str = "gpt-3.5-turbo"):
    """
    Get the question and the table schema and return the SQL query.

    The model is forced into JSON mode and the query is read from the
    "sql_query" key of the returned object.

    Args:
        prompt(str): The prompt
        model_name: The model name
    """
    try:
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {
                    "role": "system",
                    "content": """You are an expert about text to SQL in PostgreSQL database
                                  """,
                },
                {
                    "role": "user",
                    "content": prompt,
                },
            ],
            response_format={"type": "json_object"},
            temperature=0,  # deterministic output for benchmarking
        )
        logger.debug(f"Response: {response.choices[0].message.content}")
        sql_query = response.choices[0].message.content
        query_json = json.loads(sql_query)
        sql_query = query_json["sql_query"]
        return sql_query
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        raise e