Module interact

Expand source code
# from transformers import GPT2LMHeadModel, GPT2Tokenizer
from transformers import AutoTokenizer,AutoModelWithLMHead
""" 
    Class that acts as the user-facing interface for interacting with the fine-tuned gpt2-medium model.
"""
class InteractWithGptModel():
    """Wrapper around the fine-tuned GPT-2 code-generation model.

    Stores the generation settings and the current query, loads the model
    and tokenizer, and turns the query into a generated code snippet.

    Args:
        model_path: Stored on the instance and printed by
            ``generate_output``. NOTE(review): ``load_model`` and
            ``load_tokenizer`` use a fixed model identifier and do not read
            this value -- confirm whether that is intended.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        temperature: Forwarded to ``model.generate`` as ``temperature``.
        use_cuda: When truthy, the model and the encoded prompt are moved to
            the ``"cuda"`` device before generation.
        lang: ``"python"`` or ``"python3"`` selects the ``<python>`` prompt
            prefix; any other value selects ``<java>``.
        query: Text appended to the language prefix to form the prompt.
    """

    def __init__(self,model_path,max_length,temperature,use_cuda,lang,query):
        self.model_path = model_path
        self.max_length = max_length
        self.temperature = temperature
        self.use_cuda = use_cuda
        self.model = None  # set by load_model()
        self.tokenizer = None  # set by load_tokenizer()
        self.lang = lang  # can be python or java, otherwise this class will never be called
        self.query = query  # original query taken from the vscode editor

        print("InteractWithGptModel object succesfully created")

    def load_model(self):
        """Load the fine-tuned model and store it on ``self.model``.

        The model identifier is fixed; ``self.model_path`` is not consulted.
        """
        self.model = AutoModelWithLMHead.from_pretrained("chirag2706/gpt2_code_generation_model")

    def load_tokenizer(self):
        """Load the matching tokenizer and store it on ``self.tokenizer``.

        The model identifier is fixed; ``self.model_path`` is not consulted.
        """
        self.tokenizer = AutoTokenizer.from_pretrained("chirag2706/gpt2_code_generation_model")

    def set_lang(self,lang):
        """Replace the target language used to pick the prompt prefix."""
        self.lang = lang

    def set_query(self,query):
        """Replace the query text used to build the prompt."""
        self.query = query

    def generate_output(self):
        """Generate code for the current query and language.

        Prints the current settings, builds the prompt
        ``"<python> " + query`` or ``"<java> " + query``, runs
        ``model.generate`` and decodes the first returned sequence. The
        decoded text is then cut after the first line that contains the word
        ``return``.

        Returns:
            str: The (possibly truncated) decoded text.
        """
        print(self.model_path)
        print(self.max_length)
        print(self.temperature)
        print(self.use_cuda)
        print(self.lang)
        print(self.query)

        # "python" is accepted alongside "python3": the constructor documents
        # "python" as a valid value, and it used to fall through to <java>.
        prefix = "<python> " if self.lang in ("python", "python3") else "<java> "
        input_ids = self.tokenizer.encode(prefix + self.query, return_tensors='pt')

        if self.use_cuda:
            # The prompt tensor and the model weights must live on the same
            # device; moving only the input fails inside generate().
            self.model.to("cuda")
            input_ids = input_ids.to("cuda")

        # NOTE(review): do_sample is not set here, so temperature may have no
        # effect on decoding -- confirm against the transformers docs.
        outputs = self.model.generate(input_ids=input_ids,
                                      max_length=self.max_length,
                                      temperature=self.temperature,
                                      num_return_sequences=1)

        print(len(outputs))

        decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return self._truncate_after_return(decoded)

    @staticmethod
    def _truncate_after_return(text):
        """Return ``text`` cut after the first line containing the word ``return``."""
        lines = text.split("\n")
        last_index = len(lines) - 1
        kept = []
        for index, line in enumerate(lines):
            if index == last_index:
                # The final segment has no trailing newline; keep it whole so
                # the last word of the output is not lost.
                kept.append(line)
                break
            kept.append(line + "\n")
            # Whitespace split so tab-indented and bare `return` lines count.
            if "return" in line.split():
                break
        return "".join(kept)

Classes

class InteractWithGptModel (model_path, max_length, temperature, use_cuda, lang, query)
Expand source code
class InteractWithGptModel():
    """Wrapper around the fine-tuned GPT-2 code-generation model.

    Stores the generation settings and the current query, loads the model
    and tokenizer, and turns the query into a generated code snippet.

    Args:
        model_path: Stored on the instance and printed by
            ``generate_output``. NOTE(review): ``load_model`` and
            ``load_tokenizer`` use a fixed model identifier and do not read
            this value -- confirm whether that is intended.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        temperature: Forwarded to ``model.generate`` as ``temperature``.
        use_cuda: When truthy, the model and the encoded prompt are moved to
            the ``"cuda"`` device before generation.
        lang: ``"python"`` or ``"python3"`` selects the ``<python>`` prompt
            prefix; any other value selects ``<java>``.
        query: Text appended to the language prefix to form the prompt.
    """

    def __init__(self,model_path,max_length,temperature,use_cuda,lang,query):
        self.model_path = model_path
        self.max_length = max_length
        self.temperature = temperature
        self.use_cuda = use_cuda
        self.model = None  # set by load_model()
        self.tokenizer = None  # set by load_tokenizer()
        self.lang = lang  # can be python or java, otherwise this class will never be called
        self.query = query  # original query taken from the vscode editor

        print("InteractWithGptModel object succesfully created")

    def load_model(self):
        """Load the fine-tuned model and store it on ``self.model``.

        The model identifier is fixed; ``self.model_path`` is not consulted.
        """
        self.model = AutoModelWithLMHead.from_pretrained("chirag2706/gpt2_code_generation_model")

    def load_tokenizer(self):
        """Load the matching tokenizer and store it on ``self.tokenizer``.

        The model identifier is fixed; ``self.model_path`` is not consulted.
        """
        self.tokenizer = AutoTokenizer.from_pretrained("chirag2706/gpt2_code_generation_model")

    def set_lang(self,lang):
        """Replace the target language used to pick the prompt prefix."""
        self.lang = lang

    def set_query(self,query):
        """Replace the query text used to build the prompt."""
        self.query = query

    def generate_output(self):
        """Generate code for the current query and language.

        Prints the current settings, builds the prompt
        ``"<python> " + query`` or ``"<java> " + query``, runs
        ``model.generate`` and decodes the first returned sequence. The
        decoded text is then cut after the first line that contains the word
        ``return``.

        Returns:
            str: The (possibly truncated) decoded text.
        """
        print(self.model_path)
        print(self.max_length)
        print(self.temperature)
        print(self.use_cuda)
        print(self.lang)
        print(self.query)

        # "python" is accepted alongside "python3": the constructor documents
        # "python" as a valid value, and it used to fall through to <java>.
        prefix = "<python> " if self.lang in ("python", "python3") else "<java> "
        input_ids = self.tokenizer.encode(prefix + self.query, return_tensors='pt')

        if self.use_cuda:
            # The prompt tensor and the model weights must live on the same
            # device; moving only the input fails inside generate().
            self.model.to("cuda")
            input_ids = input_ids.to("cuda")

        # NOTE(review): do_sample is not set here, so temperature may have no
        # effect on decoding -- confirm against the transformers docs.
        outputs = self.model.generate(input_ids=input_ids,
                                      max_length=self.max_length,
                                      temperature=self.temperature,
                                      num_return_sequences=1)

        print(len(outputs))

        decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return self._truncate_after_return(decoded)

    @staticmethod
    def _truncate_after_return(text):
        """Return ``text`` cut after the first line containing the word ``return``."""
        lines = text.split("\n")
        last_index = len(lines) - 1
        kept = []
        for index, line in enumerate(lines):
            if index == last_index:
                # The final segment has no trailing newline; keep it whole so
                # the last word of the output is not lost.
                kept.append(line)
                break
            kept.append(line + "\n")
            # Whitespace split so tab-indented and bare `return` lines count.
            if "return" in line.split():
                break
        return "".join(kept)

Methods

def generate_output(self)
Expand source code
def generate_output(self):
    """Generate code for the current query and language.

    Prints the current settings, builds the prompt ``"<python> " + query``
    or ``"<java> " + query``, runs ``self.model.generate`` and decodes the
    first returned sequence. The decoded text is then cut after the first
    line that contains the word ``return``.

    Returns:
        str: The (possibly truncated) decoded text.
    """
    print(self.model_path)
    print(self.max_length)
    print(self.temperature)
    print(self.use_cuda)
    print(self.lang)
    print(self.query)

    # "python" is accepted alongside "python3"; it used to fall through to
    # the <java> prefix.
    prefix = "<python> " if self.lang in ("python", "python3") else "<java> "
    input_ids = self.tokenizer.encode(prefix + self.query, return_tensors='pt')

    if self.use_cuda:
        # The prompt tensor and the model weights must live on the same
        # device; moving only the input fails inside generate().
        self.model.to("cuda")
        input_ids = input_ids.to("cuda")

    # NOTE(review): do_sample is not set here, so temperature may have no
    # effect on decoding -- confirm against the transformers docs.
    outputs = self.model.generate(input_ids=input_ids,
                                  max_length=self.max_length,
                                  temperature=self.temperature,
                                  num_return_sequences=1)

    print(len(outputs))

    decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep whole lines up to and including the first one that contains the
    # word "return".
    lines = decoded.split("\n")
    last_index = len(lines) - 1
    kept = []
    for index, line in enumerate(lines):
        if index == last_index:
            # The final segment has no trailing newline; keep it whole so the
            # last word of the output is not lost.
            kept.append(line)
            break
        kept.append(line + "\n")
        # Whitespace split so tab-indented and bare `return` lines count.
        if "return" in line.split():
            break

    return "".join(kept)
def load_model(self)
Expand source code
def load_model(self):
    """Load the fine-tuned model and store it on ``self.model``.

    The model identifier is fixed; no instance attribute is read.
    """
    model_id = "chirag2706/gpt2_code_generation_model"
    self.model = AutoModelWithLMHead.from_pretrained(model_id)
def load_tokenizer(self)
Expand source code
def load_tokenizer(self):
    """Load the matching tokenizer and store it on ``self.tokenizer``.

    The model identifier is fixed; no instance attribute is read.
    """
    model_id = "chirag2706/gpt2_code_generation_model"
    self.tokenizer = AutoTokenizer.from_pretrained(model_id)
def set_lang(self, lang)
Expand source code
def set_lang(self, lang) -> None:
    """Replace the target language stored on the instance."""
    self.lang = lang
def set_query(self, query)
Expand source code
def set_query(self, query) -> None:
    """Replace the query text stored on the instance."""
    self.query = query